Building methods for a cuda task dependency graph.
More...
#include <cuda_flow_builder.hpp>
|
| cudaFlow (cudaGraph &graph) |
| constructs a cudaFlow builder object More...
|
|
bool | empty () const |
| queries the emptiness of the graph
|
|
cudaTask | noop () |
| creates a no-operation task More...
|
|
template<typename F , typename... ArgsT> |
cudaTask | kernel (dim3 g, dim3 b, size_t s, F &&f, ArgsT &&... args) |
| creates a kernel task More...
|
|
template<typename F , typename... ArgsT> |
cudaTask | kernel_on (int d, dim3 g, dim3 b, size_t s, F &&f, ArgsT &&... args) |
| creates a kernel task on a device More...
|
|
cudaTask | memset (void *dst, int v, size_t count) |
| creates a memset node More...
|
|
template<typename T , std::enable_if_t<!std::is_same< T, void >::value, void > * = nullptr> |
cudaTask | copy (T *tgt, const T *src, size_t num) |
| creates a 1D copy task More...
|
|
void | device (int device) |
| assigns a device to launch the cudaFlow More...
|
|
int | device () const |
| queries the device associated with the cudaFlow
|
|
void | stream (cudaStream_t stream) |
| assigns a stream to launch the cudaFlow More...
|
|
cudaStream_t | stream () const |
| queries the stream associated with the cudaFlow
|
|
Building methods for a cuda task dependency graph.
◆ cudaFlow()
tf::cudaFlow::cudaFlow |
( |
cudaGraph & |
graph | ) |
|
|
inline |
constructs a cudaFlow builder object
- Parameters
-
graph | a cudaGraph to manipulate |
◆ copy()
template<typename T , std::enable_if_t<!std::is_same< T, void >::value, void > * >
cudaTask tf::cudaFlow::copy |
( |
T * |
tgt, |
|
|
const T * |
src, |
|
|
size_t |
num |
|
) |
| |
creates a 1D copy task
- Template Parameters
-
- Parameters
-
tgt | pointer to the target memory block |
src | pointer to the source memory block |
num | number of elements to copy |
- Returns
- cudaTask handle
A copy task transfers num*sizeof(T) bytes of data from a source location to a target location. The direction can be cpu-to-gpu, gpu-to-cpu, or gpu-to-gpu.
◆ device()
void tf::cudaFlow::device |
( |
int |
device | ) |
|
|
inline |
assigns a device to launch the cudaFlow
- Parameters
-
device | target device identifier |
◆ kernel()
template<typename F , typename... ArgsT>
cudaTask tf::cudaFlow::kernel |
( |
dim3 |
g, |
|
|
dim3 |
b, |
|
|
size_t |
s, |
|
|
F && |
f, |
|
|
ArgsT &&... |
args |
|
) |
| |
creates a kernel task
- Template Parameters
-
F | kernel function type |
ArgsT | types of the kernel function parameters |
- Parameters
-
g | configured grid |
b | configured block |
s | configured shared memory |
f | kernel function |
args | arguments to forward to the kernel function by copy |
- Returns
- cudaTask handle
◆ kernel_on()
template<typename F , typename... ArgsT>
cudaTask tf::cudaFlow::kernel_on |
( |
int |
d, |
|
|
dim3 |
g, |
|
|
dim3 |
b, |
|
|
size_t |
s, |
|
|
F && |
f, |
|
|
ArgsT &&... |
args |
|
) |
| |
creates a kernel task on a device
- Template Parameters
-
F | kernel function type |
ArgsT | types of the kernel function parameters |
- Parameters
-
d | device identifier to launch the kernel |
g | configured grid |
b | configured block |
s | configured shared memory |
f | kernel function |
args | arguments to forward to the kernel function by copy |
- Returns
- cudaTask handle
◆ memset()
cudaTask tf::cudaFlow::memset |
( |
void * |
dst, |
|
|
int |
v, |
|
|
size_t |
count |
|
) |
| |
|
inline |
creates a memset node
- Parameters
-
dst | pointer to the destination device memory area |
v | value to set for each byte of specified memory |
count | size in bytes to set |
A memset task fills the first count bytes of the device memory area pointed to by dst with the byte value v.
◆ noop()
creates a no-operation task
An empty node performs no operation during execution, but can be used for transitive ordering. For example, a phased execution graph with 2 groups of n nodes with a barrier between them can be represented using an empty node and 2*n dependency edges, rather than no empty node and n^2 dependency edges.
◆ stream()
void tf::cudaFlow::stream |
( |
cudaStream_t |
stream | ) |
|
|
inline |
assigns a stream to launch the cudaFlow
- Parameters
-
stream | target stream identifier |
The documentation for this class was generated from the following file: