3 #include "cuda_task.hpp" 59 template <
typename F,
typename... ArgsT>
79 std::enable_if_t<!std::is_same<T, void>::value,
void>* =
nullptr 94 return _graph._nodes.empty();
99 auto node = _graph.emplace_back();
100 node->_handle.emplace<cudaNode::Noop>();
102 ::cudaGraphAddEmptyNode(&node->_node, _graph._handle,
nullptr, 0),
103 "failed to create a no-operation (empty) node" 109 template <
typename F,
typename... ArgsT>
111 dim3 grid, dim3 block,
size_t shm, F&& func, ArgsT&&... args
114 using traits = function_traits<F>;
116 static_assert(traits::arity ==
sizeof...(ArgsT),
"arity mismatches");
118 void* arguments[
sizeof...(ArgsT)] = { (
void*)(&args)... };
120 auto node = _graph.emplace_back();
122 auto& p = node->_handle.emplace<cudaNode::Kernel>().param;
124 p.func = (
void*)func;
127 p.sharedMemBytes = shm;
128 p.kernelParams = arguments;
132 ::cudaGraphAddKernelNode(&node->_node, _graph._handle,
nullptr, 0, &p),
133 "failed to create a cudaKernel node" 142 std::enable_if_t<!std::is_same<T, void>::value,
void>*
146 using U = std::decay_t<T>;
148 auto node = _graph.emplace_back();
149 auto& p = node->_handle.emplace<cudaNode::Copy>().param;
151 p.srcArray =
nullptr;
152 p.srcPos = ::make_cudaPos(0, 0, 0);
153 p.srcPtr = ::make_cudaPitchedPtr(src, num*
sizeof(U), num, 1);
154 p.dstArray =
nullptr;
155 p.dstPos = ::make_cudaPos(0, 0, 0);
156 p.dstPtr = ::make_cudaPitchedPtr(tgt, num*
sizeof(U), num, 1);
157 p.extent = ::make_cudaExtent(num*
sizeof(U), 1, 1);
158 p.kind = cudaMemcpyDefault;
161 cudaGraphAddMemcpyNode(&node->_node, _graph._handle,
nullptr, 0, &p),
162 "failed to create a cudaCopy node" cudaFlow(cudaGraph &graph)
constructs a cudaFlow builder object
Definition: cuda_flow_builder.hpp:89
bool empty() const
queries the emptiness of the graph
Definition: cuda_flow_builder.hpp:93
Methods for building a CUDA task dependency graph.
Definition: cuda_flow_builder.hpp:12
cudaTask noop()
creates a no-operation task
Definition: cuda_flow_builder.hpp:98
cudaTask kernel(dim3 g, dim3 b, size_t s, F &&f, ArgsT &&... args)
creates a kernel task
Definition: cuda_flow_builder.hpp:110
cudaTask copy(T *tgt, T *src, size_t num)
creates a 1D copy task
Definition: cuda_flow_builder.hpp:144
cudaTask placeholder()
creates a placeholder task
handle to a node in a cudaGraph
Definition: cuda_task.hpp:20