Cpp-Taskflow  2.3.1
cuda_flow_builder.hpp
1 #pragma once
2 
3 #include "cuda_task.hpp"
4 
5 namespace tf {
6 
12 class cudaFlow {
13 
14  public:
15 
21  cudaFlow(cudaGraph& graph);
22 
26  bool empty() const;
27 
32 
43  cudaTask noop();
44 
59  template <typename F, typename... ArgsT>
60  cudaTask kernel(dim3 g, dim3 b, size_t s, F&& f, ArgsT&&... args);
61 
77  template <
78  typename T,
79  std::enable_if_t<!std::is_same<T, void>::value, void>* = nullptr
80  >
81  cudaTask copy(T* tgt, T* src, size_t num);
82 
83  private:
84 
85  cudaGraph& _graph;
86 };
87 
88 // Constructor
89 inline cudaFlow::cudaFlow(cudaGraph& g) : _graph {g} {
90 }
91 
92 // Function: empty
93 inline bool cudaFlow::empty() const {
94  return _graph._nodes.empty();
95 }
96 
97 // Function: noop
99  auto node = _graph.emplace_back();
100  node->_handle.emplace<cudaNode::Noop>();
101  TF_CHECK_CUDA(
102  ::cudaGraphAddEmptyNode(&node->_node, _graph._handle, nullptr, 0),
103  "failed to create a no-operation (empty) node"
104  );
105  return cudaTask(node);
106 }
107 
108 // Function: kernel
109 template <typename F, typename... ArgsT>
111  dim3 grid, dim3 block, size_t shm, F&& func, ArgsT&&... args
112 ) {
113 
114  using traits = function_traits<F>;
115 
116  static_assert(traits::arity == sizeof...(ArgsT), "arity mismatches");
117 
118  void* arguments[sizeof...(ArgsT)] = { (void*)(&args)... };
119 
120  auto node = _graph.emplace_back();
121 
122  auto& p = node->_handle.emplace<cudaNode::Kernel>().param;
123 
124  p.func = (void*)func;
125  p.gridDim = grid;
126  p.blockDim = block;
127  p.sharedMemBytes = shm;
128  p.kernelParams = arguments;
129  p.extra = nullptr;
130 
131  TF_CHECK_CUDA(
132  ::cudaGraphAddKernelNode(&node->_node, _graph._handle, nullptr, 0, &p),
133  "failed to create a cudaKernel node"
134  );
135 
136  return cudaTask(node);
137 }
138 
139 // Function: copy
140 template <
141  typename T,
142  std::enable_if_t<!std::is_same<T, void>::value, void>*
143 >
144 cudaTask cudaFlow::copy(T* tgt, T* src, size_t num) {
145 
146  using U = std::decay_t<T>;
147 
148  auto node = _graph.emplace_back();
149  auto& p = node->_handle.emplace<cudaNode::Copy>().param;
150 
151  p.srcArray = nullptr;
152  p.srcPos = ::make_cudaPos(0, 0, 0);
153  p.srcPtr = ::make_cudaPitchedPtr(src, num*sizeof(U), num, 1);
154  p.dstArray = nullptr;
155  p.dstPos = ::make_cudaPos(0, 0, 0);
156  p.dstPtr = ::make_cudaPitchedPtr(tgt, num*sizeof(U), num, 1);
157  p.extent = ::make_cudaExtent(num*sizeof(U), 1, 1);
158  p.kind = cudaMemcpyDefault;
159 
160  TF_CHECK_CUDA(
161  cudaGraphAddMemcpyNode(&node->_node, _graph._handle, nullptr, 0, &p),
162  "failed to create a cudaCopy node"
163  );
164 
165  return cudaTask(node);
166 }
167 
168 } // end of namespace tf -----------------------------------------------------
cudaFlow(cudaGraph &graph)
constructs a cudaFlow builder object
Definition: cuda_flow_builder.hpp:89
Definition: error.hpp:9
bool empty() const
queries the emptiness of the graph
Definition: cuda_flow_builder.hpp:93
Building methods of a cuda task dependency graph.
Definition: cuda_flow_builder.hpp:12
cudaTask noop()
creates a no-operation task
Definition: cuda_flow_builder.hpp:98
cudaTask kernel(dim3 g, dim3 b, size_t s, F &&f, ArgsT &&... args)
creates a kernel task
Definition: cuda_flow_builder.hpp:110
cudaTask copy(T *tgt, T *src, size_t num)
creates a 1D copy task
Definition: cuda_flow_builder.hpp:144
cudaTask placeholder()
creates a placeholder task
handle to a node in a cudaGraph
Definition: cuda_task.hpp:20