fml  0.1-0
Fused Matrix Library
gpuhelpers.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_GPUHELPERS_H
6 #define FML_GPU_GPUHELPERS_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../_internals/arraytools/src/arraytools.hpp"
13 
14 #include "../cpu/cpumat.hh"
15 #include "../cpu/cpuvec.hh"
16 
17 #include "internals/kernelfuns.hh"
18 
19 #include "card.hh"
20 #include "gpumat.hh"
21 #include "gpuvec.hh"
22 
23 
25 namespace gpuhelpers
26 {
36  inline std::shared_ptr<card> new_card(int id=0)
37  {
38  return std::make_shared<card>(id);
39  }
40 
41 
42 
43  namespace internals
44  {
// Upper bound, in elements, on the temporary host buffer that copy_gpu2cpu
// and copy_cpu2gpu allocate when the input and output element types differ.
45  static const size_t CPLEN = 1024;
46 
47  template <typename REAL_IN, typename REAL_OUT>
48  void copy_gpu2gpu(const len_t m, const len_t n, std::shared_ptr<card> c, dim3 griddim, dim3 blockdim, const REAL_IN *in, REAL_OUT *out)
49  {
50  if (std::is_same<REAL_IN, REAL_OUT>::value)
51  {
52  const size_t len = (size_t) m*n*sizeof(REAL_IN);
53  c->mem_gpu2gpu((void*)out, (void*)in, len);
54  }
55  else
56  fml::kernelfuns::kernel_copy<<<griddim, blockdim>>>(m, n, in, out);
57  }
58 
59  template <typename REAL_IN, typename REAL_OUT>
60  void copy_gpu2cpu(const len_t m, const len_t n, std::shared_ptr<card> c, const REAL_IN *in, REAL_OUT *out)
61  {
62  if (std::is_same<REAL_IN, REAL_OUT>::value)
63  {
64  const size_t len = (size_t) m*n*sizeof(REAL_IN);
65  c->mem_gpu2cpu((void*)out, (void*)in, len);
66  }
67  else
68  {
69  size_t top = (size_t) m*n;
70  size_t tmplen = std::min(top, CPLEN);
71  cpuvec<REAL_IN> tmp(tmplen);
72  REAL_IN *tmp_d = tmp.data_ptr();
73 
74  for (size_t i=0; i<top; i+=tmplen)
75  {
76  const size_t rem = top - i;
77  const size_t copylen = std::min(tmplen, rem);
78  c->mem_gpu2cpu((void*)tmp_d, (void*)(in + i), copylen*sizeof(*in));
79  arraytools::copy(copylen, tmp_d, out + i);
80  }
81  }
82  }
83 
84  template <typename REAL_IN, typename REAL_OUT>
85  void copy_cpu2gpu(const len_t m, const len_t n, std::shared_ptr<card> c, const REAL_IN *in, REAL_OUT *out)
86  {
87  if (std::is_same<REAL_IN, REAL_OUT>::value)
88  {
89  const size_t len = (size_t) m*n*sizeof(REAL_IN);
90  c->mem_cpu2gpu((void*)out, (void*)in, len);
91  }
92  else
93  {
94  size_t top = (size_t) m*n;
95  size_t tmplen = std::min(top, CPLEN);
96  cpuvec<REAL_OUT> tmp(tmplen);
97  REAL_OUT *tmp_d = tmp.data_ptr();
98 
99  for (size_t i=0; i<top; i+=tmplen)
100  {
101  const size_t rem = top - i;
102  const size_t copylen = std::min(tmplen, rem);
103  arraytools::copy(copylen, in + i, tmp_d);
104  c->mem_cpu2gpu((void*)(out + i), (void*)tmp_d, copylen*sizeof(*out));
105  }
106  }
107  }
108  }
109 
110 
111 
128  template <typename REAL_IN, typename REAL_OUT>
129  void gpu2cpu(const gpuvec<REAL_IN> &gpu, cpuvec<REAL_OUT> &cpu)
130  {
131  cpu.resize(gpu.size());
132  internals::copy_gpu2cpu(gpu.size(), (len_t)1, gpu.get_card(), gpu.data_ptr(), cpu.data_ptr());
133  }
134 
// Default-constructs a cpuvec<REAL>, fills it with gpu2cpu(gpu, cpu), and
// returns it by value.
// NOTE(review): the declaration line (listing line 137) is missing from this
// excerpt, so the function name and the type of `gpu` are not visible here —
// restore it from the original header.
136  template <typename REAL>
138  {
139  cpuvec<REAL> cpu;
140  gpu2cpu(gpu, cpu);
141 
142  return cpu;
143  }
144 
146  template <typename REAL_IN, typename REAL_OUT>
147  void gpu2cpu(const gpumat<REAL_IN> &gpu, cpumat<REAL_OUT> &cpu)
148  {
149  cpu.resize(gpu.nrows(), gpu.ncols());
150  internals::copy_gpu2cpu(gpu.nrows(), gpu.ncols(), gpu.get_card(), gpu.data_ptr(), cpu.data_ptr());
151  }
152 
// Default-constructs a cpumat<REAL>, fills it with gpu2cpu(gpu, cpu), and
// returns it by value.
// NOTE(review): the declaration line (listing line 155) is missing from this
// excerpt, so the function name and the type of `gpu` are not visible here —
// restore it from the original header.
154  template <typename REAL>
156  {
157  cpumat<REAL> cpu;
158  gpu2cpu(gpu, cpu);
159 
160  return cpu;
161  }
162 
163 
164 
181  template <typename REAL_IN, typename REAL_OUT>
182  void cpu2gpu(const cpuvec<REAL_IN> &cpu, gpuvec<REAL_OUT> &gpu)
183  {
184  gpu.resize(cpu.size());
185  internals::copy_cpu2gpu(cpu.size(), (len_t)1, gpu.get_card(), cpu.data_ptr(), gpu.data_ptr());
186  }
187 
189  template <typename REAL_IN, typename REAL_OUT>
190  void cpu2gpu(const cpumat<REAL_IN> &cpu, gpumat<REAL_OUT> &gpu)
191  {
192  gpu.resize(cpu.nrows(), cpu.ncols());
193  internals::copy_cpu2gpu(cpu.nrows(), cpu.ncols(), gpu.get_card(), cpu.data_ptr(), gpu.data_ptr());
194  }
195 
196 
197 
217  template <typename REAL_IN, typename REAL_OUT>
218  void gpu2gpu(const gpuvec<REAL_IN> &gpu_in, gpuvec<REAL_OUT> &gpu_out)
219  {
220  auto c = gpu_in.get_card();
221  if (c->get_id() != gpu_out.get_card()->get_id())
222  throw std::logic_error("input/output data must be on the same gpu");
223 
224  gpu_out.resize(gpu_in.size());
225  internals::copy_gpu2gpu(gpu_in.size(), (len_t)1, c, gpu_in.get_griddim(), gpu_in.get_blockdim(), gpu_in.data_ptr(), gpu_out.data_ptr());
226  }
227 
// Constructs a gpuvec<REAL> from gpu_in.get_card(), fills it with
// gpu2gpu(gpu_in, gpu_out), and returns it by value.
// NOTE(review): the declaration line (listing line 230) is missing from this
// excerpt, so the function name and the type of `gpu_in` are not visible
// here — restore it from the original header.
229  template <typename REAL>
231  {
232  gpuvec<REAL> gpu_out(gpu_in.get_card());
233  gpu2gpu(gpu_in, gpu_out);
234 
235  return gpu_out;
236  }
237 
239  template <typename REAL_IN, typename REAL_OUT>
240  void gpu2gpu(const gpumat<REAL_IN> &gpu_in, gpumat<REAL_OUT> &gpu_out)
241  {
242  auto c = gpu_in.get_card();
243  if (c->get_id() != gpu_out.get_card()->get_id())
244  throw std::logic_error("input/output data must be on the same gpu");
245 
246  gpu_out.resize(gpu_in.nrows(), gpu_in.ncols());
247  internals::copy_gpu2gpu(gpu_in.nrows(), gpu_in.ncols(), c, gpu_in.get_griddim(), gpu_in.get_blockdim(), gpu_in.data_ptr(), gpu_out.data_ptr());
248  }
249 
// Constructs a gpumat<REAL> from gpu_in.get_card(), fills it with
// gpu2gpu(gpu_in, gpu_out), and returns it by value.
// NOTE(review): the declaration line (listing line 252) is missing from this
// excerpt, so the function name and the type of `gpu_in` are not visible
// here — restore it from the original header.
251  template <typename REAL>
253  {
254  gpumat<REAL> gpu_out(gpu_in.get_card());
255  gpu2gpu(gpu_in, gpu_out);
256 
257  return gpu_out;
258  }
259 }
260 
261 
262 #endif
unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:35
gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:33
univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:26
cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:29
gpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: gpuvec.hh:217
gpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: gpumat.hh:239
gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:30
gpuhelpers::gpu2gpu
void gpu2gpu(const gpuvec< REAL_IN > &gpu_in, gpuvec< REAL_OUT > &gpu_out)
Copy data from a GPU object to another.
Definition: gpuhelpers.hh:218
unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:31
univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:24
unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:33
gpuhelpers::cpu2gpu
void cpu2gpu(const cpuvec< REAL_IN > &cpu, gpuvec< REAL_OUT > &gpu)
Copy data from a CPU object to a GPU object.
Definition: gpuhelpers.hh:182
cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:179
gpuhelpers::gpu2cpu
void gpu2cpu(const gpuvec< REAL_IN > &gpu, cpuvec< REAL_OUT > &cpu)
Copy data from a GPU object to a CPU object.
Definition: gpuhelpers.hh:129
cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:34
cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: cpumat.hh:204
gpuhelpers
GPU class helpers.
Definition: gpuhelpers.hh:25
gpuhelpers::new_card
std::shared_ptr< card > new_card(int id=0)
Initialize a new card.
Definition: gpuhelpers.hh:36