fml  0.1-0
Fused Matrix Library
copy.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_COPY_H
6 #define FML_GPU_COPY_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../_internals/arraytools/src/arraytools.hpp"
13 
14 #include "../cpu/cpumat.hh"
15 #include "../cpu/cpuvec.hh"
16 
17 #include "internals/kernelfuns.hh"
18 
19 #include "card.hh"
20 #include "gpumat.hh"
21 #include "gpuvec.hh"
22 
23 
24 namespace fml
25 {
26 namespace copy
27 {
28  namespace internals
29  {
// Upper bound on the number of elements staged through the temporary host
// buffer per transfer in the type-converting paths of copy_gpu2cpu() and
// copy_cpu2gpu().
30  static const size_t CPLEN = 1024;
31 
32  template <typename REAL_IN, typename REAL_OUT>
33  void copy_gpu2gpu(const len_t m, const len_t n, std::shared_ptr<card> c, dim3 griddim, dim3 blockdim, const REAL_IN *in, REAL_OUT *out)
34  {
35  if (std::is_same<REAL_IN, REAL_OUT>::value)
36  {
37  const size_t len = (size_t) m*n*sizeof(REAL_IN);
38  c->mem_gpu2gpu((void*)out, (void*)in, len);
39  }
40  else
41  fml::kernelfuns::kernel_copy<<<griddim, blockdim>>>(m, n, in, out);
42  }
43 
44  template <typename REAL_IN, typename REAL_OUT>
45  void copy_gpu2cpu(const len_t m, const len_t n, std::shared_ptr<card> c, const REAL_IN *in, REAL_OUT *out)
46  {
47  if (std::is_same<REAL_IN, REAL_OUT>::value)
48  {
49  const size_t len = (size_t) m*n*sizeof(REAL_IN);
50  c->mem_gpu2cpu((void*)out, (void*)in, len);
51  }
52  else
53  {
54  size_t top = (size_t) m*n;
55  size_t tmplen = std::min(top, CPLEN);
56  cpuvec<REAL_IN> tmp(tmplen);
57  REAL_IN *tmp_d = tmp.data_ptr();
58 
59  for (size_t i=0; i<top; i+=tmplen)
60  {
61  const size_t rem = top - i;
62  const size_t copylen = std::min(tmplen, rem);
63  c->mem_gpu2cpu((void*)tmp_d, (void*)(in + i), copylen*sizeof(*in));
64  arraytools::copy(copylen, tmp_d, out + i);
65  }
66  }
67  }
68 
69  template <typename REAL_IN, typename REAL_OUT>
70  void copy_cpu2gpu(const len_t m, const len_t n, std::shared_ptr<card> c, const REAL_IN *in, REAL_OUT *out)
71  {
72  if (std::is_same<REAL_IN, REAL_OUT>::value)
73  {
74  const size_t len = (size_t) m*n*sizeof(REAL_IN);
75  c->mem_cpu2gpu((void*)out, (void*)in, len);
76  }
77  else
78  {
79  size_t top = (size_t) m*n;
80  size_t tmplen = std::min(top, CPLEN);
81  cpuvec<REAL_OUT> tmp(tmplen);
82  REAL_OUT *tmp_d = tmp.data_ptr();
83 
84  for (size_t i=0; i<top; i+=tmplen)
85  {
86  const size_t rem = top - i;
87  const size_t copylen = std::min(tmplen, rem);
88  arraytools::copy(copylen, in + i, tmp_d);
89  c->mem_cpu2gpu((void*)(out + i), (void*)tmp_d, copylen*sizeof(*out));
90  }
91  }
92  }
93  }
94 
95 
96 
113  template <typename REAL_IN, typename REAL_OUT>
114  void gpu2cpu(const gpuvec<REAL_IN> &gpu, cpuvec<REAL_OUT> &cpu)
115  {
116  cpu.resize(gpu.size());
117  internals::copy_gpu2cpu(gpu.size(), (len_t)1, gpu.get_card(), gpu.data_ptr(), cpu.data_ptr());
118  }
119 
// Returning variant of a GPU-to-CPU copy: default-constructs a cpuvec<REAL>,
// fills it from `gpu` through a two-argument gpu2cpu() overload, and returns
// it.
// NOTE(review): the signature line between the template header and the opening
// brace is absent from this listing (embedded numbering jumps from 121 to
// 123) -- confirm the function name and parameter type against the original
// header.
121  template <typename REAL>
123  {
124  cpuvec<REAL> cpu;
125  gpu2cpu(gpu, cpu);
126 
127  return cpu;
128  }
129 
131  template <typename REAL_IN, typename REAL_OUT>
132  void gpu2cpu(const gpumat<REAL_IN> &gpu, cpumat<REAL_OUT> &cpu)
133  {
134  cpu.resize(gpu.nrows(), gpu.ncols());
135  internals::copy_gpu2cpu(gpu.nrows(), gpu.ncols(), gpu.get_card(), gpu.data_ptr(), cpu.data_ptr());
136  }
137 
// Returning variant of a GPU-to-CPU copy: default-constructs a cpumat<REAL>,
// fills it from `gpu` through a two-argument gpu2cpu() overload, and returns
// it.
// NOTE(review): the signature line between the template header and the opening
// brace is absent from this listing (embedded numbering jumps from 139 to
// 141) -- confirm the function name and parameter type against the original
// header.
139  template <typename REAL>
141  {
142  cpumat<REAL> cpu;
143  gpu2cpu(gpu, cpu);
144 
145  return cpu;
146  }
147 
148 
149 
166  template <typename REAL_IN, typename REAL_OUT>
167  void cpu2gpu(const cpuvec<REAL_IN> &cpu, gpuvec<REAL_OUT> &gpu)
168  {
169  gpu.resize(cpu.size());
170  internals::copy_cpu2gpu(cpu.size(), (len_t)1, gpu.get_card(), cpu.data_ptr(), gpu.data_ptr());
171  }
172 
174  template <typename REAL_IN, typename REAL_OUT>
175  void cpu2gpu(const cpumat<REAL_IN> &cpu, gpumat<REAL_OUT> &gpu)
176  {
177  gpu.resize(cpu.nrows(), cpu.ncols());
178  internals::copy_cpu2gpu(cpu.nrows(), cpu.ncols(), gpu.get_card(), cpu.data_ptr(), gpu.data_ptr());
179  }
180 
181 
182 
202  template <typename REAL_IN, typename REAL_OUT>
203  void gpu2gpu(const gpuvec<REAL_IN> &gpu_in, gpuvec<REAL_OUT> &gpu_out)
204  {
205  auto c = gpu_in.get_card();
206  if (c->get_id() != gpu_out.get_card()->get_id())
207  throw std::logic_error("input/output data must be on the same gpu");
208 
209  gpu_out.resize(gpu_in.size());
210  internals::copy_gpu2gpu(gpu_in.size(), (len_t)1, c, gpu_in.get_griddim(), gpu_in.get_blockdim(), gpu_in.data_ptr(), gpu_out.data_ptr());
211  }
212 
// Returning variant of a GPU-to-GPU copy: constructs a gpuvec<REAL> on the
// card of `gpu_in`, fills it through a two-argument gpu2gpu() overload, and
// returns it.
// NOTE(review): the signature line between the template header and the opening
// brace is absent from this listing (embedded numbering jumps from 214 to
// 216) -- confirm the function name and parameter type against the original
// header.
214  template <typename REAL>
216  {
217  gpuvec<REAL> gpu_out(gpu_in.get_card());
218  gpu2gpu(gpu_in, gpu_out);
219 
220  return gpu_out;
221  }
222 
224  template <typename REAL_IN, typename REAL_OUT>
225  void gpu2gpu(const gpumat<REAL_IN> &gpu_in, gpumat<REAL_OUT> &gpu_out)
226  {
227  auto c = gpu_in.get_card();
228  if (c->get_id() != gpu_out.get_card()->get_id())
229  throw std::logic_error("input/output data must be on the same gpu");
230 
231  gpu_out.resize(gpu_in.nrows(), gpu_in.ncols());
232  internals::copy_gpu2gpu(gpu_in.nrows(), gpu_in.ncols(), c, gpu_in.get_griddim(), gpu_in.get_blockdim(), gpu_in.data_ptr(), gpu_out.data_ptr());
233  }
234 
// Returning variant of a GPU-to-GPU copy: constructs a gpumat<REAL> on the
// card of `gpu_in`, fills it through a two-argument gpu2gpu() overload, and
// returns it.
// NOTE(review): the signature line between the template header and the opening
// brace is absent from this listing (embedded numbering jumps from 236 to
// 238) -- confirm the function name and parameter type against the original
// header.
236  template <typename REAL>
238  {
239  gpumat<REAL> gpu_out(gpu_in.get_card());
240  gpu2gpu(gpu_in, gpu_out);
241 
242  return gpu_out;
243  }
244 }
245 }
246 
247 
248 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::copy::cpu2gpu
void cpu2gpu(const cpuvec< REAL_IN > &cpu, gpuvec< REAL_OUT > &gpu)
Copy data from a CPU object to a GPU object.
Definition: copy.hh:167
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::gpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: gpuvec.hh:225
fml::copy::gpu2cpu
void gpu2cpu(const gpuvec< REAL_IN > &gpu, cpuvec< REAL_OUT > &cpu)
Copy data from a GPU object to a CPU object.
Definition: copy.hh:114
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:210
fml::cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: cpumat.hh:233
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::copy::gpu2gpu
void gpu2gpu(const gpuvec< REAL_IN > &gpu_in, gpuvec< REAL_OUT > &gpu_out)
Copy data from a GPU object to another.
Definition: copy.hh:203
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::gpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: gpumat.hh:256
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35