fml  0.1-0
Fused Matrix Library
copy.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_PAR_CPU_COPY_H
6 #define FML_PAR_CPU_COPY_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../comm.hh"
13 #include "parmat.hh"
14 
15 #include "../../cpu/cpumat.hh"
16 #include "../../cpu/copy.hh"
17 
18 
19 namespace fml
20 {
21 namespace copy
22 {
43  template <typename REAL_IN, typename REAL_OUT>
45  {
46  const len_t m = (len_t) par.nrows();
47  const len_t n = par.ncols();
48 
49  cpu.resize(m, n);
50  cpu.fill_zero();
51 
52  const len_t m_local = par.nrows_local();
53  const len_t nb4 = (len_t) par.nrows_before();
54  REAL_IN *par_d = par.data_obj().data_ptr();
55  REAL_OUT *cpu_d = cpu.data_ptr();
56 
57  for (len_t j=0; j<n; j++)
58  {
59  for (len_t i=0; i<m_local; i++)
60  (REAL_OUT) cpu_d[i+nb4 + m*j] = par_d[i + m_local*j];
61  }
62 
63  par.get_comm().allreduce(m*n, cpu.data_ptr());
64  }
65 
66 
67 
85  template <typename REAL_IN, typename REAL_OUT>
87  {
88  const len_t m = cpu.nrows();
89  const len_t n = cpu.ncols();
90 
91  par.resize(m, n);
92 
93  const len_t m_local = par.nrows_local();
94  const len_t nb4 = (len_t) par.nrows_before();
95  REAL_IN *cpu_d = cpu.data_ptr();
96  REAL_OUT *par_d = par.data_obj().data_ptr();
97 
98  for (len_t j=0; j<n; j++)
99  {
100  for (len_t i=0; i<m_local; i++)
101  par_d[i + m_local*j] = (REAL_OUT) cpu_d[i+nb4 + m*j];
102  }
103  }
104 
105 
106 
122  template <typename REAL_IN, typename REAL_OUT>
123  void par2par(const parmat_cpu<REAL_IN> &par_in, parmat_cpu<REAL_OUT> &par_out)
124  {
125  par_out.resize(par_in.nrows(), par_in.ncols());
126  cpu2cpu(par_in.data_obj(), par_out.data_obj());
127  }
128 }
129 }
130 
131 
132 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::copy::cpu2cpu
void cpu2cpu(const cpuvec< REAL_IN > &cpu_in, cpuvec< REAL_OUT > &cpu_out)
Copy data from a CPU object to another.
Definition: copy.hh:40
fml::parmat_cpu
Definition: parmat.hh:21
fml::copy::par2par
void par2par(const parmat_cpu< REAL_IN > &par_in, parmat_cpu< REAL_OUT > &par_out)
Copy data from a PAR object to another.
Definition: copy.hh:123
fml::cpumat::fill_zero
void fill_zero()
Set all values to zero.
Definition: cpumat.hh:362
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: cpumat.hh:233
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::copy::cpu2par
void cpu2par(const cpumat< REAL_IN > &cpu, parmat_cpu< REAL_OUT > &par)
Copy data from a CPU object to a PAR object.
Definition: copy.hh:86
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10
fml::copy::par2cpu
void par2cpu(const parmat_cpu< REAL_IN > &par, cpumat< REAL_OUT > &cpu)
Copy data from a PAR object to a CPU object.
Definition: copy.hh:44