5 #ifndef FML_PAR_CPU_COPY_H
6 #define FML_PAR_CPU_COPY_H
15 #include "../../cpu/cpumat.hh"
16 #include "../../cpu/copy.hh"
// Copies the locally held rows of the distributed matrix `par` into the full
// matrix `cpu`, converting elements from REAL_IN to REAL_OUT.
// NOTE(review): the signature, the braces and the declaration of `cpu_d` are
// outside the visible lines; `cpu_d` is presumably `cpu.data_ptr()` - confirm.
43 template <
typename REAL_IN,
typename REAL_OUT>
// Dimensions used to index the full output buffer (row count cast to len_t).
46 const len_t m = (len_t) par.nrows();
47 const len_t n = par.ncols();
// Number of rows stored by this process and the row offset at which they start.
52 const len_t m_local = par.nrows_local();
53 const len_t nb4 = (len_t) par.nrows_before();
54 REAL_IN *par_d = par.data_obj().data_ptr();
// Column by column, place local element (i, j) (leading dimension m_local)
// at row i+nb4 of the output (leading dimension m).
57 for (len_t j=0; j<n; j++)
59 for (len_t i=0; i<m_local; i++)
// The cast must be applied to the value being read: a cast on the left-hand
// side produces a temporary, which cannot be assigned to for arithmetic types.
60 cpu_d[i+nb4 + m*j] = (REAL_OUT) par_d[i + m_local*j];
// Allreduce over all m*n entries of cpu's buffer on par's communicator
// (presumably a sum, leaving every process with the full matrix - confirm).
63 par.get_comm().allreduce(m*n, cpu.data_ptr());
// Copies this process's share of rows from the full matrix `cpu` into the
// local storage of the distributed matrix `par`, converting to REAL_OUT.
// NOTE(review): the signature, the braces and the declaration of `cpu_d` are
// outside the visible lines; `cpu_d` is presumably `cpu.data_ptr()` - confirm.
85 template <
typename REAL_IN,
typename REAL_OUT>
// Dimensions of the source matrix; m is its leading dimension below.
88 const len_t m = cpu.
nrows();
89 const len_t n = cpu.
ncols();
// Number of rows stored by this process and the row offset at which they start.
93 const len_t m_local = par.nrows_local();
94 const len_t nb4 = (len_t) par.nrows_before();
96 REAL_OUT *par_d = par.data_obj().data_ptr();
// Column by column, read source rows nb4 .. nb4+m_local-1 and store them
// contiguously in the local buffer (leading dimension m_local).
98 for (len_t j=0; j<n; j++)
100 for (len_t i=0; i<m_local; i++)
101 par_d[i + m_local*j] = (REAL_OUT) cpu_d[i+nb4 + m*j];
// Copies one distributed matrix into another (REAL_IN source, REAL_OUT
// destination per the template parameters).
// NOTE(review): the signature and braces are outside the visible lines.
122 template <
typename REAL_IN,
typename REAL_OUT>
// Resize the destination using the source's nrows() and ncols().
125 par_out.resize(par_in.nrows(), par_in.ncols());
// Delegate the element copy to cpu2cpu on the two underlying data objects.
126 cpu2cpu(par_in.data_obj(), par_out.data_obj());