5 #ifndef FML_PAR_GPU_INTERNALS_MPI_REDUCTIONS_H
6 #define FML_PAR_GPU_INTERNALS_MPI_REDUCTIONS_H
10 #include "../../../gpu/card.hh"
18 template <
typename REAL>
19 void allreduce_gpumem(comm &r,
const int len, REAL *x_gpu)
21 r.allreduce(len, x_gpu);
26 template <
typename REAL>
27 void allreduce_cpumem(comm &r, card_sp_t c,
const int len, REAL *x_gpu, REAL *x_cpu)
29 c->mem_gpu2cpu(x_cpu, x_gpu, len*
sizeof(REAL));
30 r.allreduce(len, x_cpu);
31 c->mem_cpu2gpu(x_gpu, x_cpu, len*
sizeof(REAL));
34 template <
typename REAL>
35 void allreduce_cpumem(comm &r, card_sp_t c,
const int len, REAL *x_gpu)
37 REAL *x_cpu = (REAL*) std::malloc(len*
sizeof(REAL));
39 throw std::bad_alloc();
41 c->mem_gpu2cpu(x_cpu, x_gpu, len*
sizeof(REAL));
42 r.allreduce(len, x_cpu);
43 c->mem_cpu2gpu(x_gpu, x_cpu, len*
sizeof(REAL));