fml 0.1-0
Fused Matrix Library
mpihelpers.hh
// This file is part of fml which is released under the Boost Software
// License, Version 1.0. See accompanying file LICENSE or copy at
// https://www.boost.org/LICENSE_1_0.txt

#ifndef FML_MPI_MPIHELPERS_H
#define FML_MPI_MPIHELPERS_H
#pragma once


#include <algorithm>
#include <stdexcept>

#include "../_internals/arraytools/src/arraytools.hpp"

#include "../cpu/cpumat.hh"
#include "../cpu/cpuvec.hh"

#include "internals/bcutils.hh"
#include "grid.hh"
#include "mpimat.hh"


namespace mpihelpers
{
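  // Copy data from an MPI object (mpi) to a CPU object (cpu), giving every
  // process in the grid a full copy of the global matrix: each process writes
  // its local block-cyclic pieces into the global matrix, and the result is
  // summed across the grid with allreduce().
  //
  // Illustrative usage sketch; the grid and mpimat constructor arguments shown
  // are assumptions (see grid.hh and mpimat.hh for the real signatures), not
  // something defined in this header:
  //
  //   grid g(PROC_GRID_SQUARE);        // assumed constructor
  //   mpimat<float> x(g, 5, 5, 1, 1);  // assumed: 5x5 matrix, 1x1 blocking
  //   cpumat<float> y;
  //   mpihelpers::mpi2cpu_all(x, y);   // every process now holds all of x in y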
  template <typename REAL_IN, typename REAL_OUT>
  void mpi2cpu_all(const mpimat<REAL_IN> &mpi, cpumat<REAL_OUT> &cpu)
  {
    grid g = mpi.get_grid();
    if (!g.ingrid())
      return;
    
    const len_local_t m_local = mpi.nrows_local();
    const len_local_t n_local = mpi.ncols_local();
    
    const int mb = mpi.bf_rows();
    
    const len_t m = mpi.nrows();
    const len_t n = mpi.ncols();
    
    if (m != cpu.nrows() || n != cpu.ncols())
      cpu.resize(m, n);
    
    cpu.fill_zero();
    
    REAL_OUT *gbl = cpu.data_ptr();
    const REAL_IN *sub = mpi.data_ptr();
    
    if (m_local > 0 && n_local > 0)
    {
      for (len_local_t j=0; j<n_local; j++)
      {
        const int gj = fml::bcutils::l2g(j, mpi.bf_cols(), g.npcol(), g.mycol());
        
        for (len_local_t i=0; i<m_local; i+=mb)
        {
          const int gi = fml::bcutils::l2g(i, mpi.bf_rows(), g.nprow(), g.myrow());
          
          for (int ii=0; ii<mb && ii+i<m_local; ii++)
            gbl[gi+ii + m*gj] = (REAL_OUT) sub[i+ii + m_local*j];
        }
      }
    }
    
    g.allreduce(m, n, gbl);
  }
  
  
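  // Overload of mpi2cpu_all() that allocates the return cpumat itself instead
  // of filling a caller-supplied one.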
  template <typename REAL>
  cpumat<REAL> mpi2cpu_all(const mpimat<REAL> &mpi)
  {
    cpumat<REAL> cpu;
    mpi2cpu_all(mpi, cpu);
    
    return cpu;
  }
  
  
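  // Copy data from an MPI object (mpi) to a CPU object (cpu) on a single
  // destination process: only the process at grid coordinates (rdest, cdest)
  // ends up with the full matrix, assembled block-by-block via point-to-point
  // send()/recv(), so cpu is only meaningful on that process afterwards.
  //
  // Illustrative usage sketch (object construction as in the sketch for
  // mpi2cpu_all() above; those constructors are assumptions):
  //
  //   cpumat<float> y;
  //   mpihelpers::mpi2cpu(x, y);               // gather to process (0, 0)
  //   if (g.myrow() == 0 && g.mycol() == 0)
  //   {
  //     // y holds the full matrix on this process only
  //   }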
  template <typename REAL_IN, typename REAL_OUT>
  void mpi2cpu(const mpimat<REAL_IN> &mpi, cpumat<REAL_OUT> &cpu, int rdest=0, int cdest=0)
  {
    const grid g = mpi.get_grid();
    if (!g.ingrid())
      return;
    
    const bool i_am_ret = (g.myrow() == rdest && g.mycol() == cdest);
    
    const len_local_t m_local = mpi.nrows_local();
    
    const int mb = mpi.bf_rows();
    const int nb = mpi.bf_cols();
    
    const len_t m = mpi.nrows();
    const len_t n = mpi.ncols();
    
    if (i_am_ret)
    {
      if (m != cpu.nrows() || n != cpu.ncols())
        cpu.resize(m, n);
      
      cpu.fill_zero();
    }
    
    REAL_OUT *gbl = cpu.data_ptr();
    const REAL_IN *sub = mpi.data_ptr();
    
    cpumat<REAL_OUT> tmp(mb, nb);
    REAL_OUT *tmp_d = tmp.data_ptr();
    
    for (len_t gj=0; gj<n; gj+=nb)
    {
      const int pc = fml::bcutils::g2p(gj, nb, g.npcol());
      const len_t j = fml::bcutils::g2l(gj, nb, g.npcol());
      const len_t col_copylen = std::min(nb, n-gj);
      
      for (len_t gi=0; gi<m; gi+=mb)
      {
        const int pr = fml::bcutils::g2p(gi, mb, g.nprow());
        const len_t i = fml::bcutils::g2l(gi, mb, g.nprow());
        const len_t row_copylen = std::min(mb, m-gi);
        
        if (i_am_ret)
        {
          if (pr == g.myrow() && pc == g.mycol())
          {
            for (len_t jj=0; jj<col_copylen; jj++)
              arraytools::copy(row_copylen, sub + i+m_local*(j+jj), gbl + gi+m*(gj+jj));
          }
          else
            g.recv(row_copylen, col_copylen, m, gbl + gi+m*gj, pr, pc);
        }
        else if (pr == g.myrow() && pc == g.mycol())
        {
          for (len_t jj=0; jj<col_copylen; jj++)
          {
            for (len_t ii=0; ii<row_copylen; ii++)
              tmp_d[ii + mb*jj] = (REAL_OUT) sub[i+ii + m_local*(j+jj)];
          }
          
          g.send(row_copylen, col_copylen, mb, tmp_d, rdest, cdest);
        }
      }
    }
  }
  
  
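  // Overload of mpi2cpu() that allocates and returns the cpumat itself,
  // gathering to the default destination process (0, 0).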
  template <typename REAL>
  cpumat<REAL> mpi2cpu(const mpimat<REAL> &mpi)
  {
    cpumat<REAL> cpu;
    mpi2cpu(mpi, cpu);
    
    return cpu;
  }
  
  
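  // Copy data from a CPU object (cpu) to an MPI object (mpi). Each process
  // copies the entries it owns under the 2-d block-cyclic layout out of its
  // own cpu matrix; no communication is performed.
  //
  // Illustrative usage sketch (the constructors shown are assumptions):
  //
  //   cpumat<float> c(5, 5);      // assumed constructor: 5x5 CPU matrix
  //   mpimat<float> d(g, 1, 1);   // assumed constructor: empty, 1x1 blocking
  //   mpihelpers::cpu2mpi(c, d);  // d is resized to 5x5 and filled from c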
  template <typename REAL_IN, typename REAL_OUT>
  void cpu2mpi(const cpumat<REAL_IN> &cpu, mpimat<REAL_OUT> &mpi)
  {
    const len_t m = cpu.nrows();
    const len_t n = cpu.ncols();
    
    if (m != mpi.nrows() || n != mpi.ncols())
      mpi.resize(m, n);
    
    mpi.fill_zero();
    
    const grid g = mpi.get_grid();
    
    const len_local_t m_local = mpi.nrows_local();
    const len_local_t n_local = mpi.ncols_local();
    const int mb = mpi.bf_rows();
    
    const REAL_IN *gbl = cpu.data_ptr();
    REAL_OUT *sub = mpi.data_ptr();
    
    if (m_local > 0 && n_local > 0)
    {
      for (len_local_t j=0; j<n_local; j++)
      {
        const int gj = fml::bcutils::l2g(j, mpi.bf_cols(), g.npcol(), g.mycol());
        
        for (len_local_t i=0; i<m_local; i+=mb)
        {
          const int gi = fml::bcutils::l2g(i, mpi.bf_rows(), g.nprow(), g.myrow());
          
          for (int ii=0; ii<mb && ii+i<m_local; ii++)
            sub[i+ii + m_local*j] = (REAL_OUT) gbl[gi+ii + m*gj];
        }
      }
    }
  }
  
  
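  // Copy data from one MPI object to another distributed on the same BLACS
  // process grid (same ICTXT); otherwise a std::runtime_error is thrown. The
  // local storage is copied directly, without redistribution.
  //
  // Illustrative usage sketch (the mpimat constructor shown is an assumption):
  //
  //   mpimat<float> x2(g, 1, 1);   // assumed constructor: same grid as x
  //   mpihelpers::mpi2mpi(x, x2);  // x2 is resized and filled with x's data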
  template <typename REAL_IN, typename REAL_OUT>
  void mpi2mpi(const mpimat<REAL_IN> &mpi_in, mpimat<REAL_OUT> &mpi_out)
  {
    if (mpi_in.get_grid().ictxt() != mpi_out.get_grid().ictxt())
      throw std::runtime_error("mpimat objects must be distributed on the same process grid");
    
    mpi_out.resize(mpi_in.nrows(), mpi_in.ncols());
    
    size_t len = (size_t) mpi_in.nrows_local() * mpi_in.ncols_local();
    arraytools::copy(len, mpi_in.data_ptr(), mpi_out.data_ptr());
  }
}


#endif