fml  0.1-0
Fused Matrix Library
linalg_svd.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_MPI_LINALG_LINALG_SVD_H
6 #define FML_MPI_LINALG_LINALG_SVD_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../../_internals/linalgutils.hh"
13 #include "../../cpu/cpuvec.hh"
14 
15 #include "../internals/bcutils.hh"
16 #include "../internals/mpi_utils.hh"
17 
18 #include "../copy.hh"
19 #include "../mpimat.hh"
20 
21 #include "linalg_blas.hh"
22 #include "linalg_eigen.hh"
23 #include "linalg_qr.hh"
24 
25 
26 namespace fml
27 {
28 namespace linalg
29 {
30  namespace
31  {
32  namespace
33  {
34  template <typename REAL>
35  int svd_internals(const int nu, const int nv, mpimat<REAL> &x, cpuvec<REAL> &s, mpimat<REAL> &u, mpimat<REAL> &vt)
36  {
37  int info = 0;
38  char jobu, jobvt;
39 
40  const len_t m = x.nrows();
41  const len_t n = x.ncols();
42  const len_t minmn = std::min(m, n);
43 
44  s.resize(minmn);
45 
46  if (nu == 0 && nv == 0)
47  {
48  jobu = 'N';
49  jobvt = 'N';
50  }
51  else // if (nu <= minmn && nv <= minmn)
52  {
53  jobu = 'V';
54  jobvt = 'V';
55 
56  const int mb = x.bf_rows();
57  const int nb = x.bf_cols();
58 
59  u.resize(m, minmn, mb, nb);
60  vt.resize(minmn, n, mb, nb);
61  }
62 
63  REAL tmp;
64  fml::scalapack::gesvd(jobu, jobvt, m, n, x.data_ptr(), x.desc_ptr(), s.data_ptr(), u.data_ptr(), u.desc_ptr(), vt.data_ptr(), vt.desc_ptr(), &tmp, -1, &info);
65  int lwork = (int) tmp;
66  cpuvec<REAL> work(lwork);
67 
68  fml::scalapack::gesvd(jobu, jobvt, m, n, x.data_ptr(), x.desc_ptr(), s.data_ptr(), u.data_ptr(), u.desc_ptr(), vt.data_ptr(), vt.desc_ptr(), work.data_ptr(), lwork, &info);
69 
70  return info;
71  }
72  }
73 
95  template <typename REAL>
96  void svd(mpimat<REAL> &x, cpuvec<REAL> &s)
97  {
98  mpimat<REAL> ignored(x.get_grid());
99  int info = svd_internals(0, 0, x, s, ignored, ignored);
100  fml::linalgutils::check_info(info, "gesvd");
101  }
102 
104  template <typename REAL>
105  void svd(mpimat<REAL> &x, cpuvec<REAL> &s, mpimat<REAL> &u, mpimat<REAL> &vt)
106  {
107  err::check_grid(x, u, vt);
108 
109  int info = svd_internals(1, 1, x, s, u, vt);
110  fml::linalgutils::check_info(info, "gesvd");
111  }
112 
113 
114 
115  template <typename REAL>
116  void tssvd(mpimat<REAL> &x, cpuvec<REAL> &s, mpimat<REAL> &u, mpimat<REAL> &vt)
117  {
118  const len_t m = x.nrows();
119  const len_t n = x.ncols();
120  if (m <= n)
121  throw std::runtime_error("'x' must have more rows than cols");
122 
123  const grid g = x.get_grid();
124 
125  cpuvec<REAL> qraux(n);
126  cpuvec<REAL> work(m);
127  qr_internals(false, x, qraux, work);
128 
129  mpimat<REAL> R(g, n, n, x.bf_rows(), x.bf_cols());
130  qr_R(x, R);
131 
132  mpimat<REAL> u_R(g, n, n, x.bf_rows(), x.bf_cols());
133  svd(R, s, u_R, vt);
134 
135  u.resize(m, n);
136  u.fill_eye();
137 
138  qr_Q(x, qraux, u, work);
139 
140  matmult(false, false, (REAL)1.0, u, u_R, x);
141  copy::mpi2mpi(x, u);
142  }
143 
144  template <typename REAL>
145  void tssvd(mpimat<REAL> &x, cpuvec<REAL> &s)
146  {
147  const len_t m = x.nrows();
148  const len_t n = x.ncols();
149  if (m <= n)
150  throw std::runtime_error("'x' must have more rows than cols");
151 
152  const grid g = x.get_grid();
153  s.resize(n);
154 
155  cpuvec<REAL> qraux(n);
156  cpuvec<REAL> work(m);
157  qr_internals(false, x, qraux, work);
158 
159  fml::mpi_utils::tri2zero('L', false, g, n, n, x.data_ptr(), x.desc_ptr());
160 
161  int info = 0;
162 
163  REAL tmp;
164  fml::scalapack::gesvd('N', 'N', n, n, x.data_ptr(), x.desc_ptr(),
165  s.data_ptr(), NULL, NULL, NULL, NULL, &tmp, -1, &info);
166  int lwork = (int) tmp;
167  if (lwork > work.size())
168  work.resize(lwork);
169 
170  fml::scalapack::gesvd('N', 'N', n, n, x.data_ptr(), x.desc_ptr(),
171  s.data_ptr(), NULL, NULL, NULL, NULL, work.data_ptr(), lwork, &info);
172  fml::linalgutils::check_info(info, "gesvd");
173  }
174 
175 
176 
177  template <typename REAL>
178  void sfsvd(mpimat<REAL> &x, cpuvec<REAL> &s, mpimat<REAL> &u, mpimat<REAL> &vt)
179  {
180  const len_t m = x.nrows();
181  const len_t n = x.ncols();
182  if (m >= n)
183  throw std::runtime_error("'x' must have more cols than rows");
184 
185  const grid g = x.get_grid();
186 
187  cpuvec<REAL> lqaux;
188  cpuvec<REAL> work;
189  lq_internals(x, lqaux, work);
190 
191  mpimat<REAL> L(g, m, m, x.bf_rows(), x.bf_cols());
192  lq_L(x, L);
193 
194  mpimat<REAL> vt_L(g, m, m, x.bf_rows(), x.bf_cols());
195  svd(L, s, u, vt_L);
196 
197  vt.resize(n, m);
198  vt.fill_eye();
199 
200  lq_Q(x, lqaux, vt, work);
201 
202  matmult(false, false, (REAL)1.0, vt_L, vt, x);
203  copy::mpi2mpi(x, vt);
204  }
205 
206  template <typename REAL>
207  void sfsvd(mpimat<REAL> &x, cpuvec<REAL> &s)
208  {
209  const len_t m = x.nrows();
210  const len_t n = x.ncols();
211  if (m >= n)
212  throw std::runtime_error("'x' must have more cols than rows");
213 
214  const grid g = x.get_grid();
215  s.resize(m);
216 
217  cpuvec<REAL> lqaux;
218  cpuvec<REAL> work;
219  lq_internals(x, lqaux, work);
220 
221  fml::mpi_utils::tri2zero('U', false, g, m, m, x.data_ptr(), x.desc_ptr());
222 
223  int info = 0;
224 
225  REAL tmp;
226  fml::scalapack::gesvd('N', 'N', m, m, x.data_ptr(), x.desc_ptr(),
227  s.data_ptr(), NULL, NULL, NULL, NULL, &tmp, -1, &info);
228  int lwork = (int) tmp;
229  if (lwork > work.size())
230  work.resize(lwork);
231 
232  fml::scalapack::gesvd('N', 'N', m, m, x.data_ptr(), x.desc_ptr(),
233  s.data_ptr(), NULL, NULL, NULL, NULL, work.data_ptr(), lwork, &info);
234  fml::linalgutils::check_info(info, "gesvd");
235  }
236  }
237 
271  template <typename REAL>
273  {
274  err::check_grid(x, u, vt);
275 
276  if (x.is_square())
277  svd(x, s, u, vt);
278  else if (x.nrows() > x.ncols())
279  tssvd(x, s, u, vt);
280  else
281  sfsvd(x, s, u, vt);
282  }
283 
285  template <typename REAL>
287  {
288  if (x.is_square())
289  svd(x, s);
290  else if (x.nrows() > x.ncols())
291  tssvd(x, s);
292  else
293  sfsvd(x, s);
294  }
295 
296 
297 
323  template <typename REAL>
325  {
326  err::check_grid(x, u, vt);
327 
328  const len_t m = x.nrows();
329  const len_t n = x.ncols();
330  const len_t minmn = std::min(m, n);
331 
332  const grid g = x.get_grid();
333  mpimat<REAL> cp(g, x.bf_rows(), x.bf_cols());
334 
335  if (m >= n)
336  {
337  crossprod((REAL)1.0, x, cp);
338  eigen_sym(cp, s, vt);
339  vt.rev_cols();
340  copy::mpi2mpi(vt, cp);
341  }
342  else
343  {
344  tcrossprod((REAL)1.0, x, cp);
345  eigen_sym(cp, s, u);
346  u.rev_cols();
347  copy::mpi2mpi(u, cp);
348  }
349 
350  s.rev();
351  REAL *s_d = s.data_ptr();
352  #pragma omp for simd
353  for (len_t i=0; i<s.size(); i++)
354  s_d[i] = sqrt(fabs(s_d[i]));
355 
356  len_t m_local, n_local;
357  REAL *ev_d;
358  if (m >= n)
359  {
360  m_local = vt.nrows_local();
361  n_local = vt.ncols_local();
362  ev_d = vt.data_ptr();
363  }
364  else
365  {
366  m_local = cp.nrows_local();
367  n_local = cp.ncols_local();
368  ev_d = cp.data_ptr();
369  }
370 
371  for (len_t j=0; j<n_local; j++)
372  {
373  #pragma omp for simd
374  for (len_t i=0; i<m_local; i++)
375  {
376  const int gi = fml::bcutils::l2g(i, x.bf_rows(), g.nprow(), g.myrow());
377  const int gj = fml::bcutils::l2g(j, x.bf_cols(), g.npcol(), g.mycol());
378 
379  if (gi < minmn && gj < minmn)
380  ev_d[i + m_local*j] /= s_d[gj];
381  }
382  }
383 
384  if (m >= n)
385  {
386  matmult(false, false, (REAL)1.0, x, vt, u);
387  xpose(cp, vt);
388  }
389  else
390  matmult(true, false, (REAL)1.0, cp, x, vt);
391  }
392 
394  template <typename REAL>
395  void cpsvd(const mpimat<REAL> &x, cpuvec<REAL> &s)
396  {
397  const len_t m = x.nrows();
398  const len_t n = x.ncols();
399 
400  const grid g = x.get_grid();
401  mpimat<REAL> cp(g, x.bf_rows(), x.bf_cols());
402 
403  if (m >= n)
404  crossprod((REAL)1.0, x, cp);
405  else
406  tcrossprod((REAL)1.0, x, cp);
407 
408  eigen_sym(cp, s);
409 
410  s.rev();
411  REAL *s_d = s.data_ptr();
412  #pragma omp for simd
413  for (len_t i=0; i<s.size(); i++)
414  s_d[i] = sqrt(fabs(s_d[i]));
415  }
416 
417 
418 
419  namespace
420  {
421  template <typename REAL>
422  void rsvd_A(const uint32_t seed, const int k, const int q, mpimat<REAL> &x,
423  mpimat<REAL> &QY)
424  {
425  const len_t m = x.nrows();
426  const len_t n = x.ncols();
427 
428  const int mb = x.bf_rows();
429  const int nb = x.bf_cols();
430 
431  auto g = x.get_grid();
432 
433  mpimat<REAL> omega(g, n, 2*k, mb, nb);
434  omega.fill_runif(seed);
435 
436  mpimat<REAL> Y(g, m, 2*k, mb, nb);
437  mpimat<REAL> Z(g, n, 2*k, mb, nb);
438  mpimat<REAL> QZ(g, n, 2*k, mb, nb);
439 
440  cpuvec<REAL> qraux;
441  cpuvec<REAL> work;
442 
443  mpimat<REAL> B(g, 2*k, n, mb, nb);
444 
445  // Stage A
446  matmult(false, false, (REAL)1.0, x, omega, Y);
447  qr_internals(false, Y, qraux, work);
448  qr_Q(Y, qraux, QY, work);
449 
450  for (int i=0; i<q; i++)
451  {
452  matmult(true, false, (REAL)1.0, x, QY, Z);
453  qr_internals(false, Z, qraux, work);
454  qr_Q(Z, qraux, QZ, work);
455 
456  matmult(false, false, (REAL)1.0, x, QZ, Y);
457  qr_internals(false, Y, qraux, work);
458  qr_Q(Y, qraux, QY, work);
459  }
460  }
461  }
462 
484  template <typename REAL>
485  void rsvd(const uint32_t seed, const int k, const int q, mpimat<REAL> &x,
486  cpuvec<REAL> &s)
487  {
488  const len_t m = x.nrows();
489  const len_t n = x.ncols();
490 
491  mpimat<REAL> QY(x.get_grid(), m, 2*k, x.bf_rows(), x.bf_cols());
492  mpimat<REAL> B(x.get_grid(), 2*k, n, x.bf_rows(), x.bf_cols());
493 
494  // Stage A
495  rsvd_A(seed, k, q, x, QY);
496 
497  // Stage B
498  matmult(true, false, (REAL)1.0, QY, x, B);
499 
500  svd(B, s);
501 
502  s.resize(k);
503  }
504 
506  template <typename REAL>
507  void rsvd(const uint32_t seed, const int k, const int q, mpimat<REAL> &x,
509  {
510  err::check_grid(x, u, vt);
511 
512  const len_t m = x.nrows();
513  const len_t n = x.ncols();
514 
515  mpimat<REAL> QY(x.get_grid(), m, 2*k, x.bf_rows(), x.bf_cols());
516  mpimat<REAL> B(x.get_grid(), 2*k, n, x.bf_rows(), x.bf_cols());
517 
518  // Stage A
519  rsvd_A(seed, k, q, x, QY);
520 
521  // Stage B
522  matmult(true, false, (REAL)1.0, QY, x, B);
523 
524  mpimat<REAL> uB(x.get_grid());
525  svd(B, s, uB, vt);
526 
527  s.resize(k);
528 
529  matmult(false, false, (REAL)1.0, QY, uB, u);
530  u.resize(u.nrows(), k);
531  }
532 }
533 }
534 
535 
536 #endif
fml::mpimat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: mpimat.hh:323
fml::grid
2-dimensional MPI process grid.
Definition: grid.hh:70
fml::linalg::crossprod
void crossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x^T*x.
Definition: linalg_blas.hh:379
fml::mpimat
Matrix class for data distributed over MPI in the 2-d block cyclic format.
Definition: mpimat.hh:40
fml::grid::mycol
int mycol() const
The process column (0-based index) of the calling process.
Definition: grid.hh:129
fml::unimat::is_square
bool is_square() const
Is the matrix square?
Definition: unimat.hh:34
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition: linalg_blas.hh:463
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::copy::mpi2mpi
void mpi2mpi(const mpimat< REAL_IN > &mpi_in, mpimat< REAL_OUT > &mpi_out)
Copy data from an MPI object to another.
Definition: copy.hh:288
fml::cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:209
fml::linalg::rsvd
void rsvd(const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the truncated singular value decomposition using the normal projections method of Halko et a...
Definition: linalg_svd.hh:458
fml::cpuvec::rev
void rev()
Reverse the vector.
Definition: cpuvec.hh:437
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::mpimat::rev_cols
void rev_cols()
Reverse the columns of the matrix.
Definition: mpimat.hh:953
fml::linalg::qrsvd
void qrsvd(cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: linalg_svd.hh:269
fml::linalg::tssvd
void tssvd(parmat_cpu< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: svd.hh:130
fml::linalg::svd
void svd(cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition.
Definition: linalg_svd.hh:97
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::linalg::lq_L
void lq_L(const cpumat< REAL > &LQ, cpumat< REAL > &L)
Recover the L matrix from an LQ decomposition.
Definition: linalg_qr.hh:247
fml
Core namespace.
Definition: dimops.hh:10
fml::linalg::qr_Q
void qr_Q(const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from a QR decomposition.
Definition: linalg_qr.hh:120
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::linalg::qr_R
void qr_R(const cpumat< REAL > &QR, cpumat< REAL > &R)
Recover the R matrix from a QR decomposition.
Definition: linalg_qr.hh:162
fml::linalg::eigen_sym
void eigen_sym(cpumat< REAL > &x, cpuvec< REAL > &values)
Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.
Definition: linalg_eigen.hh:93
fml::grid::npcol
int npcol() const
The number of processes columns in the BLACS context.
Definition: grid.hh:125
fml::grid::myrow
int myrow() const
The process row (0-based index) of the calling process.
Definition: grid.hh:127
fml::linalg::tcrossprod
void tcrossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x*x^T.
Definition: linalg_blas.hh:423
fml::linalg::lq_Q
void lq_Q(const cpumat< REAL > &LQ, const cpuvec< REAL > &lqaux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from an LQ decomposition.
Definition: linalg_qr.hh:279
fml::grid::nprow
int nprow() const
The number of processes rows in the BLACS context.
Definition: grid.hh:123
fml::linalg::matmult
void matmult(const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.
Definition: linalg_blas.hh:257
fml::linalg::cpsvd
void cpsvd(const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition using the "crossproducts SVD". This method is not numerical...
Definition: linalg_svd.hh:319