fml  0.1-0
Fused Matrix Library
svd.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_PAR_CPU_LINALG_SVD_H
6 #define FML_PAR_CPU_LINALG_SVD_H
7 #pragma once
8 
9 
10 #include "../parmat.hh"
11 
12 #include "blas.hh"
13 #include "qr.hh"
14 
15 #include "../../../_internals/omp.hh"
16 
17 #include "../../../cpu/linalg/linalg_blas.hh"
18 #include "../../../cpu/linalg/linalg_invert.hh"
19 #include "../../../cpu/linalg/linalg_qr.hh"
20 #include "../../../cpu/linalg/linalg_svd.hh"
21 
22 #include "../../../cpu/copy.hh"
23 
24 
25 namespace fml
26 {
27 namespace linalg
28 {
53  template <typename REAL>
54  void cpsvd(const parmat_cpu<REAL> &x, cpuvec<REAL> &s)
55  {
56  const len_t n = x.ncols();
57 
58  auto cp = crossprod((REAL)1.0, x.data_obj());
59  x.get_comm().allreduce(n*n, cp.data_ptr());
60  eigen_sym(cp, s);
61  }
62 
64  template <typename REAL>
65  void cpsvd(const parmat_cpu<REAL> &x, cpuvec<REAL> &s,
66  cpumat<REAL> &u, cpumat<REAL> &vt)
67  {
68  const len_t n = x.ncols();
69 
70  auto cp = crossprod((REAL)1.0, x.data_obj());
71  x.get_comm().allreduce(n*n, cp.data_ptr());
72  eigen_sym(cp, s, vt);
73 
74  s.rev();
75  REAL *s_d = s.data_ptr();
76  #pragma omp for simd
77  for (len_t i=0; i<s.size(); i++)
78  s_d[i] = sqrt(fabs(s_d[i]));
79 
80  vt.rev_cols();
81  fml::copy::cpu2cpu(vt, cp);
82  REAL *vt_d = vt.data_ptr();
83  #pragma omp parallel for if(n*n > omp::OMP_MIN_SIZE)
84  for (len_t j=0; j<n; j++)
85  {
86  #pragma omp simd
87  for (len_t i=0; i<n; i++)
88  vt_d[i + n*j] /= s_d[j];
89  }
90 
91  fml::linalg::matmult(false, false, (REAL)1.0, x.data_obj(), vt, u);
92  fml::linalg::xpose(cp, vt);
93  }
94 
95 
96 
129  template <typename REAL>
131  {
132  cpumat<REAL> R;
133  qr_R(mpi::REDUCE_TO_ALL, x, R);
134  svd(R, s);
135  }
136 
138  template <typename REAL>
140  cpumat<REAL> &vt)
141  {
142  const len_t n = x.ncols();
143  if (x.nrows() < (len_global_t)n)
144  throw std::runtime_error("impossible dimensions");
145 
146  cpumat<REAL> x_cpy = x.data_obj().dupe();
147 
148  cpumat<REAL> R_local;
149  cpuvec<REAL> qraux;
150  qr(false, x.data_obj(), qraux);
151  qr_R(x.data_obj(), R_local);
152 
153  cpumat<REAL> R(n, n);
154  tsqr::qr_allreduce(mpi::REDUCE_TO_ALL, n, n, R_local.data_ptr(),
155  R.data_ptr(), x.get_comm().get_comm());
156 
157  copy::cpu2cpu(R, R_local);
158  cpumat<REAL> u_R;
159  linalg::svd(R_local, s, u_R, vt);
160 
161  // u = Q*u_R = x*R^{-1}*u_R
162  u.resize(x.nrows(), x.ncols());
163  linalg::trinv(true, false, R);
164  linalg::matmult(false, false, (REAL)1, x_cpy, R, x.data_obj());
165  linalg::matmult(false, false, (REAL)1, x.data_obj(), u_R, u.data_obj());
166  }
167 
168 
169 
170  namespace internals
171  {
172  template <typename REAL>
173  void rsvd_A(const uint32_t seed, const int k, const int q,
175  {
176  const len_global_t m = x.nrows();
177  const len_t n = x.ncols();
178  if (m < (len_global_t)n)
179  throw std::runtime_error("must have m>n");
180  if (k > n)
181  throw std::runtime_error("must have k<n");
182 
183  parmat_cpu<REAL> Y(x.get_comm(), m, 2*k, x.nrows_before());
184  parmat_cpu<REAL> Y_tmp(x.get_comm(), m, 2*k, x.nrows_before());
185  cpumat<REAL> Z(n, 2*k);
186  cpumat<REAL> QZ(n, 2*k);
187 
188  cpumat<REAL> R(2*k, 2*k);
189  cpumat<REAL> R_local;
190  cpuvec<REAL> qraux;
191  cpuvec<REAL> work;
192 
193  cpumat<REAL> omega(n, 2*k);
194  omega.fill_runif(seed);
195 
196  // Stage A
197  matmult(x, omega, Y);
198  qr_Q(Y, Y_tmp, R, R_local, qraux, QY);
199 
200  for (int i=0; i<q; i++)
201  {
202  matmult(x, QY, Z);
203  linalg::qr_internals(false, Z, qraux, work);
204  linalg::qr_Q(Z, qraux, QZ, work);
205 
206  matmult(x, QZ, Y);
207  qr_Q(Y, Y_tmp, R, R_local, qraux, QY);
208  }
209  }
210  }
211 
234  template <typename REAL>
235  void rsvd(const uint32_t seed, const int k, const int q,
237  {
238  parmat_cpu<REAL> QY(x.get_comm(), x.nrows(), 2*k, x.nrows_before());
239  cpumat<REAL> B(2*k, x.ncols());
240 
241  // stage A
242  internals::rsvd_A(seed, k, q, x, QY);
243 
244  // Stage B
245  matmult(QY, x, B);
246 
247  linalg::svd(B, s);
248  s.resize(k);
249  }
250 
252  template <typename REAL>
253  void rsvd(const uint32_t seed, const int k, const int q,
255  cpumat<REAL> &vt)
256  {
257  parmat_cpu<REAL> QY(x.get_comm(), x.nrows(), 2*k, x.nrows_before());
258  cpumat<REAL> B(2*k, x.ncols());
259 
260  // stage A
261  internals::rsvd_A(seed, k, q, x, QY);
262 
263  // Stage B
264  matmult(QY, x, B);
265 
266  cpumat<REAL> uB;
267  linalg::svd(B, s, uB, vt);
268 
269  s.resize(k);
270 
271  matmult(QY, uB, u);
272  // u.resize(u.nrows(), k);
273  }
274 }
275 }
276 
277 
278 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::copy::cpu2cpu
void cpu2cpu(const cpuvec< REAL_IN > &cpu_in, cpuvec< REAL_OUT > &cpu_out)
Copy data from a CPU object to another.
Definition: copy.hh:40
fml::parmat_cpu
Definition: parmat.hh:21
fml::linalg::crossprod
void crossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x^T*x.
Definition: crossprod.hh:37
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition: xpose.hh:37
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:210
fml::linalg::qr
void qr(const bool pivot, cpumat< REAL > &x, cpuvec< REAL > &qraux)
Computes the QR decomposition.
Definition: qr.hh:94
fml::linalg::rsvd
void rsvd(const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the truncated singular value decomposition using the normal projections method of Halko et a...
Definition: svd.hh:462
fml::cpuvec::rev
void rev()
Reverse the vector.
Definition: cpuvec.hh:447
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::linalg::tssvd
void tssvd(parmat_cpu< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: svd.hh:130
fml::linalg::svd
void svd(cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition.
Definition: svd.hh:101
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::cpumat::rev_cols
void rev_cols()
Reverse the columns of the matrix.
Definition: cpumat.hh:638
fml
Core namespace.
Definition: dimops.hh:10
fml::linalg::qr_Q
void qr_Q(const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from a QR decomposition.
Definition: qr.hh:120
fml::linalg::trinv
void trinv(const bool upper, const bool unit_diag, cpumat< REAL > &x)
Compute the matrix inverse of a triangular matrix.
Definition: invert.hh:87
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::linalg::qr_R
void qr_R(const cpumat< REAL > &QR, cpumat< REAL > &R)
Recover the R matrix from a QR decomposition.
Definition: qr.hh:162
fml::linalg::eigen_sym
void eigen_sym(cpumat< REAL > &x, cpuvec< REAL > &values)
Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.
Definition: eigen.hh:95
fml::linalg::matmult
void matmult(const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.
Definition: matmult.hh:43
fml::linalg::cpsvd
void cpsvd(const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition using the "crossproducts SVD". This method is not numerical...
Definition: svd.hh:323