fml  0.1-0
Fused Matrix Library
svd.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_CPU_LINALG_LINALG_SVD_H
6 #define FML_CPU_LINALG_LINALG_SVD_H
7 #pragma once
8 
9 
10 #include <cmath>
11 #include <stdexcept>
12 
13 #include "../../_internals/linalgutils.hh"
14 #include "../../_internals/omp.hh"
15 
16 #include "../copy.hh"
17 #include "../cpumat.hh"
18 #include "../cpuvec.hh"
19 
20 #include "internals/blas.hh"
21 #include "internals/lapack.hh"
22 #include "crossprod.hh"
23 #include "eigen.hh"
24 #include "matmult.hh"
25 #include "qr.hh"
26 #include "xpose.hh"
27 
28 
29 namespace fml
30 {
31 namespace linalg
32 {
33  namespace
34  {
35  template <typename REAL>
36  int svd_internals(const int nu, const int nv, cpumat<REAL> &x, cpuvec<REAL> &s,
37  cpumat<REAL> &u, cpumat<REAL> &vt)
38  {
39  int info = 0;
40  char jobz;
41  int ldvt;
42 
43  const len_t m = x.nrows();
44  const len_t n = x.ncols();
45  const len_t minmn = std::min(m, n);
46 
47  s.resize(minmn);
48 
49  if (nu == 0 && nv == 0)
50  {
51  jobz = 'N';
52  ldvt = 1; // value is irrelevant, but must exist!
53  }
54  else if (nu <= minmn && nv <= minmn)
55  {
56  jobz = 'S';
57  ldvt = minmn;
58 
59  u.resize(m, minmn);
60  vt.resize(minmn, n);
61  }
62  else
63  {
64  jobz = 'A';
65  ldvt = n;
66  }
67 
68  cpuvec<int> iwork(8*minmn);
69 
70  REAL tmp;
71  fml::lapack::gesdd(jobz, m, n, x.data_ptr(), m, s.data_ptr(), u.data_ptr(), m, vt.data_ptr(), ldvt, &tmp, -1, iwork.data_ptr(), &info);
72  int lwork = (int) tmp;
73  cpuvec<REAL> work(lwork);
74 
75  fml::lapack::gesdd(jobz, m, n, x.data_ptr(), m, s.data_ptr(), u.data_ptr(), m, vt.data_ptr(), ldvt, work.data_ptr(), lwork, iwork.data_ptr(), &info);
76 
77  return info;
78  }
79  }
80 
100  template <typename REAL>
102  {
103  cpumat<REAL> ignored;
104  int info = svd_internals(0, 0, x, s, ignored, ignored);
105  fml::linalgutils::check_info(info, "gesdd");
106  }
107 
109  template <typename REAL>
111  {
112  int info = svd_internals(1, 1, x, s, u, vt);
113  fml::linalgutils::check_info(info, "gesdd");
114  }
115 
116 
117 
118  namespace
119  {
120  template <typename REAL>
122  {
123  const len_t m = x.nrows();
124  const len_t n = x.ncols();
125  if (m <= n)
126  throw std::runtime_error("'x' must have more rows than cols");
127 
128  cpuvec<REAL> qraux(n);
129  cpuvec<REAL> work(m);
130  qr_internals(false, x, qraux, work);
131 
132  cpumat<REAL> R(n, n);
133  qr_R(x, R);
134 
135  cpumat<REAL> u_R(n, n);
136  svd(R, s, u_R, vt);
137 
138  u.resize(m, n);
139  u.fill_eye();
140 
141  qr_Q(x, qraux, u, work);
142 
143  matmult(false, false, (REAL)1.0, u, u_R, x);
144  copy::cpu2cpu(x, u);
145  }
146 
147  template <typename REAL>
148  void tssvd(cpumat<REAL> &x, cpuvec<REAL> &s)
149  {
150  const len_t m = x.nrows();
151  const len_t n = x.ncols();
152  if (m <= n)
153  throw std::runtime_error("'x' must have more rows than cols");
154 
155  s.resize(n);
156 
157  cpuvec<REAL> qraux(n);
158  cpuvec<REAL> work(m);
159  qr_internals(false, x, qraux, work);
160 
161  fml::cpu_utils::tri2zero('L', false, n, n, x.data_ptr(), m);
162 
163  int info = 0;
164  cpuvec<int> iwork(8*n);
165 
166  REAL tmp;
167  fml::lapack::gesdd('N', n, n, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
168  1, &tmp, -1, iwork.data_ptr(), &info);
169  int lwork = (int) tmp;
170  if (lwork > work.size())
171  work.resize(lwork);
172 
173  fml::lapack::gesdd('N', n, n, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
174  1, work.data_ptr(), lwork, iwork.data_ptr(), &info);
175  fml::linalgutils::check_info(info, "gesdd");
176  }
177 
178 
179 
180  template <typename REAL>
181  void sfsvd(cpumat<REAL> &x, cpuvec<REAL> &s, cpumat<REAL> &u, cpumat<REAL> &vt)
182  {
183  const len_t m = x.nrows();
184  const len_t n = x.ncols();
185  if (m >= n)
186  throw std::runtime_error("'x' must have more cols than rows");
187 
188  cpuvec<REAL> lqaux;
189  cpuvec<REAL> work;
190  lq_internals(x, lqaux, work);
191 
192  cpumat<REAL> L(m, m);
193  lq_L(x, L);
194 
195  cpumat<REAL> vt_L(m, m);
196  svd(L, s, u, vt_L);
197 
198  vt.resize(n, m);
199  vt.fill_eye();
200 
201  lq_Q(x, lqaux, vt, work);
202 
203  matmult(false, false, (REAL)1.0, vt_L, vt, x);
204  copy::cpu2cpu(x, vt);
205  }
206 
207  template <typename REAL>
208  void sfsvd(cpumat<REAL> &x, cpuvec<REAL> &s)
209  {
210  const len_t m = x.nrows();
211  const len_t n = x.ncols();
212  if (m >= n)
213  throw std::runtime_error("'x' must have more cols than rows");
214 
215  s.resize(m);
216 
217  cpuvec<REAL> lqaux;
218  cpuvec<REAL> work;
219  lq_internals(x, lqaux, work);
220 
221  fml::cpu_utils::tri2zero('U', false, m, m, x.data_ptr(), m);
222 
223  int info = 0;
224  cpuvec<int> iwork(8*n);
225 
226  REAL tmp;
227  fml::lapack::gesdd('N', m, m, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
228  1, &tmp, -1, iwork.data_ptr(), &info);
229  int lwork = (int) tmp;
230  if (lwork > work.size())
231  work.resize(lwork);
232 
233  fml::lapack::gesdd('N', m, m, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
234  1, work.data_ptr(), lwork, iwork.data_ptr(), &info);
235  fml::linalgutils::check_info(info, "gesdd");
236  }
237  }
238 
272  template <typename REAL>
274  {
275  if (x.is_square())
276  svd(x, s, u, vt);
277  else if (x.nrows() > x.ncols())
278  tssvd(x, s, u, vt);
279  else
280  sfsvd(x, s, u, vt);
281  }
282 
284  template <typename REAL>
286  {
287  if (x.is_square())
288  svd(x, s);
289  else if (x.nrows() > x.ncols())
290  tssvd(x, s);
291  else
292  sfsvd(x, s);
293  }
294 
295 
296 
322  template <typename REAL>
324  cpumat<REAL> &vt)
325  {
326  const len_t m = x.nrows();
327  const len_t n = x.ncols();
328  const len_t minmn = std::min(m, n);
329 
330  cpumat<REAL> cp;
331 
332  if (m >= n)
333  {
334  crossprod((REAL)1.0, x, cp);
335  eigen_sym(cp, s, vt);
336  vt.rev_cols();
337  copy::cpu2cpu(vt, cp);
338  }
339  else
340  {
341  tcrossprod((REAL)1.0, x, cp);
342  eigen_sym(cp, s, u);
343  u.rev_cols();
344  copy::cpu2cpu(u, cp);
345  }
346 
347  s.rev();
348  REAL *s_d = s.data_ptr();
349  #pragma omp for simd
350  for (len_t i=0; i<s.size(); i++)
351  s_d[i] = sqrt(fabs(s_d[i]));
352 
353  REAL *ev_d;
354  if (m >= n)
355  ev_d = vt.data_ptr();
356  else
357  ev_d = cp.data_ptr();
358 
359  #pragma omp parallel for if(minmn*minmn > fml::omp::OMP_MIN_SIZE)
360  for (len_t j=0; j<minmn; j++)
361  {
362  #pragma omp simd
363  for (len_t i=0; i<minmn; i++)
364  ev_d[i + minmn*j] /= s_d[j];
365  }
366 
367  if (m >= n)
368  {
369  matmult(false, false, (REAL)1.0, x, vt, u);
370  xpose(cp, vt);
371  }
372  else
373  matmult(true, false, (REAL)1.0, cp, x, vt);
374  }
375 
377  template <typename REAL>
378  void cpsvd(const cpumat<REAL> &x, cpuvec<REAL> &s)
379  {
380  const len_t m = x.nrows();
381  const len_t n = x.ncols();
382 
383  cpumat<REAL> cp;
384 
385  if (m >= n)
386  crossprod((REAL)1.0, x, cp);
387  else
388  tcrossprod((REAL)1.0, x, cp);
389 
390  eigen_sym(cp, s);
391 
392  s.rev();
393  REAL *s_d = s.data_ptr();
394  #pragma omp for simd
395  for (len_t i=0; i<s.size(); i++)
396  s_d[i] = sqrt(fabs(s_d[i]));
397  }
398 
399 
400 
401  namespace
402  {
403  template <typename REAL>
404  void rsvd_A(const uint32_t seed, const int k, const int q, cpumat<REAL> &x,
405  cpumat<REAL> &QY)
406  {
407  const len_t m = x.nrows();
408  const len_t n = x.ncols();
409 
410  cpumat<REAL> omega(n, 2*k);
411  omega.fill_runif(seed);
412 
413  cpumat<REAL> Y(m, 2*k);
414  cpumat<REAL> Z(n, 2*k);
415  cpumat<REAL> QZ(n, 2*k);
416 
417  cpuvec<REAL> qraux;
418  cpuvec<REAL> work;
419 
420  cpumat<REAL> B(2*k, n);
421 
422  // Stage A
423  matmult(false, false, (REAL)1.0, x, omega, Y);
424  qr_internals(false, Y, qraux, work);
425  qr_Q(Y, qraux, QY, work);
426 
427  for (int i=0; i<q; i++)
428  {
429  matmult(true, false, (REAL)1.0, x, QY, Z);
430  qr_internals(false, Z, qraux, work);
431  qr_Q(Z, qraux, QZ, work);
432 
433  matmult(false, false, (REAL)1.0, x, QZ, Y);
434  qr_internals(false, Y, qraux, work);
435  qr_Q(Y, qraux, QY, work);
436  }
437  }
438  }
439 
461  template <typename REAL>
462  void rsvd(const uint32_t seed, const int k, const int q, cpumat<REAL> &x,
463  cpuvec<REAL> &s)
464  {
465  const len_t m = x.nrows();
466  const len_t n = x.ncols();
467 
468  cpumat<REAL> QY(m, 2*k);
469  cpumat<REAL> B(2*k, n);
470 
471  // Stage A
472  rsvd_A(seed, k, q, x, QY);
473 
474  // Stage B
475  matmult(true, false, (REAL)1.0, QY, x, B);
476 
477  cpumat<REAL> uB;
478  svd(B, s);
479 
480  s.resize(k);
481  }
482 
484  template <typename REAL>
485  void rsvd(const uint32_t seed, const int k, const int q, cpumat<REAL> &x,
487  {
488  const len_t m = x.nrows();
489  const len_t n = x.ncols();
490 
491  cpumat<REAL> QY(m, 2*k);
492  cpumat<REAL> B(2*k, n);
493 
494  // Stage A
495  rsvd_A(seed, k, q, x, QY);
496 
497  // Stage B
498  matmult(true, false, (REAL)1.0, QY, x, B);
499 
500  cpumat<REAL> uB;
501  svd(B, s, uB, vt);
502 
503  s.resize(k);
504 
505  matmult(false, false, (REAL)1.0, QY, uB, u);
506  u.resize(u.nrows(), k);
507  }
508 }
509 }
510 
511 
512 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::copy::cpu2cpu
void cpu2cpu(const cpuvec< REAL_IN > &cpu_in, cpuvec< REAL_OUT > &cpu_out)
Copy data from a CPU object to another.
Definition: copy.hh:40
fml::cpumat::fill_eye
void fill_eye()
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: cpumat.hh:451
fml::linalg::crossprod
void crossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x^T*x.
Definition: crossprod.hh:37
fml::unimat::is_square
bool is_square() const
Is the matrix square?
Definition: unimat.hh:34
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition: xpose.hh:37
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:210
fml::linalg::rsvd
void rsvd(const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the truncated singular value decomposition using the normal projections method of Halko et a...
Definition: svd.hh:462
fml::cpuvec::rev
void rev()
Reverse the vector.
Definition: cpuvec.hh:447
fml::cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: cpumat.hh:233
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::linalg::qrsvd
void qrsvd(cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: svd.hh:273
fml::linalg::tssvd
void tssvd(parmat_cpu< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: svd.hh:130
fml::linalg::svd
void svd(cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition.
Definition: svd.hh:101
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::linalg::lq_L
void lq_L(const cpumat< REAL > &LQ, cpumat< REAL > &L)
Recover the L matrix from an LQ decomposition.
Definition: qr.hh:247
fml::cpumat::rev_cols
void rev_cols()
Reverse the columns of the matrix.
Definition: cpumat.hh:638
fml
Core namespace.
Definition: dimops.hh:10
fml::linalg::qr_Q
void qr_Q(const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from a QR decomposition.
Definition: qr.hh:120
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::linalg::qr_R
void qr_R(const cpumat< REAL > &QR, cpumat< REAL > &R)
Recover the R matrix from a QR decomposition.
Definition: qr.hh:162
fml::linalg::eigen_sym
void eigen_sym(cpumat< REAL > &x, cpuvec< REAL > &values)
Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.
Definition: eigen.hh:95
fml::linalg::tcrossprod
void tcrossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x*x^T.
Definition: crossprod.hh:81
fml::linalg::lq_Q
void lq_Q(const cpumat< REAL > &LQ, const cpuvec< REAL > &lqaux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from an LQ decomposition.
Definition: qr.hh:279
fml::linalg::matmult
void matmult(const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.
Definition: matmult.hh:43
fml::linalg::cpsvd
void cpsvd(const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition using the "crossproducts SVD". This method is not numerical...
Definition: svd.hh:323