fml  0.1-0
Fused Matrix Library
linalg_svd.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_CPU_LINALG_LINALG_SVD_H
6 #define FML_CPU_LINALG_LINALG_SVD_H
7 #pragma once
8 
9 
10 #include <cmath>
11 #include <stdexcept>
12 
13 #include "../../_internals/linalgutils.hh"
14 #include "../../_internals/omp.hh"
15 
16 #include "../copy.hh"
17 #include "../cpumat.hh"
18 #include "../cpuvec.hh"
19 
20 #include "linalg_blas.hh"
21 #include "linalg_eigen.hh"
22 #include "linalg_qr.hh"
23 
24 
25 namespace fml
26 {
27 namespace linalg
28 {
29  namespace
30  {
31  template <typename REAL>
32  int svd_internals(const int nu, const int nv, cpumat<REAL> &x, cpuvec<REAL> &s,
33  cpumat<REAL> &u, cpumat<REAL> &vt)
34  {
35  int info = 0;
36  char jobz;
37  int ldvt;
38 
39  const len_t m = x.nrows();
40  const len_t n = x.ncols();
41  const len_t minmn = std::min(m, n);
42 
43  s.resize(minmn);
44 
45  if (nu == 0 && nv == 0)
46  {
47  jobz = 'N';
48  ldvt = 1; // value is irrelevant, but must exist!
49  }
50  else if (nu <= minmn && nv <= minmn)
51  {
52  jobz = 'S';
53  ldvt = minmn;
54 
55  u.resize(m, minmn);
56  vt.resize(minmn, n);
57  }
58  else
59  {
60  jobz = 'A';
61  ldvt = n;
62  }
63 
64  cpuvec<int> iwork(8*minmn);
65 
66  REAL tmp;
67  fml::lapack::gesdd(jobz, m, n, x.data_ptr(), m, s.data_ptr(), u.data_ptr(), m, vt.data_ptr(), ldvt, &tmp, -1, iwork.data_ptr(), &info);
68  int lwork = (int) tmp;
69  cpuvec<REAL> work(lwork);
70 
71  fml::lapack::gesdd(jobz, m, n, x.data_ptr(), m, s.data_ptr(), u.data_ptr(), m, vt.data_ptr(), ldvt, work.data_ptr(), lwork, iwork.data_ptr(), &info);
72 
73  return info;
74  }
75  }
76 
96  template <typename REAL>
98  {
99  cpumat<REAL> ignored;
100  int info = svd_internals(0, 0, x, s, ignored, ignored);
101  fml::linalgutils::check_info(info, "gesdd");
102  }
103 
105  template <typename REAL>
107  {
108  int info = svd_internals(1, 1, x, s, u, vt);
109  fml::linalgutils::check_info(info, "gesdd");
110  }
111 
112 
113 
114  namespace
115  {
116  template <typename REAL>
118  {
119  const len_t m = x.nrows();
120  const len_t n = x.ncols();
121  if (m <= n)
122  throw std::runtime_error("'x' must have more rows than cols");
123 
124  cpuvec<REAL> qraux(n);
125  cpuvec<REAL> work(m);
126  qr_internals(false, x, qraux, work);
127 
128  cpumat<REAL> R(n, n);
129  qr_R(x, R);
130 
131  cpumat<REAL> u_R(n, n);
132  svd(R, s, u_R, vt);
133 
134  u.resize(m, n);
135  u.fill_eye();
136 
137  qr_Q(x, qraux, u, work);
138 
139  matmult(false, false, (REAL)1.0, u, u_R, x);
140  copy::cpu2cpu(x, u);
141  }
142 
143  template <typename REAL>
144  void tssvd(cpumat<REAL> &x, cpuvec<REAL> &s)
145  {
146  const len_t m = x.nrows();
147  const len_t n = x.ncols();
148  if (m <= n)
149  throw std::runtime_error("'x' must have more rows than cols");
150 
151  s.resize(n);
152 
153  cpuvec<REAL> qraux(n);
154  cpuvec<REAL> work(m);
155  qr_internals(false, x, qraux, work);
156 
157  fml::cpu_utils::tri2zero('L', false, n, n, x.data_ptr(), m);
158 
159  int info = 0;
160  cpuvec<int> iwork(8*n);
161 
162  REAL tmp;
163  fml::lapack::gesdd('N', n, n, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
164  1, &tmp, -1, iwork.data_ptr(), &info);
165  int lwork = (int) tmp;
166  if (lwork > work.size())
167  work.resize(lwork);
168 
169  fml::lapack::gesdd('N', n, n, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
170  1, work.data_ptr(), lwork, iwork.data_ptr(), &info);
171  fml::linalgutils::check_info(info, "gesdd");
172  }
173 
174 
175 
176  template <typename REAL>
177  void sfsvd(cpumat<REAL> &x, cpuvec<REAL> &s, cpumat<REAL> &u, cpumat<REAL> &vt)
178  {
179  const len_t m = x.nrows();
180  const len_t n = x.ncols();
181  if (m >= n)
182  throw std::runtime_error("'x' must have more cols than rows");
183 
184  cpuvec<REAL> lqaux;
185  cpuvec<REAL> work;
186  lq_internals(x, lqaux, work);
187 
188  cpumat<REAL> L(m, m);
189  lq_L(x, L);
190 
191  cpumat<REAL> vt_L(m, m);
192  svd(L, s, u, vt_L);
193 
194  vt.resize(n, m);
195  vt.fill_eye();
196 
197  lq_Q(x, lqaux, vt, work);
198 
199  matmult(false, false, (REAL)1.0, vt_L, vt, x);
200  copy::cpu2cpu(x, vt);
201  }
202 
203  template <typename REAL>
204  void sfsvd(cpumat<REAL> &x, cpuvec<REAL> &s)
205  {
206  const len_t m = x.nrows();
207  const len_t n = x.ncols();
208  if (m >= n)
209  throw std::runtime_error("'x' must have more cols than rows");
210 
211  s.resize(m);
212 
213  cpuvec<REAL> lqaux;
214  cpuvec<REAL> work;
215  lq_internals(x, lqaux, work);
216 
217  fml::cpu_utils::tri2zero('U', false, m, m, x.data_ptr(), m);
218 
219  int info = 0;
220  cpuvec<int> iwork(8*n);
221 
222  REAL tmp;
223  fml::lapack::gesdd('N', m, m, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
224  1, &tmp, -1, iwork.data_ptr(), &info);
225  int lwork = (int) tmp;
226  if (lwork > work.size())
227  work.resize(lwork);
228 
229  fml::lapack::gesdd('N', m, m, x.data_ptr(), m, s.data_ptr(), NULL, m, NULL,
230  1, work.data_ptr(), lwork, iwork.data_ptr(), &info);
231  fml::linalgutils::check_info(info, "gesdd");
232  }
233  }
234 
268  template <typename REAL>
270  {
271  if (x.is_square())
272  svd(x, s, u, vt);
273  else if (x.nrows() > x.ncols())
274  tssvd(x, s, u, vt);
275  else
276  sfsvd(x, s, u, vt);
277  }
278 
280  template <typename REAL>
282  {
283  if (x.is_square())
284  svd(x, s);
285  else if (x.nrows() > x.ncols())
286  tssvd(x, s);
287  else
288  sfsvd(x, s);
289  }
290 
291 
292 
318  template <typename REAL>
320  cpumat<REAL> &vt)
321  {
322  const len_t m = x.nrows();
323  const len_t n = x.ncols();
324  const len_t minmn = std::min(m, n);
325 
326  cpumat<REAL> cp;
327 
328  if (m >= n)
329  {
330  crossprod((REAL)1.0, x, cp);
331  eigen_sym(cp, s, vt);
332  vt.rev_cols();
333  copy::cpu2cpu(vt, cp);
334  }
335  else
336  {
337  tcrossprod((REAL)1.0, x, cp);
338  eigen_sym(cp, s, u);
339  u.rev_cols();
340  copy::cpu2cpu(u, cp);
341  }
342 
343  s.rev();
344  REAL *s_d = s.data_ptr();
345  #pragma omp for simd
346  for (len_t i=0; i<s.size(); i++)
347  s_d[i] = sqrt(fabs(s_d[i]));
348 
349  REAL *ev_d;
350  if (m >= n)
351  ev_d = vt.data_ptr();
352  else
353  ev_d = cp.data_ptr();
354 
355  #pragma omp parallel for if(minmn*minmn > fml::omp::OMP_MIN_SIZE)
356  for (len_t j=0; j<minmn; j++)
357  {
358  #pragma omp simd
359  for (len_t i=0; i<minmn; i++)
360  ev_d[i + minmn*j] /= s_d[j];
361  }
362 
363  if (m >= n)
364  {
365  matmult(false, false, (REAL)1.0, x, vt, u);
366  xpose(cp, vt);
367  }
368  else
369  matmult(true, false, (REAL)1.0, cp, x, vt);
370  }
371 
373  template <typename REAL>
374  void cpsvd(const cpumat<REAL> &x, cpuvec<REAL> &s)
375  {
376  const len_t m = x.nrows();
377  const len_t n = x.ncols();
378 
379  cpumat<REAL> cp;
380 
381  if (m >= n)
382  crossprod((REAL)1.0, x, cp);
383  else
384  tcrossprod((REAL)1.0, x, cp);
385 
386  eigen_sym(cp, s);
387 
388  s.rev();
389  REAL *s_d = s.data_ptr();
390  #pragma omp for simd
391  for (len_t i=0; i<s.size(); i++)
392  s_d[i] = sqrt(fabs(s_d[i]));
393  }
394 
395 
396 
397  namespace
398  {
399  template <typename REAL>
400  void rsvd_A(const uint32_t seed, const int k, const int q, cpumat<REAL> &x,
401  cpumat<REAL> &QY)
402  {
403  const len_t m = x.nrows();
404  const len_t n = x.ncols();
405 
406  cpumat<REAL> omega(n, 2*k);
407  omega.fill_runif(seed);
408 
409  cpumat<REAL> Y(m, 2*k);
410  cpumat<REAL> Z(n, 2*k);
411  cpumat<REAL> QZ(n, 2*k);
412 
413  cpuvec<REAL> qraux;
414  cpuvec<REAL> work;
415 
416  cpumat<REAL> B(2*k, n);
417 
418  // Stage A
419  matmult(false, false, (REAL)1.0, x, omega, Y);
420  qr_internals(false, Y, qraux, work);
421  qr_Q(Y, qraux, QY, work);
422 
423  for (int i=0; i<q; i++)
424  {
425  matmult(true, false, (REAL)1.0, x, QY, Z);
426  qr_internals(false, Z, qraux, work);
427  qr_Q(Z, qraux, QZ, work);
428 
429  matmult(false, false, (REAL)1.0, x, QZ, Y);
430  qr_internals(false, Y, qraux, work);
431  qr_Q(Y, qraux, QY, work);
432  }
433  }
434  }
435 
457  template <typename REAL>
458  void rsvd(const uint32_t seed, const int k, const int q, cpumat<REAL> &x,
459  cpuvec<REAL> &s)
460  {
461  const len_t m = x.nrows();
462  const len_t n = x.ncols();
463 
464  cpumat<REAL> QY(m, 2*k);
465  cpumat<REAL> B(2*k, n);
466 
467  // Stage A
468  rsvd_A(seed, k, q, x, QY);
469 
470  // Stage B
471  matmult(true, false, (REAL)1.0, QY, x, B);
472 
473  cpumat<REAL> uB;
474  svd(B, s);
475 
476  s.resize(k);
477  }
478 
480  template <typename REAL>
481  void rsvd(const uint32_t seed, const int k, const int q, cpumat<REAL> &x,
483  {
484  const len_t m = x.nrows();
485  const len_t n = x.ncols();
486 
487  cpumat<REAL> QY(m, 2*k);
488  cpumat<REAL> B(2*k, n);
489 
490  // Stage A
491  rsvd_A(seed, k, q, x, QY);
492 
493  // Stage B
494  matmult(true, false, (REAL)1.0, QY, x, B);
495 
496  cpumat<REAL> uB;
497  svd(B, s, uB, vt);
498 
499  s.resize(k);
500 
501  matmult(false, false, (REAL)1.0, QY, uB, u);
502  u.resize(u.nrows(), k);
503  }
504 }
505 }
506 
507 
508 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::copy::cpu2cpu
void cpu2cpu(const cpuvec< REAL_IN > &cpu_in, cpuvec< REAL_OUT > &cpu_out)
Copy data from a CPU object to another.
Definition: copy.hh:40
fml::cpumat::fill_eye
void fill_eye()
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: cpumat.hh:439
fml::linalg::crossprod
void crossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x^T*x.
Definition: linalg_blas.hh:379
fml::unimat::is_square
bool is_square() const
Is the matrix square?
Definition: unimat.hh:34
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition: linalg_blas.hh:463
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: cpuvec.hh:209
fml::linalg::rsvd
void rsvd(const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the truncated singular value decomposition using the normal projections method of Halko et a...
Definition: linalg_svd.hh:458
fml::cpuvec::rev
void rev()
Reverse the vector.
Definition: cpuvec.hh:437
fml::cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: cpumat.hh:230
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::linalg::qrsvd
void qrsvd(cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: linalg_svd.hh:269
fml::linalg::tssvd
void tssvd(parmat_cpu< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: svd.hh:130
fml::linalg::svd
void svd(cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition.
Definition: linalg_svd.hh:97
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::linalg::lq_L
void lq_L(const cpumat< REAL > &LQ, cpumat< REAL > &L)
Recover the L matrix from an LQ decomposition.
Definition: linalg_qr.hh:247
fml::cpumat::rev_cols
void rev_cols()
Reverse the columns of the matrix.
Definition: cpumat.hh:626
fml
Core namespace.
Definition: dimops.hh:10
fml::linalg::qr_Q
void qr_Q(const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from a QR decomposition.
Definition: linalg_qr.hh:120
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::linalg::qr_R
void qr_R(const cpumat< REAL > &QR, cpumat< REAL > &R)
Recover the R matrix from a QR decomposition.
Definition: linalg_qr.hh:162
fml::linalg::eigen_sym
void eigen_sym(cpumat< REAL > &x, cpuvec< REAL > &values)
Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.
Definition: linalg_eigen.hh:93
fml::linalg::tcrossprod
void tcrossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x*x^T.
Definition: linalg_blas.hh:423
fml::linalg::lq_Q
void lq_Q(const cpumat< REAL > &LQ, const cpuvec< REAL > &lqaux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from an LQ decomposition.
Definition: linalg_qr.hh:279
fml::linalg::matmult
void matmult(const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.
Definition: linalg_blas.hh:257
fml::linalg::cpsvd
void cpsvd(const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition using the "crossproducts SVD". This method is not numerical...
Definition: linalg_svd.hh:319