fml  0.1-0
Fused Matrix Library
svd.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_PAR_GPU_LINALG_SVD_H
6 #define FML_PAR_GPU_LINALG_SVD_H
7 #pragma once
8 
9 
10 #include "../parmat.hh"
11 
12 #include "blas.hh"
13 #include "qr.hh"
14 
15 #include "../../../gpu/linalg/linalg_blas.hh"
16 #include "../../../gpu/linalg/linalg_invert.hh"
17 #include "../../../gpu/linalg/linalg_qr.hh"
18 #include "../../../gpu/linalg/linalg_svd.hh"
19 
20 #include "../../../gpu/copy.hh"
21 
22 
23 namespace fml
24 {
25 namespace linalg
26 {
51  template <typename REAL>
52  void cpsvd(const parmat_gpu<REAL> &x, gpuvec<REAL> &s)
53  {
54  const len_t n = x.ncols();
55 
56  auto cp = crossprod((REAL)1.0, x.data_obj());
57  x.get_comm().allreduce(n*n, cp.data_ptr());
58  eigen_sym(cp, s);
59  }
60 
62  template <typename REAL>
64  gpumat<REAL> &vt)
65  {
66  const len_t n = x.ncols();
67 
68  auto cp = crossprod((REAL)1.0, x.data_obj());
69  x.get_comm().allreduce(n*n, cp.data_ptr());
70  eigen_sym(cp, s, vt);
71 
72  auto c = vt.get_card();
73 
74  s.rev();
75  REAL *s_d = s.data_ptr();
76  auto sgrid = s.get_griddim();
77  auto sblock = s.get_blockdim();
78  kernelfuns::kernel_root_abs<<<sgrid, sblock>>>(s.size(), s_d);
79 
80  REAL *vt_d = vt.data_ptr();
81  vt.rev_cols();
82  copy::gpu2gpu(vt, cp);
83 
84  auto xgrid = vt.get_griddim();
85  auto xblock = vt.get_blockdim();
86  kernelfuns::kernel_sweep_cols_div<<<xgrid, xblock>>>(n, n, vt_d, s_d);
87 
88  matmult(false, false, (REAL)1.0, x.data_obj(), vt, u);
89  xpose(cp, vt);
90  }
91 
92 
93 
126  template <typename REAL>
128  {
129  gpumat<REAL> R(s.get_card());
130  qr_R(mpi::REDUCE_TO_ALL, x, R);
131  svd(R, s);
132  }
133 
135  template <typename REAL>
138  {
139  const len_t n = x.ncols();
140  if (x.nrows() < (len_global_t)n)
141  throw std::runtime_error("impossible dimensions");
142 
143  gpumat<REAL> x_cpy = x.data_obj().dupe();
144 
145  gpumat<REAL> R_local(s.get_card());
146  gpuvec<REAL> qraux(s.get_card());
147  qr(false, x.data_obj(), qraux);
148  qr_R(x.data_obj(), R_local);
149 
150  gpumat<REAL> R(s.get_card(), n, n);
151  tsqr::qr_allreduce(mpi::REDUCE_TO_ALL, n, n, R_local.data_ptr(), R.data_ptr(), x.get_comm().get_comm());
152 
153  copy::gpu2gpu(R, R_local);
154  gpumat<REAL> u_R(s.get_card());
155  svd(R_local, s, u_R, vt);
156 
157  // u = Q*u_R = x*R^{-1}*u_R
158  u.resize(x.nrows(), x.ncols());
159  trinv(true, false, R);
160  matmult(false, false, (REAL)1, x_cpy, R, x.data_obj());
161  matmult(false, false, (REAL)1, x.data_obj(), u_R, u.data_obj());
162  }
163 
164 
165 
166  namespace internals
167  {
168  template <typename REAL>
169  void rsvd_A(const uint32_t seed, const int k, const int q,
171  {
172  const len_global_t m = x.nrows();
173  const len_t n = x.ncols();
174  if (m < (len_global_t)n)
175  throw std::runtime_error("must have m>n");
176  if (k > n)
177  throw std::runtime_error("must have k<n");
178 
179  auto c = x.data_obj().get_card();
180 
181  parmat_gpu<REAL> Y(x.get_comm(), c, m, 2*k, x.nrows_before());
182  parmat_gpu<REAL> Y_tmp(x.get_comm(), c, m, 2*k, x.nrows_before());
183  gpumat<REAL> Z(c, n, 2*k);
184  gpumat<REAL> QZ(c, n, 2*k);
185 
186  gpumat<REAL> R(c, 2*k, 2*k);
187  gpumat<REAL> R_local(c);
188  gpuvec<REAL> qraux(c);
189  gpuvec<REAL> work(c);
190 
191  gpumat<REAL> omega(c, n, 2*k);
192  omega.fill_runif(seed);
193 
194  // Stage A
195  matmult(x, omega, Y);
196  internals::qr_Q(Y, Y_tmp, R, R_local, qraux, QY);
197 
198  for (int i=0; i<q; i++)
199  {
200  matmult(x, QY, Z);
201  linalg::qr_internals(false, Z, qraux, work);
202  linalg::qr_Q(Z, qraux, QZ, work);
203 
204  matmult(x, QZ, Y);
205  internals::qr_Q(Y, Y_tmp, R, R_local, qraux, QY);
206  }
207  }
208  }
209 
232  template <typename REAL>
233  void rsvd(const uint32_t seed, const int k, const int q,
235  {
236  parmat_gpu<REAL> QY(x.get_comm(), s.get_card(), x.nrows(), 2*k, x.nrows_before());
237  gpumat<REAL> B(s.get_card(), 2*k, x.ncols());
238 
239  // stage A
240  internals::rsvd_A(seed, k, q, x, QY);
241 
242  // Stage B
243  matmult(QY, x, B);
244 
245  linalg::qrsvd(B, s);
246  s.resize(k);
247  }
248 
250  template <typename REAL>
251  void rsvd(const uint32_t seed, const int k, const int q,
253  gpumat<REAL> &vt)
254  {
255  parmat_gpu<REAL> QY(x.get_comm(), s.get_card(), x.nrows(), 2*k, x.nrows_before());
256  gpumat<REAL> B(s.get_card(), 2*k, x.ncols());
257 
258  // stage A
259  internals::rsvd_A(seed, k, q, x, QY);
260 
261  // Stage B
262  matmult(QY, x, B);
263 
264  cpumat<REAL> uB(s.get_card());
265  linalg::qrsvd(B, s, uB, vt);
266 
267  s.resize(k);
268 
269  matmult(QY, uB, u);
270  // u.resize(u.nrows(), k);
271  }
272 }
273 }
274 
275 
276 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::parmat_gpu
Definition: parmat.hh:20
fml::linalg::crossprod
void crossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x^T*x.
Definition: crossprod.hh:37
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition: xpose.hh:37
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::gpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: gpuvec.hh:225
fml::linalg::qr
void qr(const bool pivot, cpumat< REAL > &x, cpuvec< REAL > &qraux)
Computes the QR decomposition.
Definition: qr.hh:94
fml::linalg::rsvd
void rsvd(const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the truncated singular value decomposition using the normal projections method of Halko et a...
Definition: svd.hh:462
fml::copy::gpu2gpu
void gpu2gpu(const gpuvec< REAL_IN > &gpu_in, gpuvec< REAL_OUT > &gpu_out)
Copy data from a GPU object to another.
Definition: copy.hh:203
fml::linalg::qrsvd
void qrsvd(cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: svd.hh:273
fml::linalg::tssvd
void tssvd(parmat_cpu< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix...
Definition: svd.hh:130
fml::linalg::svd
void svd(cpumat< REAL > &x, cpuvec< REAL > &s)
Computes the singular value decomposition.
Definition: svd.hh:101
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::gpuvec::rev
void rev()
Reverse the vector.
Definition: gpuvec.hh:462
fml
Core namespace.
Definition: dimops.hh:10
fml::linalg::qr_Q
void qr_Q(const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from a QR decomposition.
Definition: qr.hh:120
fml::linalg::trinv
void trinv(const bool upper, const bool unit_diag, cpumat< REAL > &x)
Compute the matrix inverse of a triangular matrix.
Definition: invert.hh:87
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::linalg::qr_R
void qr_R(const cpumat< REAL > &QR, cpumat< REAL > &R)
Recover the R matrix from a QR decomposition.
Definition: qr.hh:162
fml::linalg::eigen_sym
void eigen_sym(cpumat< REAL > &x, cpuvec< REAL > &values)
Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.
Definition: eigen.hh:95
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpumat::rev_cols
void rev_cols()
Reverse the columns of the matrix.
Definition: gpumat.hh:616
fml::linalg::matmult
void matmult(const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.
Definition: matmult.hh:43
fml::linalg::cpsvd
void cpsvd(const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition using the "crossproducts SVD". This method is not numerical...
Definition: svd.hh:323