fml  0.1-0
Fused Matrix Library
linalg_factorizations.hh
// This file is part of fml which is released under the Boost Software
// License, Version 1.0. See accompanying file LICENSE or copy at
// https://www.boost.org/LICENSE_1_0.txt

#ifndef FML_GPU_LINALG_LINALG_FACTORIZATIONS_H
#define FML_GPU_LINALG_LINALG_FACTORIZATIONS_H
#pragma once


#include <stdexcept>

#include "../../_internals/linalgutils.hh"

#include "../arch/arch.hh"

#include "../internals/gpu_utils.hh"
#include "../internals/gpuscalar.hh"
#include "../internals/kernelfuns.hh"

#include "../copy.hh"
#include "../gpumat.hh"
#include "../gpuvec.hh"

#include "linalg_err.hh"
#include "linalg_blas.hh"


namespace fml
{
namespace linalg
{
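  /**
    @brief Computes the PLU factorization with partial pivoting.

    The input matrix is overwritten by its factorization, the pivot indices
    are stored in `p`, and the factorization return code is stored in `info`.
   */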
  template <typename REAL>
  void lu(gpumat<REAL> &x, gpuvec<int> &p, int &info)
  {
    err::check_card(x, p);

    info = 0;
    const len_t m = x.nrows();
    const len_t n = x.ncols();
    auto c = x.get_card();

    const len_t lipiv = std::min(m, n);
    if (!p.get_card()->valid_card())
      p.inherit(c);

    p.resize(lipiv);

    #if defined(FML_GPULAPACK_VENDOR)
    int lwork;
    gpulapack_status_t check = gpulapack::getrf_buflen(c->lapack_handle(), m,
      n, x.data_ptr(), m, &lwork);
    gpulapack::err::check_ret(check, "getrf_bufferSize");

    gpuvec<REAL> work(c, lwork);
    gpuscalar<int> info_device(c, info);

    check = gpulapack::getrf(c->lapack_handle(), m, n, x.data_ptr(), m,
      work.data_ptr(), p.data_ptr(), info_device.data_ptr());

    info_device.get_val(&info);
    gpulapack::err::check_ret(check, "getrf");
    #elif defined(FML_GPULAPACK_MAGMA)
    cpuvec<int> p_cpu(lipiv);
    gpulapack::getrf(m, n, x.data_ptr(), m, p_cpu.data_ptr(), &info);
    copy::cpu2gpu(p_cpu, p);
    #else
    #error "Unsupported GPU lapack"
    #endif
  }

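  /// Computes the PLU factorization with partial pivoting; the pivot vector
  /// is discarded and the return code is checked via linalgutils::check_info().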
  template <typename REAL>
  void lu(gpumat<REAL> &x)
  {
    gpuvec<int> p(x.get_card());
    int info;

    lu(x, p, info);

    fml::linalgutils::check_info(info, "getrf");
  }



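  // Internal gesvd driver shared by the svd() interfaces below.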
  namespace
  {
    template <typename REAL>
    int svd_internals(const int nu, const int nv, gpumat<REAL> &x, gpuvec<REAL> &s, gpumat<REAL> &u, gpumat<REAL> &vt)
    {
      auto c = x.get_card();

      const len_t m = x.nrows();
      const len_t n = x.ncols();
      const len_t minmn = std::min(m, n);

      s.resize(minmn);

      signed char jobu, jobvt;
      if (nu == 0 && nv == 0)
      {
        jobu = 'N';
        jobvt = 'N';
      }
      else //if (nu <= minmn && nv <= minmn)
      {
        jobu = 'S';
        jobvt = 'S';

        u.resize(m, minmn);
        vt.resize(minmn, n);
      }

      int lwork;
      gpulapack_status_t check = gpulapack::gesvd_buflen(c->lapack_handle(), m, n,
        x.data_ptr(), &lwork);
      gpulapack::err::check_ret(check, "gesvd_bufferSize");

      gpuvec<REAL> work(c, lwork);
      gpuvec<REAL> rwork(c, minmn-1);

      int info = 0;
      gpuscalar<int> info_device(c, info);

      check = gpulapack::gesvd(c->lapack_handle(), jobu, jobvt, m, n, x.data_ptr(),
        m, s.data_ptr(), u.data_ptr(), m, vt.data_ptr(), minmn, work.data_ptr(),
        lwork, rwork.data_ptr(), info_device.data_ptr());

      info_device.get_val(&info);
      gpulapack::err::check_ret(check, "gesvd");

      return info;
    }
  }

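  /**
    @brief Computes the singular value decomposition.

    This interface computes only the singular values; the input matrix is
    overwritten during the computation.

    A minimal usage sketch (illustrative only; `c` is assumed to be a valid
    card object, and `m`/`n` the desired dimensions):

        fml::gpumat<float> x(c, m, n);
        // ... fill x with data ...
        fml::gpuvec<float> s(c);
        fml::linalg::svd(x, s);
   */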
  template <typename REAL>
  void svd(gpumat<REAL> &x, gpuvec<REAL> &s)
  {
    err::check_card(x, s);

    gpumat<REAL> ignored(x.get_card());
    int info = svd_internals(0, 0, x, s, ignored, ignored);
    fml::linalgutils::check_info(info, "gesvd");
  }

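  /// Computes the singular value decomposition, additionally returning the
  /// left singular vectors in `u` and the transposed right singular vectors in `vt`.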
  template <typename REAL>
  void svd(gpumat<REAL> &x, gpuvec<REAL> &s, gpumat<REAL> &u, gpumat<REAL> &vt)
  {
    err::check_card(x, s);
    err::check_card(x, u);
    err::check_card(x, vt);

    if (x.nrows() >= x.ncols())
    {
      int info = svd_internals(1, 1, x, s, u, vt);
      fml::linalgutils::check_info(info, "gesvd");
    }
    else
    {
      auto tx = xpose(x);
      gpumat<REAL> v(x.get_card());
      int info = svd_internals(1, 1, tx, s, v, u);
      xpose(v, vt);
      fml::linalgutils::check_info(info, "gesvd");
    }
  }



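  // Internal syevd driver shared by the eigen_sym() interfaces below.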
  namespace
  {
    template <typename REAL>
    int eig_sym_internals(const bool only_values, gpumat<REAL> &x,
      gpuvec<REAL> &values, gpumat<REAL> &vectors)
    {
      if (!x.is_square())
        throw std::runtime_error("'x' must be a square matrix");

      auto c = x.get_card();

      len_t n = x.nrows();
      values.resize(n);

      cusolverEigMode_t jobz;
      if (only_values)
        jobz = CUSOLVER_EIG_MODE_NOVECTOR;
      else
        jobz = CUSOLVER_EIG_MODE_VECTOR;

      int lwork;
      gpulapack_status_t check = gpulapack::syevd_buflen(c->lapack_handle(), jobz,
        GPUBLAS_FILL_L, n, x.data_ptr(), n, values.data_ptr(), &lwork);
      gpulapack::err::check_ret(check, "syevd_bufferSize");

      gpuvec<REAL> work(c, lwork);

      int info = 0;
      gpuscalar<int> info_device(c, info);

      check = gpulapack::syevd(c->lapack_handle(), jobz, GPUBLAS_FILL_L,
        n, x.data_ptr(), n, values.data_ptr(), work.data_ptr(), lwork,
        info_device.data_ptr());

      info_device.get_val(&info);
      gpulapack::err::check_ret(check, "syevd");
      fml::linalgutils::check_info(info, "syevd");

      if (!only_values)
      {
        vectors.resize(n, n);
        copy::gpu2gpu(x, vectors);
      }

      return info;
    }
  }


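  /**
    @brief Compute the eigenvalues and optionally the eigenvectors for a
    symmetric matrix.

    The input matrix is assumed to be symmetric and only its lower triangle
    is referenced. This interface computes only the eigenvalues.
   */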
  template <typename REAL>
  void eigen_sym(gpumat<REAL> &x, gpuvec<REAL> &values)
  {
    err::check_card(x, values);
    gpumat<REAL> ignored(x.get_card());

    int info = eig_sym_internals(true, x, values, ignored);
    fml::linalgutils::check_info(info, "syevd");
  }

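  /// Computes the eigenvalues and eigenvectors of a symmetric matrix, with the
  /// eigenvectors returned in `vectors`.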
  template <typename REAL>
  void eigen_sym(gpumat<REAL> &x, gpuvec<REAL> &values, gpumat<REAL> &vectors)
  {
    err::check_card(x, values);
    err::check_card(x, vectors);

    int info = eig_sym_internals(false, x, values, vectors);
    fml::linalgutils::check_info(info, "syevd");
  }



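  /**
    @brief Compute the matrix inverse.

    The input matrix must be square, and is overwritten by its inverse. The
    inverse is computed from the PLU factorization via a getrs() solve against
    the identity matrix.
   */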
  template <typename REAL>
  void invert(gpumat<REAL> &x)
  {
    if (!x.is_square())
      throw std::runtime_error("'x' must be a square matrix");

    // Factor x = LU
    auto c = x.get_card();
    gpuvec<int> p(c);
    int info;
    lu(x, p, info);
    fml::linalgutils::check_info(info, "getrf");

    // Invert
    const len_t n = x.nrows();
    const len_t nrhs = n;
    gpumat<REAL> inv(c, n, nrhs);
    inv.fill_eye();

    gpuscalar<int> info_device(c, info);

    gpulapack_status_t check = gpulapack::getrs(c->lapack_handle(), GPUBLAS_OP_N, n,
      nrhs, x.data_ptr(), n, p.data_ptr(), inv.data_ptr(), n, info_device.data_ptr());

    info_device.get_val(&info);
    gpulapack::err::check_ret(check, "getrs");
    fml::linalgutils::check_info(info, "getrs");

    copy::gpu2gpu(inv, x);
  }



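  // Internal LU-based solver shared by the solve() interfaces below.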
  namespace
  {
    template <typename REAL>
    void solver(gpumat<REAL> &x, len_t ylen, len_t nrhs, REAL *y_d)
    {
      const len_t n = x.nrows();
      if (!x.is_square())
        throw std::runtime_error("'x' must be a square matrix");
      if (n != ylen)
        throw std::runtime_error("rhs 'y' must be compatible with data matrix 'x'");

      // Factor x = LU
      auto c = x.get_card();
      gpuvec<int> p(c);
      int info;
      lu(x, p, info);
      fml::linalgutils::check_info(info, "getrf");

      // Solve xb = y
      gpuscalar<int> info_device(c, info);

      gpulapack_status_t check = gpulapack::getrs(c->lapack_handle(), GPUBLAS_OP_N,
        n, nrhs, x.data_ptr(), n, p.data_ptr(), y_d, n, info_device.data_ptr());

      info_device.get_val(&info);
      gpulapack::err::check_ret(check, "getrs");
      fml::linalgutils::check_info(info, "getrs");
    }
  }

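  /**
    @brief Solve a system of equations.

    The input matrix `x` must be square and is overwritten by its PLU
    factorization; the right-hand side `y` is overwritten by the solution.
   */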
  template <typename REAL>
  void solve(gpumat<REAL> &x, gpuvec<REAL> &y)
  {
    err::check_card(x, y);
    solver(x, y.size(), 1, y.data_ptr());
  }

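  /// Solves a system of equations against multiple right-hand sides stored as
  /// the columns of the matrix `y`.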
  template <typename REAL>
  void solve(gpumat<REAL> &x, gpumat<REAL> &y)
  {
    err::check_card(x, y);
    solver(x, y.nrows(), y.ncols(), y.data_ptr());
  }



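  // Internal geqrf driver shared by qr() and lq() below.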
  namespace
  {
    template <typename REAL>
    void qr_internals(const bool pivot, gpumat<REAL> &x, gpuvec<REAL> &qraux, gpuvec<REAL> &work)
    {
      if (pivot)
        throw std::runtime_error("pivoting not supported at this time");

      const len_t m = x.nrows();
      const len_t n = x.ncols();
      const len_t minmn = std::min(m, n);
      auto c = x.get_card();

      qraux.resize(minmn);

      int lwork;
      gpulapack_status_t check = gpulapack::geqrf_buflen(c->lapack_handle(), m,
        n, x.data_ptr(), m, &lwork);
      gpulapack::err::check_ret(check, "geqrf_bufferSize");

      if (lwork > work.size())
        work.resize(lwork);

      int info = 0;
      gpuscalar<int> info_device(c, info);

      check = gpulapack::geqrf(c->lapack_handle(), m, n, x.data_ptr(), m,
        qraux.data_ptr(), work.data_ptr(), lwork, info_device.data_ptr());

      info_device.get_val(&info);
      gpulapack::err::check_ret(check, "geqrf");
      fml::linalgutils::check_info(info, "geqrf");
    }
  }

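  /**
    @brief Computes the QR decomposition.

    The factorization is stored compactly in `x` and `qraux`, as with LAPACK's
    geqrf(). Column pivoting is not supported at this time.
   */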
  template <typename REAL>
  void qr(const bool pivot, gpumat<REAL> &x, gpuvec<REAL> &qraux)
  {
    err::check_card(x, qraux);
    gpuvec<REAL> work(x.get_card());
    qr_internals(pivot, x, qraux, work);
  }

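  /**
    @brief Recover the Q matrix from a QR decomposition.

    A minimal workflow sketch (illustrative only; `x` is assumed to already
    hold the data on card `c`):

        fml::gpuvec<float> qraux(c), work(c);
        fml::linalg::qr(false, x, qraux);      // x now holds the compact QR
        fml::gpumat<float> Q(c), R(c);
        fml::linalg::qr_Q(x, qraux, Q, work);  // explicit Q factor
        fml::linalg::qr_R(x, R);               // explicit R factor
   */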
  template <typename REAL>
  void qr_Q(const gpumat<REAL> &QR, const gpuvec<REAL> &qraux, gpumat<REAL> &Q,
    gpuvec<REAL> &work)
  {
    err::check_card(QR, qraux);
    err::check_card(QR, Q);
    err::check_card(QR, work);

    const len_t m = QR.nrows();
    const len_t n = QR.ncols();
    const len_t minmn = std::min(m, n);

    auto c = QR.get_card();

    int lwork;
    gpulapack_status_t check = gpulapack::ormqr_buflen(c->lapack_handle(),
      GPUBLAS_SIDE_LEFT, GPUBLAS_OP_N, m, minmn, minmn, QR.data_ptr(), m,
      qraux.data_ptr(), Q.data_ptr(), m, &lwork);
    gpulapack::err::check_ret(check, "ormqr_bufferSize");

    if (lwork > work.size())
      work.resize(lwork);

    Q.resize(m, minmn);
    Q.fill_eye();

    int info = 0;
    gpuscalar<int> info_device(c, info);

    check = gpulapack::ormqr(c->lapack_handle(), GPUBLAS_SIDE_LEFT,
      GPUBLAS_OP_N, m, minmn, minmn, QR.data_ptr(), m, qraux.data_ptr(),
      Q.data_ptr(), m, work.data_ptr(), lwork, info_device.data_ptr());

    info_device.get_val(&info);
    gpulapack::err::check_ret(check, "ormqr");
    fml::linalgutils::check_info(info, "ormqr");
  }

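  /**
    @brief Recover the R matrix from a QR decomposition.

    The upper triangle of the compact QR storage is copied into `R`.
   */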
  template <typename REAL>
  void qr_R(const gpumat<REAL> &QR, gpumat<REAL> &R)
  {
    err::check_card(QR, R);

    const len_t m = QR.nrows();
    const len_t n = QR.ncols();
    const len_t minmn = std::min(m, n);

    R.resize(minmn, n);
    R.fill_zero();
    fml::gpu_utils::lacpy(GPUBLAS_FILL_U, m, n, QR.data_ptr(), m, R.data_ptr(), minmn);
  }



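  /**
    @brief Computes the LQ decomposition.

    Implemented by transposing the input, computing its QR decomposition, and
    transposing back; the factorization is stored compactly in `x` and `lqaux`.
   */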
  template <typename REAL>
  void lq(gpumat<REAL> &x, gpuvec<REAL> &lqaux)
  {
    err::check_card(x, lqaux);

    gpumat<REAL> tx(x.get_card());
    xpose(x, tx);

    gpuvec<REAL> work(x.get_card());
    qr_internals(false, tx, lqaux, work);

    xpose(tx, x);
  }

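  /**
    @brief Recover the L matrix from an LQ decomposition.

    The lower triangle of the compact LQ storage is copied into `L`.
   */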
  template <typename REAL>
  void lq_L(const gpumat<REAL> &LQ, gpumat<REAL> &L)
  {
    err::check_card(LQ, L);

    const len_t m = LQ.nrows();
    const len_t n = LQ.ncols();
    const len_t minmn = std::min(m, n);

    L.resize(m, minmn);
    L.fill_zero();
    fml::gpu_utils::lacpy(GPUBLAS_FILL_L, m, n, LQ.data_ptr(), m, L.data_ptr(), m);
  }

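  /**
    @brief Recover the Q matrix from an LQ decomposition.
   */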
  template <typename REAL>
  void lq_Q(const gpumat<REAL> &LQ, const gpuvec<REAL> &lqaux, gpumat<REAL> &Q,
    gpuvec<REAL> &work)
  {
    err::check_card(LQ, lqaux);
    err::check_card(LQ, Q);
    err::check_card(LQ, work);

    gpumat<REAL> QR(LQ.get_card());
    xpose(LQ, QR);

    const len_t m = LQ.nrows();
    const len_t n = LQ.ncols();
    const len_t minmn = std::min(m, n);

    auto c = QR.get_card();

    int lwork;
    gpulapack_status_t check = gpulapack::ormqr_buflen(c->lapack_handle(),
      GPUBLAS_SIDE_RIGHT, GPUBLAS_OP_T, minmn, n, n, QR.data_ptr(), QR.nrows(),
      lqaux.data_ptr(), Q.data_ptr(), minmn, &lwork);
    gpulapack::err::check_ret(check, "ormqr_bufferSize");

    if (lwork > work.size())
      work.resize(lwork);

    Q.resize(minmn, n);
    Q.fill_eye();

    int info = 0;
    gpuscalar<int> info_device(c, info);

    check = gpulapack::ormqr(c->lapack_handle(), GPUBLAS_SIDE_RIGHT,
      GPUBLAS_OP_T, minmn, n, n, QR.data_ptr(), QR.nrows(), lqaux.data_ptr(),
      Q.data_ptr(), minmn, work.data_ptr(), lwork, info_device.data_ptr());

    info_device.get_val(&info);
    gpulapack::err::check_ret(check, "ormqr");
    fml::linalgutils::check_info(info, "ormqr");
  }



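  /**
    @brief Compute the Choleski factorization.

    The input matrix must be square and positive definite. The lower-triangular
    factor overwrites the input and the upper triangle is explicitly zeroed; an
    exception is thrown if the matrix is not positive definite.
   */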
  template <typename REAL>
  void chol(gpumat<REAL> &x)
  {
    const len_t n = x.nrows();
    if (n != x.ncols())
      throw std::runtime_error("'x' must be a square matrix");

    auto c = x.get_card();
    const auto fill = GPUBLAS_FILL_L;

    int lwork;
    gpulapack_status_t check = gpulapack::potrf_buflen(c->lapack_handle(), fill, n,
      x.data_ptr(), n, &lwork);
    gpulapack::err::check_ret(check, "potrf_bufferSize");

    gpuvec<REAL> work(c, lwork);

    int info = 0;
    gpuscalar<int> info_device(c, info);
    check = gpulapack::potrf(c->lapack_handle(), fill, n, x.data_ptr(), n,
      work.data_ptr(), lwork, info_device.data_ptr());

    info_device.get_val(&info);
    gpulapack::err::check_ret(check, "potrf");
    if (info < 0)
      fml::linalgutils::check_info(info, "potrf");
    else if (info > 0)
      throw std::runtime_error("chol: leading minor of order " + std::to_string(info) + " is not positive definite");

    fml::gpu_utils::tri2zero('U', false, n, n, x.data_ptr(), n);
  }
}
}


#endif