fml  0.1-0
Fused Matrix Library
linalg_qr.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_LINALG_LINALG_QR_H
6 #define FML_GPU_LINALG_LINALG_QR_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../../_internals/linalgutils.hh"
13 
14 #include "../arch/arch.hh"
15 
16 #include "../internals/gpu_utils.hh"
17 #include "../internals/gpuscalar.hh"
18 
19 #include "../gpumat.hh"
20 #include "../gpuvec.hh"
21 
22 #include "linalg_err.hh"
23 #include "linalg_blas.hh"
24 
25 
26 namespace fml
27 {
28 namespace linalg
29 {
30  namespace
31  {
32  template <typename REAL>
33  void qr_internals(const bool pivot, gpumat<REAL> &x, gpuvec<REAL> &qraux, gpuvec<REAL> &work)
34  {
35  if (pivot)
36  throw std::runtime_error("pivoting not supported at this time");
37 
38  const len_t m = x.nrows();
39  const len_t n = x.ncols();
40  const len_t minmn = std::min(m, n);
41  auto c = x.get_card();
42 
43  qraux.resize(minmn);
44 
45  int lwork;
46  gpulapack_status_t check = gpulapack::geqrf_buflen(c->lapack_handle(), m,
47  n, x.data_ptr(), m, &lwork);
48  gpulapack::err::check_ret(check, "geqrf_bufferSize");
49 
50  if (lwork > work.size())
51  work.resize(lwork);
52 
53  int info = 0;
54  gpuscalar<int> info_device(c, info);
55 
56  check = gpulapack::geqrf(c->lapack_handle(), m, n, x.data_ptr(), m,
57  qraux.data_ptr(), work.data_ptr(), lwork, info_device.data_ptr());
58 
59  info_device.get_val(&info);
60  gpulapack::err::check_ret(check, "syevd");
61  fml::linalgutils::check_info(info, "geqrf");
62  }
63  }
64 
87  template <typename REAL>
88  void qr(const bool pivot, gpumat<REAL> &x, gpuvec<REAL> &qraux)
89  {
90  err::check_card(x, qraux);
91  gpuvec<REAL> work(x.get_card());
92  qr_internals(pivot, x, qraux, work);
93  }
94 
114  template <typename REAL>
115  void qr_Q(const gpumat<REAL> &QR, const gpuvec<REAL> &qraux, gpumat<REAL> &Q,
116  gpuvec<REAL> &work)
117  {
118  err::check_card(QR, qraux, Q, work);
119 
120  const len_t m = QR.nrows();
121  const len_t n = QR.ncols();
122  const len_t minmn = std::min(m, n);
123 
124  auto c = QR.get_card();
125 
126  int lwork;
127  gpulapack_status_t check = gpulapack::orgqr_buflen(c->lapack_handle(),
128  m, minmn, minmn, QR.data_ptr(), m, qraux.data_ptr(), &lwork);
129 
130  if (lwork > work.size())
131  work.resize(lwork);
132 
133  Q.resize(m, minmn);
134 
135  int info = 0;
136  gpuscalar<int> info_device(c, info);
137 
138 
139  fml::gpu_utils::lacpy('A', m, minmn, QR.data_ptr(), m, Q.data_ptr(), m);
140 
141  check = gpulapack::orgqr(c->lapack_handle(), m, minmn, minmn, Q.data_ptr(),
142  m, qraux.data_ptr(), work.data_ptr(), lwork, info_device.data_ptr());
143 
144  info_device.get_val(&info);
145  gpulapack::err::check_ret(check, "ormqr");
146  fml::linalgutils::check_info(info, "ormqr");
147  }
148 
166  template <typename REAL>
167  void qr_R(const gpumat<REAL> &QR, gpumat<REAL> &R)
168  {
169  err::check_card(QR, R);
170 
171  const len_t m = QR.nrows();
172  const len_t n = QR.ncols();
173  const len_t minmn = std::min(m, n);
174 
175  R.resize(minmn, n);
176  R.fill_zero();
177  fml::gpu_utils::lacpy('U', m, n, QR.data_ptr(), m, R.data_ptr(), minmn);
178  }
179 
180 
181 
204  template <typename REAL>
205  void lq(gpumat<REAL> &x, gpuvec<REAL> &lqaux)
206  {
207  err::check_card(x, lqaux);
208 
209  gpumat<REAL> tx(x.get_card());
210  xpose(x, tx);
211 
212  gpuvec<REAL> work(x.get_card());
213  qr_internals(false, tx, lqaux, work);
214 
215  xpose(tx, x);
216  }
217 
236  template <typename REAL>
237  void lq_L(const gpumat<REAL> &LQ, gpumat<REAL> &L)
238  {
239  err::check_card(LQ, L);
240 
241  const len_t m = LQ.nrows();
242  const len_t n = LQ.ncols();
243  const len_t minmn = std::min(m, n);
244 
245  L.resize(m, minmn);
246  L.fill_zero();
247  fml::gpu_utils::lacpy('L', m, n, LQ.data_ptr(), m, L.data_ptr(), m);
248  }
249 
270  template <typename REAL>
271  void lq_Q(const gpumat<REAL> &LQ, const gpuvec<REAL> &lqaux, gpumat<REAL> &Q,
272  gpuvec<REAL> &work)
273  {
274  err::check_card(LQ, lqaux, Q, work);
275 
276  gpumat<REAL> QR(LQ.get_card());
277  xpose(LQ, QR);
278 
279  const len_t m = LQ.nrows();
280  const len_t n = LQ.ncols();
281  const len_t minmn = std::min(m, n);
282 
283  auto c = QR.get_card();
284 
285  int lwork;
286  gpulapack_status_t check = gpulapack::ormqr_buflen(c->lapack_handle(),
287  GPUBLAS_SIDE_RIGHT, GPUBLAS_OP_T, minmn, n, n, QR.data_ptr(), QR.nrows(),
288  lqaux.data_ptr(), Q.data_ptr(), minmn, &lwork);
289 
290  if (lwork > work.size())
291  work.resize(lwork);
292 
293  Q.resize(minmn, n);
294  Q.fill_eye();
295 
296  int info = 0;
297  gpuscalar<int> info_device(c, info);
298 
299  check = gpulapack::ormqr(c->lapack_handle(), GPUBLAS_SIDE_RIGHT,
300  GPUBLAS_OP_T, minmn, n, n, QR.data_ptr(), QR.nrows(), lqaux.data_ptr(),
301  Q.data_ptr(), minmn, work.data_ptr(), lwork, info_device.data_ptr());
302 
303  info_device.get_val(&info);
304  gpulapack::err::check_ret(check, "ormqr");
305  fml::linalgutils::check_info(info, "ormqr");
306  }
307 }
308 }
309 
310 
311 #endif
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition: linalg_blas.hh:463
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::linalg::qr
void qr(const bool pivot, cpumat< REAL > &x, cpuvec< REAL > &qraux)
Computes the QR decomposition.
Definition: linalg_qr.hh:94
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::linalg::lq_L
void lq_L(const cpumat< REAL > &LQ, cpumat< REAL > &L)
Recover the L matrix from an LQ decomposition.
Definition: linalg_qr.hh:247
fml::gpumat::fill_eye
void fill_eye()
Set diagonal entries to 1 and non-diagonal entries to 0.
Definition: gpumat.hh:443
fml
Core namespace.
Definition: dimops.hh:10
fml::linalg::qr_Q
void qr_Q(const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from a QR decomposition.
Definition: linalg_qr.hh:120
fml::linalg::qr_R
void qr_R(const cpumat< REAL > &QR, cpumat< REAL > &R)
Recover the R matrix from a QR decomposition.
Definition: linalg_qr.hh:162
fml::gpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: gpumat.hh:253
fml::linalg::lq
void lq(cpumat< REAL > &x, cpuvec< REAL > &lqaux)
Computes the LQ decomposition.
Definition: linalg_qr.hh:223
fml::gpumat::fill_zero
void fill_zero()
Set all values to zero.
Definition: gpumat.hh:400
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpuscalar
Definition: gpuscalar.hh:16
fml::linalg::lq_Q
void lq_Q(const cpumat< REAL > &LQ, const cpuvec< REAL > &lqaux, cpumat< REAL > &Q, cpuvec< REAL > &work)
Recover the Q matrix from an LQ decomposition.
Definition: linalg_qr.hh:279