fml  0.1-0
Fused Matrix Library
linalg_lu.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_LINALG_LINALG_LU_H
6 #define FML_GPU_LINALG_LINALG_LU_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../../_internals/linalgutils.hh"
13 
14 #include "../arch/arch.hh"
15 
16 #include "../internals/gpuscalar.hh"
17 
18 #include "../copy.hh"
19 #include "../gpumat.hh"
20 #include "../gpuvec.hh"
21 
22 #include "linalg_err.hh"
23 
24 
25 namespace fml
26 {
27 namespace linalg
28 {
50  template <typename REAL>
51  void lu(gpumat<REAL> &x, gpuvec<int> &p, int &info)
52  {
53  err::check_card(x, p);
54 
55  info = 0;
56  const len_t m = x.nrows();
57  const len_t n = x.ncols();
58  auto c = x.get_card();
59 
60  const len_t lipiv = std::min(m, n);
61  if (!p.get_card()->valid_card())
62  p.inherit(c);
63 
64  p.resize(lipiv);
65 
66  #if defined(FML_GPULAPACK_VENDOR)
67  int lwork;
68  gpulapack_status_t check = gpulapack::getrf_buflen(c->lapack_handle(), m,
69  n, x.data_ptr(), m, &lwork);
70  gpulapack::err::check_ret(check, "getrf_bufferSize");
71 
72  gpuvec<REAL> work(c, lwork);
73  gpuscalar<int> info_device(c, info);
74 
75  check = gpulapack::getrf(c->lapack_handle(), m, n, x.data_ptr(), m,
76  work.data_ptr(), p.data_ptr(), info_device.data_ptr());
77 
78  info_device.get_val(&info);
79  gpulapack::err::check_ret(check, "getrf");
80  #elif defined(FML_GPULAPACK_MAGMA)
81  cpuvec<int> p_cpu(lipiv);
82  gpulapack::getrf(m, n, x.data_ptr(), m, p_cpu.data_ptr(), &info);
83  copy::cpu2gpu(p_cpu, p);
84  #else
85  #error "Unsupported GPU lapack"
86  #endif
87  }
88 
90  template <typename REAL>
91  void lu(gpumat<REAL> &x)
92  {
93  gpuvec<int> p(x.get_card());
94  int info;
95 
96  lu(x, p, info);
97 
98  fml::linalgutils::check_info(info, "getrf");
99  }
100 }
101 }
102 
103 
104 #endif
fml::copy::cpu2gpu
void cpu2gpu(const cpuvec< REAL_IN > &cpu, gpuvec< REAL_OUT > &gpu)
Copy data from a CPU object to a GPU object.
Definition: copy.hh:167
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::gpuvec::resize
void resize(len_t size)
Resize the internal object storage.
Definition: gpuvec.hh:224
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::linalg::lu
void lu(cpumat< REAL > &x, cpuvec< int > &p, int &info)
Computes the PLU factorization with partial pivoting.
Definition: linalg_lu.hh:48
fml::cpuvec
Vector class for data held on a single CPU.
Definition: cpuvec.hh:31
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpuscalar
Definition: gpuscalar.hh:16