fml  0.1-0
Fused Matrix Library
linalg_misc.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_LINALG_LINALG_MISC_H
6 #define FML_GPU_LINALG_LINALG_MISC_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../../_internals/linalgutils.hh"
13 
14 #include "../arch/arch.hh"
15 
16 #include "../internals/gpuscalar.hh"
17 #include "../internals/kernelfuns.hh"
18 
19 #include "../gpumat.hh"
20 #include "../gpuvec.hh"
21 
22 #include "linalg_err.hh"
23 #include "linalg_lu.hh"
24 
25 
26 namespace fml
27 {
28 namespace linalg
29 {
30  namespace
31  {
32  static __global__ void kernel_lu_pivot_sgn(const len_t n, int *ipiv, int *sgn)
33  {
34  int i = blockDim.x*blockIdx.x + threadIdx.x;
35 
36  if (i < n)
37  {
38  ipiv[i] = (ipiv[i] != (i+1) ? -1 : 1);
39  atomicAdd(sgn, ipiv[i]);
40 
41  if (threadIdx.x == 0)
42  (*sgn) = ((*sgn)%2 == 0 ? 1 : -1);
43  }
44  }
45 
46  template <typename REAL>
47  __global__ void kernel_det_mod(const len_t m, const len_t n, const REAL *x, REAL *mod, int *sgn)
48  {
49  int i = blockDim.x*blockIdx.x + threadIdx.x;
50  int j = blockDim.y*blockIdx.y + threadIdx.y;
51 
52  if (i < m && j < n && i == j)
53  {
54  REAL d = x[i + m*j];
55  int s = 0;
56 
57  if (d < 0)
58  {
59  d = (*mod) + log(-d);
60  s++;
61  }
62  else
63  d = (*mod) + log(d);
64 
65  atomicAdd(mod, d);
66  int s_g = (*sgn);
67  (*sgn) = 0;
68  atomicAdd(sgn, s);
69 
70  if (threadIdx.x == 0 && threadIdx.y == 0)
71  {
72  (*sgn) = ((*sgn)%2 == 0 ? 1 : -1);
73  (*sgn) *= s_g;
74  }
75  }
76  }
77  }
78 
97  template <typename REAL>
98  void det(gpumat<REAL> &x, int &sign, REAL &modulus)
99  {
100  if (!x.is_square())
101  throw std::runtime_error("'x' must be a square matrix");
102 
103  auto c = x.get_card();
104 
105  gpuvec<int> p(c);
106  int info;
107  lu(x, p, info);
108 
109  if (info != 0)
110  {
111  if (info > 0)
112  {
113  sign = 1;
114  modulus = -INFINITY;
115  return;
116  }
117  else
118  return;
119  }
120 
121  // get determinant
122  modulus = 0.0;
123  sign = 1;
124 
125  gpuscalar<int> sign_gpu(c, sign);
126  gpuscalar<REAL> modulus_gpu(c, modulus);
127 
128  kernel_lu_pivot_sgn<<<p.get_griddim(), p.get_blockdim()>>>(p.size(),
129  p.data_ptr(), sign_gpu.data_ptr());
130  kernel_det_mod<<<x.get_griddim(), x.get_blockdim()>>>(x.nrows(), x.ncols(),
131  x.data_ptr(), modulus_gpu.data_ptr(), sign_gpu.data_ptr());
132 
133  sign_gpu.get_val(&sign);
134  modulus_gpu.get_val(&modulus);
135  }
136 
137 
138 
146  template <typename REAL>
147  REAL trace(const gpumat<REAL> &x)
148  {
149  const len_t m = x.nrows();
150  const len_t n = x.ncols();
151  auto c = x.get_card();
152 
153  REAL tr = 0;
154  gpuscalar<REAL> tr_gpu(c, tr);
155 
156  fml::kernelfuns::kernel_trace<<<x.get_griddim(), x.get_blockdim()>>>(m, n,
157  x.data_ptr(), tr_gpu.data_ptr());
158 
159  tr_gpu.get_val(&tr);
160  c->check();
161 
162  return tr;
163  }
164 }
165 }
166 
167 
168 #endif
fml::unimat::is_square
bool is_square() const
Is the matrix square?
Definition: unimat.hh:34
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::linalg::trace
REAL trace(const cpumat< REAL > &x)
Computes the trace, i.e. the sum of the diagonal.
Definition: linalg_misc.hh:109
fml::linalg::lu
void lu(cpumat< REAL > &x, cpuvec< int > &p, int &info)
Computes the PLU factorization with partial pivoting.
Definition: linalg_lu.hh:48
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::linalg::det
void det(cpumat< REAL > &x, int &sign, REAL &modulus)
Computes the determinant in logarithmic form.
Definition: linalg_misc.hh:46
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpuscalar
Definition: gpuscalar.hh:16