fml  0.1-0
Fused Matrix Library
det.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_LINALG_DET_H
6 #define FML_GPU_LINALG_DET_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../arch/arch.hh"
13 
14 #include "../internals/gpuscalar.hh"
15 
16 #include "../gpuvec.hh"
17 #include "../gpumat.hh"
18 
19 #include "lu.hh"
20 
21 
22 namespace fml
23 {
24 namespace linalg
25 {
26  namespace
27  {
28  static __global__ void kernel_lu_pivot_sgn(const len_t n, int *ipiv, int *sgn)
29  {
30  int i = blockDim.x*blockIdx.x + threadIdx.x;
31 
32  if (i < n)
33  {
34  ipiv[i] = (ipiv[i] != (i+1) ? -1 : 1);
35  atomicAdd(sgn, ipiv[i]);
36 
37  if (threadIdx.x == 0)
38  (*sgn) = ((*sgn)%2 == 0 ? 1 : -1);
39  }
40  }
41 
42  template <typename REAL>
43  __global__ void kernel_det_mod(const len_t m, const len_t n, const REAL *x, REAL *mod, int *sgn)
44  {
45  int i = blockDim.x*blockIdx.x + threadIdx.x;
46  int j = blockDim.y*blockIdx.y + threadIdx.y;
47 
48  if (i < m && j < n && i == j)
49  {
50  REAL d = x[i + m*j];
51  int s = 0;
52 
53  if (d < 0)
54  {
55  d = (*mod) + log(-d);
56  s++;
57  }
58  else
59  d = (*mod) + log(d);
60 
61  atomicAdd(mod, d);
62  int s_g = (*sgn);
63  (*sgn) = 0;
64  atomicAdd(sgn, s);
65 
66  if (threadIdx.x == 0 && threadIdx.y == 0)
67  {
68  (*sgn) = ((*sgn)%2 == 0 ? 1 : -1);
69  (*sgn) *= s_g;
70  }
71  }
72  }
73  }
74 
93  template <typename REAL>
94  void det(gpumat<REAL> &x, int &sign, REAL &modulus)
95  {
96  if (!x.is_square())
97  throw std::runtime_error("'x' must be a square matrix");
98 
99  auto c = x.get_card();
100 
101  gpuvec<int> p(c);
102  int info;
103  lu(x, p, info);
104 
105  if (info != 0)
106  {
107  if (info > 0)
108  {
109  sign = 1;
110  modulus = -INFINITY;
111  return;
112  }
113  else
114  return;
115  }
116 
117  // get determinant
118  modulus = 0.0;
119  sign = 1;
120 
121  gpuscalar<int> sign_gpu(c, sign);
122  gpuscalar<REAL> modulus_gpu(c, modulus);
123 
124  kernel_lu_pivot_sgn<<<p.get_griddim(), p.get_blockdim()>>>(p.size(),
125  p.data_ptr(), sign_gpu.data_ptr());
126  kernel_det_mod<<<x.get_griddim(), x.get_blockdim()>>>(x.nrows(), x.ncols(),
127  x.data_ptr(), modulus_gpu.data_ptr(), sign_gpu.data_ptr());
128 
129  sign_gpu.get_val(&sign);
130  modulus_gpu.get_val(&modulus);
131  }
132 }
133 }
134 
135 
136 #endif
fml::unimat::is_square
bool is_square() const
Is the matrix square?
Definition: unimat.hh:34
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::linalg::lu
void lu(cpumat< REAL > &x, cpuvec< int > &p, int &info)
Computes the PLU factorization with partial pivoting.
Definition: lu.hh:48
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::linalg::det
void det(cpumat< REAL > &x, int &sign, REAL &modulus)
Computes the determinant in logarithmic form.
Definition: det.hh:41
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpuscalar
Definition: gpuscalar.hh:16