5 #ifndef FML_GPU_LINALG_LINALG_MISC_H
6 #define FML_GPU_LINALG_LINALG_MISC_H
12 #include "../../_internals/linalgutils.hh"
14 #include "../arch/arch.hh"
16 #include "../internals/gpuscalar.hh"
17 #include "../internals/kernelfuns.hh"
19 #include "../gpumat.hh"
20 #include "../gpuvec.hh"
22 #include "linalg_err.hh"
23 #include "linalg_lu.hh"
32 static __global__
void kernel_lu_pivot_sgn(
const len_t n,
int *ipiv,
int *sgn)
34 int i = blockDim.x*blockIdx.x + threadIdx.x;
38 ipiv[i] = (ipiv[i] != (i+1) ? -1 : 1);
39 atomicAdd(sgn, ipiv[i]);
42 (*sgn) = ((*sgn)%2 == 0 ? 1 : -1);
46 template <
typename REAL>
47 __global__
void kernel_det_mod(
const len_t m,
const len_t n,
const REAL *x, REAL *mod,
int *sgn)
49 int i = blockDim.x*blockIdx.x + threadIdx.x;
50 int j = blockDim.y*blockIdx.y + threadIdx.y;
52 if (i < m && j < n && i == j)
70 if (threadIdx.x == 0 && threadIdx.y == 0)
72 (*sgn) = ((*sgn)%2 == 0 ? 1 : -1);
97 template <
typename REAL>
101 throw std::runtime_error(
"'x' must be a square matrix");
103 auto c = x.get_card();
128 kernel_lu_pivot_sgn<<<p.get_griddim(), p.get_blockdim()>>>(p.
size(),
130 kernel_det_mod<<<x.get_griddim(), x.get_blockdim()>>>(x.
nrows(), x.
ncols(),
131 x.
data_ptr(), modulus_gpu.data_ptr(), sign_gpu.data_ptr());
133 sign_gpu.get_val(&sign);
134 modulus_gpu.get_val(&modulus);
146 template <
typename REAL>
149 const len_t m = x.
nrows();
150 const len_t n = x.
ncols();
151 auto c = x.get_card();
156 fml::kernelfuns::kernel_trace<<<x.get_griddim(), x.get_blockdim()>>>(m, n,