fml  0.1-0
Fused Matrix Library
trace.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_LINALG_TRACE_H
6 #define FML_GPU_LINALG_TRACE_H
7 #pragma once
8 
9 
10 #include "../arch/arch.hh"
11 
12 #include "../internals/gpuscalar.hh"
13 
14 #include "../gpumat.hh"
15 
16 #include "lu.hh"
17 
18 
19 namespace fml
20 {
21 namespace linalg
22 {
23  namespace
24  {
25  template <typename REAL>
26  __global__ void kernel_trace(const len_t m, const len_t n, const REAL *data, REAL *tr)
27  {
28  int i = blockDim.x*blockIdx.x + threadIdx.x;
29  int j = blockDim.y*blockIdx.y + threadIdx.y;
30 
31  if (i < m && j < n && i == j)
32  atomicAdd(tr, data[i + m*i]);
33  }
34  }
35 
36 
37 
45  template <typename REAL>
46  REAL trace(const gpumat<REAL> &x)
47  {
48  const len_t m = x.nrows();
49  const len_t n = x.ncols();
50  auto c = x.get_card();
51 
52  REAL tr = 0;
53  gpuscalar<REAL> tr_gpu(c, tr);
54 
55  kernel_trace<<<x.get_griddim(), x.get_blockdim()>>>(m, n, x.data_ptr(),
56  tr_gpu.data_ptr());
57 
58  tr_gpu.get_val(&tr);
59  c->check();
60 
61  return tr;
62  }
63 }
64 }
65 
66 
67 #endif
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::linalg::trace
REAL trace(const cpumat< REAL > &x)
Computes the trace, i.e. the sum of the diagonal.
Definition: trace.hh:27
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpuscalar
Definition: gpuscalar.hh:16