5 #ifndef FML_GPU_STATS_COR_H
6 #define FML_GPU_STATS_COR_H
12 #include "../../_internals/omp.hh"
14 #include "../gpumat.hh"
15 #include "../gpuvec.hh"
17 #include "../dimops.hh"
19 #include "../linalg/crossprod.hh"
20 #include "../linalg/matmult.hh"
42 template <
typename REAL>
47 const REAL alpha = 1. / ((REAL) (x.
nrows()-1));
54 template <
typename REAL>
58 throw std::runtime_error(
"non-conformable arguments");
63 const REAL alpha = 1. / ((REAL) (x.
nrows()-1));
71 template <
typename REAL>
72 __global__
void kernel_cor_vecvec(
const len_t n,
const REAL *x,
73 const REAL *y,
const REAL *meanx,
const REAL *meany, REAL *cp, REAL *normx,
76 int i = blockDim.x*blockIdx.x + threadIdx.x;
77 int j = blockDim.y*blockIdx.y + threadIdx.y;
81 const REAL xi_mm = x[i] - (*meanx);
82 const REAL yi_mm = y[i] - (*meany);
84 atomicAdd(cp, xi_mm * yi_mm);
85 atomicAdd(normx, xi_mm * xi_mm);
86 atomicAdd(normy, yi_mm * yi_mm);
92 template <
typename REAL>
95 const len_t n = x.
size();
97 throw std::runtime_error(
"non-conformal arguments");
99 const REAL meanx = x.
sum() / n;
100 const REAL meany = y.
sum() / n;
104 REAL cp = 0, normx = 0, normy = 0;
109 internals::kernel_cor_vecvec<<<x.get_griddim(), x.get_blockdim()>>>(n,
111 cp_gpu.data_ptr(), normx_gpu.data_ptr(), normy_gpu.data_ptr());
114 normx_gpu.get_val(&normx);
115 normy_gpu.get_val(&normy);
117 return cp / sqrt(normx * normy);