5 #ifndef FML_GPU_STATS_COV_H
6 #define FML_GPU_STATS_COV_H
12 #include "../internals/gpuscalar.hh"
14 #include "../gpumat.hh"
15 #include "../gpuvec.hh"
17 #include "../dimops.hh"
19 #include "../linalg/crossprod.hh"
20 #include "../linalg/matmult.hh"
// Fragment of a routine templated on the scalar type REAL. The signature and
// the rest of the body are not visible in this excerpt.
// NOTE(review): presumably the single-matrix covariance entry point, where
// alpha scales a crossproduct of x — confirm against the elided lines.
42 template <
typename REAL>
// alpha = 1 / (nrows(x) - 1). The literal `1.` is a double, so the division is
// carried out in double and then converted to REAL.
// NOTE(review): if x.nrows() == 1 the denominator is zero; no guard is
// visible in this fragment — confirm one exists in the elided lines.
47 const REAL alpha = 1. / ((REAL) (x.
nrows()-1));
// Fragment of a second routine templated on REAL. The signature, the condition
// guarding the throw, and the rest of the body are not visible in this excerpt.
// NOTE(review): presumably the two-matrix covariance overload — confirm.
54 template <
typename REAL>
// Throws std::runtime_error with the message below. The check that triggers
// it is elided.
// NOTE(review): presumably a dimension mismatch between the inputs — confirm.
58 throw std::runtime_error(
"non-conformable arguments");
// alpha = 1 / (nrows(x) - 1), computed in double (because of the `1.` literal)
// and then converted to REAL.
// NOTE(review): x.nrows() == 1 makes the denominator zero; no guard is visible here.
63 const REAL alpha = 1. / ((REAL) (x.
nrows()-1));
// Device kernel (declared __global__) templated on REAL. Only part of the body
// is visible in this excerpt.
//
// Parameters as declared:
//   n         - element count (len_t); its use is not visible here,
//               presumably the bound for the index check — TODO confirm.
//   x, y      - read-only input arrays; the visible code reads x[i] and y[i].
//   sxy       - accumulator that receives the sum of x[i]*y[i] via atomicAdd.
//   sx, sy    - output pointers; their updates are not visible here,
//               presumably the sums of x and y respectively — TODO confirm.
//
// NOTE(review): the caller presumably must zero *sxy, *sx and *sy before the
// launch, since the visible code only accumulates into *sxy — confirm.
71 template <
typename REAL>
72 __global__
void kernel_cov_vecvec(
const len_t n,
const REAL *x,
73 const REAL *y, REAL *sxy, REAL *sx, REAL *sy)
// Global thread indices along the x and y launch dimensions.
75 int i = blockDim.x*blockIdx.x + threadIdx.x;
// NOTE(review): j is computed but is not used by any visible statement;
// presumably the elided guard restricts work to a single j so each element is
// counted once — confirm.
76 int j = blockDim.y*blockIdx.y + threadIdx.y;
// Every participating thread adds its product to the single address sxy, so
// these updates serialize under contention. If REAL is double,
// atomicAdd(double*) requires SM60+.
// NOTE(review): the bounds check protecting x[i]/y[i] is not visible in this
// fragment — confirm that i < n is enforced before this line.
80 atomicAdd(sxy, x[i]*y[i]);
// Fragment of a host-side routine templated on REAL that launches
// internals::kernel_cov_vecvec and combines the results into a scalar.
// The signature and several statements are not visible in this excerpt.
// NOTE(review): presumably the vector/vector covariance overload — confirm.
88 template <
typename REAL>
// Element count taken from x.
91 const len_t n = x.
size();
// Throws std::runtime_error with the message below. The triggering check is
// elided.
// NOTE(review): presumably a length mismatch between x and y — confirm.
// NOTE(review): the wording "non-conformal" differs from the
// "non-conformable arguments" message thrown earlier in this file; consider
// unifying (left unchanged here because it is a runtime string).
93 throw std::runtime_error(
"non-conformal arguments");
// Kernel launch using the grid/block dimensions reported by x. The remaining
// arguments are on elided lines.
// NOTE(review): no launch error check is visible in this fragment — confirm
// one exists.
101 internals::kernel_cov_vecvec<<<x.get_griddim(), x.get_blockdim()>>>(n,
// NOTE(review): presumably copies the device-side accumulator into the host
// variable sxy; the corresponding reads for sx and sy are not visible — confirm.
105 sxy_gpu.get_val(&sxy);
// Returns (sxy - sx*sy/n) / (n - 1). In each factor the cast binds to the
// literal first, i.e. ((REAL) 1.) / n, so the division is done in REAL.
// NOTE(review): n == 1 makes the second factor divide by zero and n == 0
// makes the first do so; no guard is visible in this fragment.
109 return (sxy - (sx*sy*((REAL) 1./n))) * ((REAL) 1./(n-1));