fml  0.1-0
Fused Matrix Library
cov.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_STATS_COV_H
6 #define FML_GPU_STATS_COV_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../internals/gpuscalar.hh"
13 
14 #include "../gpumat.hh"
15 #include "../gpuvec.hh"
16 
17 #include "../dimops.hh"
18 
19 #include "../linalg/crossprod.hh"
20 #include "../linalg/matmult.hh"
21 
22 
23 namespace fml
24 {
25 namespace stats
26 {
42  template <typename REAL>
44  {
45  dimops::scale(true, false, x);
46 
47  const REAL alpha = 1. / ((REAL) (x.nrows()-1));
48  linalg::crossprod(alpha, x, cov);
49  }
50 
51 
52 
54  template <typename REAL>
56  {
57  if (x.nrows() != y.nrows())
58  throw std::runtime_error("non-conformable arguments");
59 
60  dimops::scale(true, false, x);
61  dimops::scale(true, false, y);
62 
63  const REAL alpha = 1. / ((REAL) (x.nrows()-1));
64  linalg::matmult(true, false, alpha, x, y, cov);
65  }
66 
67 
68 
69  namespace internals
70  {
71  template <typename REAL>
72  __global__ void kernel_cov_vecvec(const len_t n, const REAL *x,
73  const REAL *y, REAL *sxy, REAL *sx, REAL *sy)
74  {
75  int i = blockDim.x*blockIdx.x + threadIdx.x;
76  int j = blockDim.y*blockIdx.y + threadIdx.y;
77 
78  if (i < n && j < n)
79  {
80  atomicAdd(sxy, x[i]*y[i]);
81  atomicAdd(sx, x[i]);
82  atomicAdd(sy, y[i]);
83  }
84  }
85  }
86 
88  template <typename REAL>
89  REAL cov(const gpuvec<REAL> &x, const gpuvec<REAL> &y)
90  {
91  const len_t n = x.size();
92  if (n != y.size())
93  throw std::runtime_error("non-conformal arguments");
94 
95  REAL sxy, sx, sy;
96  sxy = sx = sy = 0;
97  gpuscalar<REAL> sxy_gpu(x.get_card(), sxy);
98  gpuscalar<REAL> sx_gpu(x.get_card(), sx);
99  gpuscalar<REAL> sy_gpu(x.get_card(), sy);
100 
101  internals::kernel_cov_vecvec<<<x.get_griddim(), x.get_blockdim()>>>(n,
102  x.data_ptr(), y.data_ptr(), sxy_gpu.data_ptr(), sx_gpu.data_ptr(),
103  sy_gpu.data_ptr());
104 
105  sxy_gpu.get_val(&sxy);
106  sx_gpu.get_val(&sx);
107  sy_gpu.get_val(&sy);
108 
109  return (sxy - (sx*sy*((REAL) 1./n))) * ((REAL) 1./(n-1));
110  }
111 }
112 }
113 
114 
115 #endif
fml::linalg::crossprod
void crossprod(const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
Computes lower triangle of alpha*x^T*x.
Definition: crossprod.hh:37
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::dimops::scale
void scale(const bool rm_mean, const bool rm_sd, cpumat< REAL > &x)
Remove the mean and/or the sd from a matrix.
Definition: dimops.hh:376
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::stats::cov
void cov(cpumat< REAL > &x, cpumat< REAL > &cov)
Covariance.
Definition: cov.hh:43
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpuscalar
Definition: gpuscalar.hh:16
fml::linalg::matmult
void matmult(const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.
Definition: matmult.hh:43