fml  0.1-0
Fused Matrix Library
norm.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_GPU_LINALG_NORM_H
6 #define FML_GPU_LINALG_NORM_H
7 #pragma once
8 
9 
10 #include <stdexcept>
11 
12 #include "../arch/arch.hh"
13 
14 #include "../internals/atomics.hh"
15 #include "../internals/gpu_utils.hh"
16 #include "../internals/gpuscalar.hh"
17 #include "../internals/kernelfuns.hh"
18 
19 #include "../gpumat.hh"
20 
21 #include "qr.hh"
22 #include "svd.hh"
23 
24 
25 namespace fml
26 {
27 namespace linalg
28 {
29  namespace
30  {
31  template <typename REAL>
32  __global__ void kernel_norm_1(const len_t m, const len_t n, const REAL *x, REAL *macs, REAL *norm)
33  {
34  int i = blockDim.x*blockIdx.x + threadIdx.x;
35  int j = blockDim.y*blockIdx.y + threadIdx.y;
36 
37  if (i < m && j < n)
38  {
39  REAL tmp = fabs(x[i + m*j]);
40  atomicAdd(macs + j, tmp);
41  atomics::atomicMaxf(norm, macs[j]);
42  }
43  }
44  }
45 
60  template <typename REAL>
61  REAL norm_1(const gpumat<REAL> &x)
62  {
63  REAL norm = 0;
64  gpuscalar<REAL> norm_gpu(x.get_card(), norm);
65  gpuvec<REAL> macs(x.get_card(), x.ncols());
66  macs.fill_zero();
67 
68  kernel_norm_1<<<x.get_griddim(), x.get_blockdim()>>>(x.nrows(),
69  x.ncols(), x.data_ptr(), macs.data_ptr(), norm_gpu.data_ptr());
70 
71  norm_gpu.get_val(&norm);
72 
73  return norm;
74  }
75 
76 
77 
78  namespace
79  {
80  template <typename REAL>
81  __global__ void kernel_norm_I(const len_t m, const len_t n, const REAL *x, REAL *mars, REAL *norm)
82  {
83  int i = blockDim.x*blockIdx.x + threadIdx.x;
84  int j = blockDim.y*blockIdx.y + threadIdx.y;
85 
86  if (i < m && j < n)
87  {
88  REAL tmp = fabs(x[i + m*j]);
89  atomicAdd(mars + i, tmp);
90  atomics::atomicMaxf(norm, mars[i]);
91  }
92  }
93  }
94 
109  template <typename REAL>
110  REAL norm_I(const gpumat<REAL> &x)
111  {
112  REAL norm = 0;
113  gpuscalar<REAL> norm_gpu(x.get_card(), norm);
114  gpuvec<REAL> mars(x.get_card(), x.nrows());
115  mars.fill_zero();
116 
117  kernel_norm_I<<<x.get_griddim(), x.get_blockdim()>>>(x.nrows(),
118  x.ncols(), x.data_ptr(), mars.data_ptr(), norm_gpu.data_ptr());
119 
120  norm_gpu.get_val(&norm);
121 
122  return norm;
123  }
124 
125 
126 
127  namespace
128  {
129  template <typename REAL>
130  __global__ void kernel_norm_F_sq(const len_t m, const len_t n, const REAL *x, REAL *norm)
131  {
132  int i = blockDim.x*blockIdx.x + threadIdx.x;
133  int j = blockDim.y*blockIdx.y + threadIdx.y;
134 
135  if (i < m && j < n)
136  {
137  REAL tmp = x[i + m*j] * x[i + m*j];
138  atomicAdd(norm, tmp);
139  }
140  }
141  }
142 
152  template <typename REAL>
153  REAL norm_F(const gpumat<REAL> &x)
154  {
155  REAL norm = 0;
156  gpuscalar<REAL> norm_gpu(x.get_card(), norm);
157 
158  kernel_norm_F_sq<<<x.get_griddim(), x.get_blockdim()>>>(x.nrows(),
159  x.ncols(), x.data_ptr(), norm_gpu.data_ptr());
160 
161  norm_gpu.get_val(&norm);
162 
163  return sqrt(norm);
164  }
165 
166 
167 
168  namespace
169  {
170  template <typename REAL>
171  __global__ void kernel_norm_M(const len_t m, const len_t n, const REAL *x, REAL *norm)
172  {
173  int i = blockDim.x*blockIdx.x + threadIdx.x;
174  int j = blockDim.y*blockIdx.y + threadIdx.y;
175 
176  if (i < m && j < n)
177  {
178  REAL tmp = fabs(x[i + m*j]);
179  atomics::atomicMaxf(norm, tmp);
180  }
181  }
182  }
183 
193  template <typename REAL>
194  REAL norm_M(const gpumat<REAL> &x)
195  {
196  REAL norm = 0;
197  gpuscalar<REAL> norm_gpu(x.get_card(), norm);
198 
199  kernel_norm_M<<<x.get_griddim(), x.get_blockdim()>>>(x.nrows(),
200  x.ncols(), x.data_ptr(), norm_gpu.data_ptr());
201 
202  norm_gpu.get_val(&norm);
203 
204  return norm;
205  }
206 
207 
208 
227  template <typename REAL>
229  {
230  REAL ret;
231  gpuvec<REAL> s(x.get_card());
232  cpsvd(x, s);
233  ret = s.get(0);
234 
235  return ret;
236  }
237 
238 
239 
240  namespace
241  {
242  template <typename REAL>
243  REAL cond_square_internals(const char norm, gpumat<REAL> &x)
244  {
245  const len_t n = x.nrows();
246 
247  REAL ret;
248 
249  if (norm == '1')
250  ret = norm_1(x);
251  else //if (norm == 'I')
252  ret = norm_I(x);
253 
254  invert(x);
255 
256  if (norm == '1')
257  ret *= norm_1(x);
258  else //if (norm == 'I')
259  ret *= norm_I(x);
260 
261  return ret;
262  }
263 
264  template <typename REAL>
265  REAL cond_nonsquare_internals(const char norm, gpumat<REAL> &x)
266  {
267  const len_t m = x.nrows();
268  const len_t n = x.ncols();
269 
270  auto c = x.get_card();
271 
272  REAL ret;
273  gpublas_status_t check;
274 
275  gpuvec<REAL> aux(c);
276 
277  if (m > n)
278  {
279  gpumat<REAL> R(c);
280  qr(false, x, aux);
281  qr_R(x, R);
282 
283  if (norm == '1')
284  ret = norm_1(R);
285  else //if (norm == 'I')
286  ret = norm_I(R);
287 
288  x.fill_eye();
289 
290  check = gpublas::trsm(c->blas_handle(), GPUBLAS_SIDE_LEFT,
291  GPUBLAS_FILL_U, GPUBLAS_OP_N, GPUBLAS_DIAG_NON_UNIT, n, n,
292  (REAL)1, R.data_ptr(), n, x.data_ptr(), m);
293  gpublas::err::check_ret(check, "trsm");
294 
295  gpu_utils::lacpy('U', n, n, x.data_ptr(), m, R.data_ptr(), n);
296 
297  if (norm == '1')
298  ret *= norm_1(R);
299  else //if (norm == 'I')
300  ret *= norm_I(R);
301  }
302  else
303  {
304  gpumat<REAL> L(c);
305  lq(x, aux);
306  lq_L(x, L);
307 
308  if (norm == '1')
309  ret = norm_1(L);
310  else //if (norm == 'I')
311  ret = norm_I(L);
312 
313  x.fill_eye();
314 
315  check = gpublas::trsm(c->blas_handle(), GPUBLAS_SIDE_LEFT,
316  GPUBLAS_FILL_L, GPUBLAS_OP_N, GPUBLAS_DIAG_NON_UNIT, m, m,
317  (REAL)1, L.data_ptr(), m, x.data_ptr(), m);
318  gpublas::err::check_ret(check, "trsm");
319 
320  gpu_utils::lacpy('L', m, m, x.data_ptr(), m, L.data_ptr(), m);
321 
322  if (norm == '1')
323  ret *= norm_1(L);
324  else //if (norm == 'I')
325  ret *= norm_I(L);
326  }
327 
328  return ret;
329  }
330  }
331 
350  template <typename REAL>
352  {
353  if (x.is_square())
354  return cond_square_internals('1', x);
355  else
356  return cond_nonsquare_internals('1', x);
357  }
358 
359 
360 
379  template <typename REAL>
381  {
382  if (x.is_square())
383  return cond_square_internals('I', x);
384  else
385  return cond_nonsquare_internals('I', x);
386  }
387 
388 
389 
390  namespace
391  {
392  static __global__ void kernel_min_nz(const len_t len, const float *data, float *mn)
393  {
394  int i = blockDim.x*blockIdx.x + threadIdx.x;
395 
396  if (i < len && data[i] > 0)
397  atomics::atomicMinf(mn, data[i]);
398  }
399 
400  static __global__ void kernel_min_nz(const len_t len, const double *data, double *mn)
401  {
402  int i = blockDim.x*blockIdx.x + threadIdx.x;
403 
404  if (i < len && data[i] > 0)
405  atomics::atomicMinf(mn, data[i]);
406  }
407  }
408 
426  template <typename REAL>
428  {
429  gpuvec<REAL> s(x.get_card());
430  cpsvd(x, s);
431 
432  REAL max = s.max();
433 
434  if (max == 0)
435  return 0;
436 
437  REAL min;
438  fml::gpuscalar<REAL> min_gpu(x.get_card());
439  kernel_min_nz<<<s.get_griddim(), s.get_blockdim()>>>(s.size(), s.data_ptr(),
440  min_gpu.data_ptr());
441  min_gpu.get_val(&min);
442 
443  return max/min;
444  }
445 }
446 }
447 
448 
449 #endif
fml::linalg::norm_I
REAL norm_I(const cpumat< REAL > &x)
Computes the infinity matrix norm, the maximum absolute row sum.
Definition: norm.hh:79
fml::linalg::cond_2
REAL cond_2(cpumat< REAL > &x)
Estimates the condition number under the 2 norm.
Definition: norm.hh:339
fml::unimat::is_square
bool is_square() const
Is the matrix square?
Definition: unimat.hh:34
fml::univec::data_ptr
T * data_ptr()
Pointer to the internal array.
Definition: univec.hh:28
fml::gpuvec
Vector class for data held on a single GPU.
Definition: gpuvec.hh:32
fml::gpuvec::fill_zero
void fill_zero()
Set all values to zero.
Definition: gpuvec.hh:387
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::linalg::cond_1
REAL cond_1(cpumat< REAL > &x)
Estimates the condition number under the 1-norm.
Definition: norm.hh:281
fml::linalg::qr
void qr(const bool pivot, cpumat< REAL > &x, cpuvec< REAL > &qraux)
Computes the QR decomposition.
Definition: qr.hh:94
fml::linalg::norm_1
REAL norm_1(const cpumat< REAL > &x)
Computes the 1 matrix norm, the maximum absolute column sum.
Definition: norm.hh:38
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::linalg::cond_I
REAL cond_I(cpumat< REAL > &x)
Estimates the condition number under the infinity norm.
Definition: norm.hh:311
fml::gpuvec::max
T max() const
Maximum value of the vector.
Definition: gpuvec.hh:488
fml::linalg::norm_2
REAL norm_2(cpumat< REAL > &x)
Computes the 2/spectral matrix norm.
Definition: norm.hh:188
fml::gpuvec::get
T get(const len_t i) const
Get the specified value.
Definition: gpuvec.hh:529
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml::linalg::lq_L
void lq_L(const cpumat< REAL > &LQ, cpumat< REAL > &L)
Recover the L matrix from an LQ decomposition.
Definition: qr.hh:247
fml
Core namespace.
Definition: dimops.hh:10
fml::univec::size
len_t size() const
Number of elements in the vector.
Definition: univec.hh:26
fml::linalg::qr_R
void qr_R(const cpumat< REAL > &QR, cpumat< REAL > &R)
Recover the R matrix from a QR decomposition.
Definition: qr.hh:162
fml::linalg::lq
void lq(cpumat< REAL > &x, cpuvec< REAL > &lqaux)
Computes the LQ decomposition.
Definition: qr.hh:223
fml::linalg::invert
void invert(cpumat< REAL > &x)
Compute the matrix inverse.
Definition: invert.hh:46
fml::linalg::norm_M
REAL norm_M(const cpumat< REAL > &x)
Computes the maximum modulus matrix norm.
Definition: norm.hh:145
fml::gpumat
Matrix class for data held on a single GPU.
Definition: gpumat.hh:35
fml::gpuscalar
Definition: gpuscalar.hh:16
fml::linalg::cpsvd
void cpsvd(const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
Computes the singular value decomposition using the "crossproducts SVD". This method is not numerical...
Definition: svd.hh:323
fml::linalg::norm_F
REAL norm_F(const cpumat< REAL > &x)
Computes the Frobenius/Euclidean matrix norm.
Definition: norm.hh:118