fml  0.1-0
Fused Matrix Library
add.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_CPU_LINALG_ADD_H
6 #define FML_CPU_LINALG_ADD_H
7 #pragma once
8 
9 
10 #include "../../_internals/linalgutils.hh"
11 #include "../../_internals/omp.hh"
12 
13 #include "../cpumat.hh"
14 
15 
16 namespace fml
17 {
18 namespace linalg
19 {
34  template <typename REAL>
35  void add(const bool transx, const bool transy, const REAL alpha,
36  const REAL beta, const cpumat<REAL> &x, const cpumat<REAL> &y,
37  cpumat<REAL> &ret)
38  {
39  len_t m, n;
40  fml::linalgutils::matadd_params(transx, transy, x.nrows(), x.ncols(),
41  y.nrows(), y.ncols(), &m, &n);
42 
43  if (ret.nrows() != m || ret.ncols() != n)
44  ret.resize(m, n);
45 
46  const REAL *x_d = x.data_ptr();
47  const REAL *y_d = y.data_ptr();
48  REAL *ret_d = ret.data_ptr();
49 
50  if (!transx && !transy)
51  {
52  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
53  for (len_t j=0; j<n; j++)
54  {
55  #pragma omp simd
56  for (len_t i=0; i<m; i++)
57  ret_d[i + m*j] = alpha*x_d[i + m*j] + beta*y_d[i + m*j];
58  }
59  }
60  else if (transx && transy)
61  {
62  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
63  for (len_t j=0; j<m; j++)
64  {
65  #pragma omp simd
66  for (len_t i=0; i<n; i++)
67  ret_d[j + m*i] = alpha*x_d[i + n*j] + beta*y_d[i + n*j];
68  }
69  }
70  else if (transx && !transy)
71  {
72  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
73  for (len_t j=0; j<n; j++)
74  {
75  #pragma omp simd
76  for (len_t i=0; i<m; i++)
77  ret_d[i + m*j] = alpha*x_d[j + n*i] + beta*y_d[i + m*j];
78  }
79  }
80  else if (!transx && transy)
81  {
82  #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
83  for (len_t j=0; j<n; j++)
84  {
85  #pragma omp simd
86  for (len_t i=0; i<m; i++)
87  ret_d[i + m*j] = alpha*x_d[i + m*j] + beta*y_d[j + n*i];
88  }
89  }
90  }
91 
92 
93 
95  template <typename REAL>
96  cpumat<REAL> add(const bool transx, const bool transy, const REAL alpha,
97  const REAL beta, const cpumat<REAL> &x, const cpumat<REAL> &y)
98  {
99  len_t m, n;
100  fml::linalgutils::matadd_params(transx, transy, x.nrows(), x.ncols(),
101  y.nrows(), y.ncols(), &m, &n);
102 
103  cpumat<REAL> ret(m, n);
104  add(transx, transy, alpha, beta, x, y, ret);
105  return ret;
106  }
107 }
108 }
109 
110 
111 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: cpumat.hh:233
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10
fml::linalg::add
void add(const bool transx, const bool transy, const REAL alpha, const REAL beta, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T.
Definition: add.hh:35