5 #ifndef FML_CPU_LINALG_ADD_H
6 #define FML_CPU_LINALG_ADD_H
10 #include "../../_internals/linalgutils.hh"
11 #include "../../_internals/omp.hh"
13 #include "../cpumat.hh"
/**
 * Scaled element-wise sum of two matrices with optional transposition:
 * every branch below stores alpha*x_d[...] + beta*y_d[...] into ret_d[...].
 *
 * transx / transy  choose whether x / y is read with transposed indexing.
 * alpha            scalar multiplying the x term.
 * beta             scalar multiplying the y term.
 *
 * NOTE(review): m, n, x_d, y_d and ret_d are presumably the result shape
 * (m rows, n columns) and the raw element arrays of x, y and ret -- confirm
 * against their declarations.
 */
34 template <
typename REAL>
35 void add(
const bool transx,
const bool transy,
const REAL alpha,
// NOTE(review): matadd_params receives both transpose flags and x's
// dimensions; presumably it checks that the operands conform and yields
// m and n -- confirm against linalgutils.hh.
40 fml::linalgutils::matadd_params(transx, transy, x.
nrows(), x.
ncols(),
// Case 1: neither operand transposed. All three arrays use the same linear
// index i + m*j, with i as the unit-stride index.
50 if (!transx && !transy)
// The outer loop is parallelised only when m*n exceeds OMP_MIN_SIZE.
52 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
53 for (len_t j=0; j<n; j++)
56 for (len_t i=0; i<m; i++)
57 ret_d[i + m*j] = alpha*x_d[i + m*j] + beta*y_d[i + m*j];
// Case 2: both operands transposed. The loop bounds are swapped (j < m,
// i < n): x_d and y_d are read at i + n*j (unit stride in i) and the result
// is written at the mirrored position j + m*i (stride m in i).
60 else if (transx && transy)
62 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
63 for (len_t j=0; j<m; j++)
66 for (len_t i=0; i<n; i++)
67 ret_d[j + m*i] = alpha*x_d[i + n*j] + beta*y_d[i + n*j];
// Case 3: only x transposed. x_d is read at j + n*i (stride n in i), while
// y_d and ret_d share the index i + m*j.
70 else if (transx && !transy)
72 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
73 for (len_t j=0; j<n; j++)
76 for (len_t i=0; i<m; i++)
77 ret_d[i + m*j] = alpha*x_d[j + n*i] + beta*y_d[i + m*j];
// Case 4: only y transposed. Mirror image of case 3: y_d is read at
// j + n*i, while x_d and ret_d share the index i + m*j.
80 else if (!transx && transy)
82 #pragma omp parallel for if(m*n > fml::omp::OMP_MIN_SIZE)
83 for (len_t j=0; j<n; j++)
86 for (len_t i=0; i<m; i++)
87 ret_d[i + m*j] = alpha*x_d[i + m*j] + beta*y_d[j + n*i];
/**
 * Companion template that hands the computation to
 * add(transx, transy, alpha, beta, x, y, ret).
 *
 * NOTE(review): presumably the convenience variant that creates ret itself
 * and returns it -- confirm against the full signature.
 */
95 template <
typename REAL>
// Called with the transpose flags and x's dimensions first.
// NOTE(review): presumably used to obtain the result shape for ret -- confirm.
100 fml::linalgutils::matadd_params(transx, transy, x.
nrows(), x.
ncols(),
// Delegate the element-wise work to the seven-argument add().
104 add(transx, transy, alpha, beta, x, y, ret);