fml  0.1-0
Fused Matrix Library
xpose.hh
1 // This file is part of fml which is released under the Boost Software
2 // License, Version 1.0. See accompanying file LICENSE or copy at
3 // https://www.boost.org/LICENSE_1_0.txt
4 
5 #ifndef FML_CPU_LINALG_XPOSE_H
6 #define FML_CPU_LINALG_XPOSE_H
7 #pragma once
8 
9 
10 #include "../../_internals/omp.hh"
11 
12 #include "../cpumat.hh"
13 
14 #include "internals/blas.hh"
15 
16 
17 namespace fml
18 {
20 namespace linalg
21 {
36  template <typename REAL>
37  void xpose(const cpumat<REAL> &x, cpumat<REAL> &tx)
38  {
39  const len_t m = x.nrows();
40  const len_t n = x.ncols();
41 
42  if (m != tx.ncols() || n != tx.nrows())
43  tx.resize(n, m);
44 
45  const int blocksize = 8;
46  const REAL *x_d = x.data_ptr();
47  REAL *tx_d = tx.data_ptr();
48 
49  #pragma omp parallel for shared(tx) schedule(dynamic, 1) if(m*n > fml::omp::OMP_MIN_SIZE)
50  for (int j=0; j<n; j+=blocksize)
51  {
52  for (int i=0; i<m; i+=blocksize)
53  {
54  for (int col=j; col<j+blocksize && col<n; ++col)
55  {
56  for (int row=i; row<i+blocksize && row<m; ++row)
57  tx_d[col + n*row] = x_d[row + m*col];
58  }
59  }
60  }
61  }
62 
64  template <typename REAL>
66  {
67  cpumat<REAL> tx(x.ncols(), x.nrows());
68  xpose(x, tx);
69  return tx;
70  }
71 }
72 }
73 
74 
75 #endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition: cpumat.hh:36
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition: xpose.hh:37
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition: unimat.hh:36
fml::cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition: cpumat.hh:233
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition: unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition: unimat.hh:40
fml
Core namespace.
Definition: dimops.hh:10