fml
0.1-0
Fused Matrix Library
xpose.hh
1
// This file is part of fml which is released under the Boost Software
2
// License, Version 1.0. See accompanying file LICENSE or copy at
3
// https://www.boost.org/LICENSE_1_0.txt
4
5
#ifndef FML_CPU_LINALG_XPOSE_H
6
#define FML_CPU_LINALG_XPOSE_H
7
#pragma once
8
9
10
#include "../../_internals/omp.hh"
11
12
#include "../cpumat.hh"
13
14
#include "internals/blas.hh"
15
16
17
namespace
fml
18
{
20
namespace
linalg
21
{
36
template
<
typename
REAL>
37
void
xpose
(
const
cpumat<REAL>
&x,
cpumat<REAL>
&tx)
38
{
39
const
len_t m = x.
nrows
();
40
const
len_t n = x.
ncols
();
41
42
if
(m != tx.
ncols
() || n != tx.
nrows
())
43
tx.
resize
(n, m);
44
45
const
int
blocksize = 8;
46
const
REAL *x_d = x.
data_ptr
();
47
REAL *tx_d = tx.
data_ptr
();
48
49
#pragma omp parallel for shared(tx) schedule(dynamic, 1) if(m*n > fml::omp::OMP_MIN_SIZE)
50
for
(
int
j=0; j<n; j+=blocksize)
51
{
52
for
(
int
i=0; i<m; i+=blocksize)
53
{
54
for
(
int
col=j; col<j+blocksize && col<n; ++col)
55
{
56
for
(
int
row=i; row<i+blocksize && row<m; ++row)
57
tx_d[col + n*row] = x_d[row + m*col];
58
}
59
}
60
}
61
}
62
64
template
<
typename
REAL>
65
cpumat<REAL>
xpose
(
const
cpumat<REAL>
&x)
66
{
67
cpumat<REAL>
tx(x.
ncols
(), x.
nrows
());
68
xpose
(x, tx);
69
return
tx;
70
}
71
}
72
}
73
74
75
#endif
fml::cpumat
Matrix class for data held on a single CPU.
Definition:
cpumat.hh:36
fml::linalg::xpose
void xpose(const cpumat< REAL > &x, cpumat< REAL > &tx)
Computes the transpose out-of-place (i.e. in a copy).
Definition:
xpose.hh:37
fml::unimat::nrows
len_t nrows() const
Number of rows.
Definition:
unimat.hh:36
fml::cpumat::resize
void resize(len_t nrows, len_t ncols)
Resize the internal object storage.
Definition:
cpumat.hh:233
fml::unimat::ncols
len_t ncols() const
Number of columns.
Definition:
unimat.hh:38
fml::unimat::data_ptr
REAL * data_ptr()
Pointer to the internal array.
Definition:
unimat.hh:40
fml
Core namespace.
Definition:
dimops.hh:10
fml
src
fml
cpu
linalg
xpose.hh
Generated by
1.8.17