5 #ifndef FML_MPI_LINALG_LINALG_NORM_H
6 #define FML_MPI_LINALG_LINALG_NORM_H
10 #include "../../cpu/cpuvec.hh"
12 #include "../mpimat.hh"
33 template <
typename REAL>
36 const len_t n = x.
nrows();
37 const len_t m_local = x.nrows_local();
38 const len_t n_local = x.ncols_local();
40 const grid g = x.get_grid();
48 #pragma omp parallel for if(m_local*n_local > fml::omp::OMP_MIN_SIZE)
49 for (len_t j=0; j<n_local; j++)
51 for (len_t i=0; i<m_local; i++)
52 macs_d[j] += fabs(x_d[i + m_local*j]);
57 for (len_t j=0; j<n; j++)
84 template <
typename REAL>
87 const len_t m = x.
nrows();
88 const len_t m_local = x.nrows_local();
89 const len_t n_local = x.ncols_local();
91 const grid g = x.get_grid();
99 for (len_t j=0; j<n_local; j++)
101 for (len_t i=0; i<m_local; i++)
102 mars_d[i] += fabs(x_d[i + m_local*j]);
107 for (len_t i=0; i<m; i++)
109 if (norm < mars_d[i])
113 g.
allreduce(1, 1, &norm,
'C', BLACS_MAX);
129 template <
typename REAL>
132 const len_t m_local = x.nrows_local();
133 const len_t n_local = x.ncols_local();
138 for (len_t j=0; j<n_local; j++)
140 for (len_t i=0; i<m_local; i++)
141 norm += x_d[i + m_local*j] * x_d[i + m_local*j];
144 x.get_grid().allreduce(1, 1, &norm,
'A');
160 template <
typename REAL>
163 const len_t m_local = x.nrows_local();
164 const len_t n_local = x.ncols_local();
169 for (len_t j=0; j<n_local; j++)
171 for (len_t i=0; i<m_local; i++)
173 REAL tmp = fabs(x_d[i + m_local*j]);
179 x.get_grid().allreduce(1, 1, &norm,
'A', BLACS_MAX);
202 template <
typename REAL>
217 template <
typename REAL>
218 REAL cond_square_internals(
const char norm,
mpimat<REAL> &x)
220 const len_t n = x.
nrows();
231 scalapack::gecon(norm, n, x.
data_ptr(), x.desc_ptr(), xnorm,
232 &ret, &tmp, -1, &liwork, -1, &info);
234 int lwork = (int) tmp;
238 scalapack::gecon(norm, n, x.
data_ptr(), x.desc_ptr(), xnorm,
239 &ret, work.data_ptr(), lwork, iwork.data_ptr(), liwork, &info);
241 fml::linalgutils::check_info(info,
"gecon");
243 return ((REAL)1)/ret;
246 template <
typename REAL>
247 REAL cond_nonsquare_internals(
const char norm, mpimat<REAL> &x)
249 const len_t m = x.nrows();
250 const len_t n = x.ncols();
259 mpimat<REAL> R(x.get_grid(), x.bf_rows(), x.bf_cols());
265 scalapack::trcon(norm,
'U',
'N', n, R.data_ptr(), R.desc_ptr(), &ret,
266 &tmp, -1, &liwork, -1, &info);
268 int lwork = (int) tmp;
269 cpuvec<REAL> work(lwork);
270 cpuvec<int> iwork(liwork);
272 scalapack::trcon(norm,
'U',
'N', n, R.data_ptr(), R.desc_ptr(), &ret,
273 x.data_ptr(), lwork, iwork.data_ptr(), iwork.size(), &info);
277 mpimat<REAL> L(x.get_grid(), x.bf_rows(), x.bf_cols());
283 scalapack::trcon(norm,
'L',
'N', m, L.data_ptr(), L.desc_ptr(), &ret,
284 &tmp, -1, &liwork, -1, &info);
286 int lwork = (int) tmp;
287 cpuvec<REAL> work(lwork);
288 cpuvec<int> iwork(liwork);
290 scalapack::trcon(norm,
'L',
'N', m, L.data_ptr(), L.desc_ptr(), &ret,
291 x.data_ptr(), x.nrows()*x.ncols(), iwork.data_ptr(), iwork.size(), &info);
294 fml::linalgutils::check_info(info,
"trcon");
296 return ((REAL)1)/ret;
319 template <
typename REAL>
323 return cond_square_internals(
'1', x);
325 return cond_nonsquare_internals(
'1', x);
349 template <
typename REAL>
353 return cond_square_internals(
'I', x);
355 return cond_nonsquare_internals(
'I', x);
377 template <
typename REAL>
387 for (len_t i=1; i<s.
size(); i++)
391 if (s_d[i] > 0 && s_d[i] < min)