5 #ifndef FML_MPI_LINALG_NORM_H
6 #define FML_MPI_LINALG_NORM_H
10 #include "../../cpu/cpuvec.hh"
12 #include "../mpimat.hh"
37 template <
typename REAL>
40 const len_t n = x.
nrows();
41 const len_t m_local = x.nrows_local();
42 const len_t n_local = x.ncols_local();
44 const grid g = x.get_grid();
52 #pragma omp parallel for if(m_local*n_local > fml::omp::OMP_MIN_SIZE)
53 for (len_t j=0; j<n_local; j++)
55 for (len_t i=0; i<m_local; i++)
56 macs_d[j] += fabs(x_d[i + m_local*j]);
61 for (len_t j=0; j<n; j++)
88 template <
typename REAL>
91 const len_t m = x.
nrows();
92 const len_t m_local = x.nrows_local();
93 const len_t n_local = x.ncols_local();
95 const grid g = x.get_grid();
103 for (len_t j=0; j<n_local; j++)
105 for (len_t i=0; i<m_local; i++)
106 mars_d[i] += fabs(x_d[i + m_local*j]);
111 for (len_t i=0; i<m; i++)
113 if (norm < mars_d[i])
117 g.
allreduce(1, 1, &norm,
'C', BLACS_MAX);
133 template <
typename REAL>
136 const len_t m_local = x.nrows_local();
137 const len_t n_local = x.ncols_local();
142 for (len_t j=0; j<n_local; j++)
144 for (len_t i=0; i<m_local; i++)
145 norm += x_d[i + m_local*j] * x_d[i + m_local*j];
148 x.get_grid().allreduce(1, 1, &norm,
'A');
164 template <
typename REAL>
167 const len_t m_local = x.nrows_local();
168 const len_t n_local = x.ncols_local();
173 for (len_t j=0; j<n_local; j++)
175 for (len_t i=0; i<m_local; i++)
177 REAL tmp = fabs(x_d[i + m_local*j]);
183 x.get_grid().allreduce(1, 1, &norm,
'A', BLACS_MAX);
206 template <
typename REAL>
221 template <
typename REAL>
222 REAL cond_square_internals(
const char norm,
mpimat<REAL> &x)
224 const len_t n = x.
nrows();
235 scalapack::gecon(norm, n, x.
data_ptr(), x.desc_ptr(), xnorm,
236 &ret, &tmp, -1, &liwork, -1, &info);
238 int lwork = (int) tmp;
242 scalapack::gecon(norm, n, x.
data_ptr(), x.desc_ptr(), xnorm,
243 &ret, work.data_ptr(), lwork, iwork.data_ptr(), liwork, &info);
245 fml::linalgutils::check_info(info,
"gecon");
247 return ((REAL)1)/ret;
250 template <
typename REAL>
251 REAL cond_nonsquare_internals(
const char norm, mpimat<REAL> &x)
253 const len_t m = x.nrows();
254 const len_t n = x.ncols();
263 mpimat<REAL> R(x.get_grid(), x.bf_rows(), x.bf_cols());
269 scalapack::trcon(norm,
'U',
'N', n, R.data_ptr(), R.desc_ptr(), &ret,
270 &tmp, -1, &liwork, -1, &info);
272 int lwork = (int) tmp;
273 cpuvec<REAL> work(lwork);
274 cpuvec<int> iwork(liwork);
276 scalapack::trcon(norm,
'U',
'N', n, R.data_ptr(), R.desc_ptr(), &ret,
277 x.data_ptr(), lwork, iwork.data_ptr(), iwork.size(), &info);
281 mpimat<REAL> L(x.get_grid(), x.bf_rows(), x.bf_cols());
287 scalapack::trcon(norm,
'L',
'N', m, L.data_ptr(), L.desc_ptr(), &ret,
288 &tmp, -1, &liwork, -1, &info);
290 int lwork = (int) tmp;
291 cpuvec<REAL> work(lwork);
292 cpuvec<int> iwork(liwork);
294 scalapack::trcon(norm,
'L',
'N', m, L.data_ptr(), L.desc_ptr(), &ret,
295 x.data_ptr(), x.nrows()*x.ncols(), iwork.data_ptr(), iwork.size(), &info);
298 fml::linalgutils::check_info(info,
"trcon");
300 return ((REAL)1)/ret;
323 template <
typename REAL>
327 return cond_square_internals(
'1', x);
329 return cond_nonsquare_internals(
'1', x);
353 template <
typename REAL>
357 return cond_square_internals(
'I', x);
359 return cond_nonsquare_internals(
'I', x);
381 template <
typename REAL>
391 for (len_t i=1; i<s.
size(); i++)
395 if (s_d[i] > 0 && s_d[i] < min)