Tpetra parallel linear algebra  Version of the Day
Tpetra_computeRowAndColumnOneNorms_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
43 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
44 
51 
52 #include "Tpetra_Details_copyConvert.hpp"
53 #include "Tpetra_CrsMatrix.hpp"
54 #include "Tpetra_Export.hpp"
55 #include "Tpetra_Map.hpp"
56 #include "Tpetra_MultiVector.hpp"
57 #include "Tpetra_RowMatrix.hpp"
58 #include "Kokkos_Core.hpp"
59 #include "Teuchos_CommHelpers.hpp"
60 #include <memory>
61 
62 namespace Tpetra {
63 namespace Details {
64 
65 template<class SC, class LO, class GO, class NT>
66 std::size_t
67 lclMaxNumEntriesRowMatrix (const Tpetra::RowMatrix<SC, LO, GO, NT>& A)
68 {
69  const auto& rowMap = * (A.getRowMap ());
70  const LO lclNumRows = static_cast<LO> (rowMap.getNodeNumElements ());
71 
72  std::size_t maxNumEnt {0};
73  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
74  const std::size_t numEnt = A.getNumEntriesInLocalRow (lclRow);
75  maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
76  }
77  return maxNumEnt;
78 }
79 
80 template<class SC, class LO, class GO, class NT>
81 void
82 forEachLocalRowMatrixRow (const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
83  const LO lclNumRows,
84  const std::size_t maxNumEnt,
85  std::function<void (const LO lclRow,
86  const Teuchos::ArrayView<LO>& /* ind */,
87  const Teuchos::ArrayView<SC>& /* val */,
88  std::size_t /* numEnt */ )> doForEachRow)
89 {
90  Teuchos::Array<LO> indBuf (maxNumEnt);
91  Teuchos::Array<SC> valBuf (maxNumEnt);
92 
93  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
94  std::size_t numEnt = A.getNumEntriesInLocalRow (lclRow);
95  Teuchos::ArrayView<LO> ind = indBuf.view (0, numEnt);
96  Teuchos::ArrayView<SC> val = valBuf.view (0, numEnt);
97  A.getLocalRowCopy (lclRow, ind, val, numEnt);
98  doForEachRow (lclRow, ind, val, numEnt);
99  }
100 }
101 
102 template<class SC, class LO, class GO, class NT>
103 void
104 forEachLocalRowMatrixRow (const Tpetra::RowMatrix<SC, LO, GO, NT>& A,
105  std::function<void (const LO lclRow,
106  const Teuchos::ArrayView<LO>& /* ind */,
107  const Teuchos::ArrayView<SC>& /* val */,
108  std::size_t /* numEnt */ )> doForEachRow)
109 {
110  const auto& rowMap = * (A.getRowMap ());
111  const LO lclNumRows = static_cast<LO> (rowMap.getNodeNumElements ());
112  const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
113 
114  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
115 }
116 
120 template<class SC, class LO, class GO, class NT>
121 void
122 computeLocalRowScaledColumnNorms_RowMatrix (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
123  typename NT::device_type>& result,
125 {
126  using KAT = Kokkos::ArithTraits<SC>;
127  using mag_type = typename KAT::mag_type;
128 
129  auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
130  Kokkos::deep_copy (rowNorms_h, result.rowNorms);
131  auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
132 
133  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
134  [&] (const LO lclRow,
135  const Teuchos::ArrayView<LO>& ind,
136  const Teuchos::ArrayView<SC>& val,
137  std::size_t numEnt) {
138  const mag_type rowNorm = rowNorms_h[lclRow];
139  for (std::size_t k = 0; k < numEnt; ++k) {
140  const mag_type matrixAbsVal = KAT::abs (val[k]);
141  const LO lclCol = ind[k];
142 
143  rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
144  }
145  });
146  Kokkos::deep_copy (result.rowScaledColNorms, rowScaledColNorms_h);
147 }
148 
151 template<class SC, class LO, class GO, class NT>
152 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
154  const bool assumeSymmetric)
155 {
156  using KAT = Kokkos::ArithTraits<SC>;
157  using val_type = typename KAT::val_type;
158  using mag_type = typename KAT::mag_type;
159  using KAM = Kokkos::ArithTraits<mag_type>;
160  using device_type = typename NT::device_type;
161 
162  const auto& rowMap = * (A.getRowMap ());
163  const auto& colMap = * (A.getColMap ());
164  const LO lclNumRows = static_cast<LO> (rowMap.getNodeNumElements ());
165  const LO lclNumCols = static_cast<LO> (colMap.getNodeNumElements ());
166 
168  (lclNumRows, lclNumCols, assumeSymmetric);
169  auto result_h = result.createMirrorView ();
170 
171  forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
172  [&] (const LO lclRow,
173  const Teuchos::ArrayView<LO>& ind,
174  const Teuchos::ArrayView<SC>& val,
175  std::size_t numEnt) {
176  mag_type rowNorm {0.0};
177  val_type diagVal {0.0};
178  const GO gblRow = rowMap.getGlobalElement (lclRow);
179  // OK if invalid(); then we simply won't find the diagonal entry.
180  const GO lclDiagColInd = colMap.getLocalElement (gblRow);
181 
182  for (std::size_t k = 0; k < numEnt; ++k) {
183  const val_type matrixVal = val[k];
184  if (KAT::isInf (matrixVal)) {
185  result_h.foundInf = true;
186  }
187  if (KAT::isNan (matrixVal)) {
188  result_h.foundNan = true;
189  }
190  const mag_type matrixAbsVal = KAT::abs (matrixVal);
191  rowNorm += matrixAbsVal;
192  const LO lclCol = ind[k];
193  if (lclCol == lclDiagColInd) {
194  diagVal += val[k]; // repeats count additively
195  }
196  if (! assumeSymmetric) {
197  result_h.colNorms[lclCol] += matrixAbsVal;
198  }
199  } // for each entry in row
200 
201  // This is a local result. If the matrix has an overlapping
202  // row Map, then the global result might differ.
203  if (diagVal == KAT::zero ()) {
204  result_h.foundZeroDiag = true;
205  }
206  if (rowNorm == KAM::zero ()) {
207  result_h.foundZeroRowNorm = true;
208  }
209  // NOTE (mfh 24 May 2018) We could actually compute local
210  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
211  // and row Map is the same as range Map (so that the local row
212  // norms are the same as the global row norms).
213  result_h.rowDiagonalEntries[lclRow] += diagVal;
214  result_h.rowNorms[lclRow] = rowNorm;
215  if (! assumeSymmetric &&
216  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
217  result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
218  }
219  });
220 
221  result.assign (result_h);
222  return result;
223 }
224 
225 template<class SC, class LO, class GO, class NT>
226 class ComputeLocalRowScaledColumnNorms {
227 public:
228  using crs_matrix_type = ::Tpetra::CrsMatrix<SC, LO, GO, NT>;
229  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
230  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
231  using device_type = typename crs_matrix_type::device_type;
232 
233  ComputeLocalRowScaledColumnNorms (const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
234  const Kokkos::View<const mag_type*, device_type>& rowNorms,
235  const crs_matrix_type& A) :
236  rowScaledColNorms_ (rowScaledColNorms),
237  rowNorms_ (rowNorms),
238  A_lcl_ (A.getLocalMatrix ())
239  {}
240 
241  KOKKOS_INLINE_FUNCTION void operator () (const LO lclRow) const {
242  using KAT = Kokkos::ArithTraits<val_type>;
243 
244  const auto curRow = A_lcl_.rowConst (lclRow);
245  const mag_type rowNorm = rowNorms_[lclRow];
246  const LO numEnt = curRow.length;
247  for (LO k = 0; k < numEnt; ++k) {
248  const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
249  const LO lclCol = curRow.colidx(k);
250 
251  Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
252  }
253  }
254 
255  static void
256  run (const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
257  const Kokkos::View<const mag_type*, device_type>& rowNorms,
258  const crs_matrix_type& A)
259  {
260  using execution_space = typename device_type::execution_space;
261  using range_type = Kokkos::RangePolicy<execution_space, LO>;
262  using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
263 
264  functor_type functor (rowScaledColNorms, rowNorms, A);
265  const LO lclNumRows =
266  static_cast<LO> (A.getRowMap ()->getNodeNumElements ());
267  Kokkos::parallel_for ("computeLocalRowScaledColumnNorms",
268  range_type (0, lclNumRows), functor);
269  }
270 
271 private:
272  Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
273  Kokkos::View<const mag_type*, device_type> rowNorms_;
274 
275  using local_matrix_type = typename crs_matrix_type::local_matrix_type;
276  local_matrix_type A_lcl_;
277 };
278 
279 template<class SC, class LO, class GO, class NT>
280 void
281 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
282  typename NT::device_type>& result,
284 {
285  using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
286  impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
287 }
288 
289 template<class SC, class LO, class GO, class NT>
290 void
291 computeLocalRowScaledColumnNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
292  typename NT::device_type>& result,
294 {
295  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
296  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
297  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
298  using device_type = typename NT::device_type;
299 
300  auto colMapPtr = A.getColMap ();
301  TEUCHOS_TEST_FOR_EXCEPTION
302  (colMapPtr.get () == nullptr, std::invalid_argument,
303  "computeLocalRowScaledColumnNorms: "
304  "Input matrix A must have a nonnull column Map.");
305  const LO lclNumCols = static_cast<LO> (colMapPtr->getNodeNumElements ());
306  if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
307  static_cast<std::size_t> (lclNumCols)) {
308  result.rowScaledColNorms =
309  Kokkos::View<mag_type*, device_type> ("rowScaledColNorms", lclNumCols);
310  }
311 
312  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
313  if (A_crs == nullptr) {
315  }
316  else {
317  computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
318  }
319 }
320 
321 // Kokkos::parallel_reduce functor that is part of the implementation
322 // of computeLocalRowAndColumnOneNorms_CrsMatrix.
323 template<class SC, class LO, class GO, class NT>
324 class ComputeLocalRowAndColumnOneNorms {
325 public:
326  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
327  using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
328  using local_matrix_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_type;
329  using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
330 
331 public:
332  ComputeLocalRowAndColumnOneNorms (const equib_info_type& equib, // in/out
333  const local_matrix_type& A_lcl, // in
334  const local_map_type& rowMap, // in
335  const local_map_type& colMap) : // in
336  equib_ (equib),
337  A_lcl_ (A_lcl),
338  rowMap_ (rowMap),
339  colMap_ (colMap)
340  {}
341 
342  // (result & 1) != 0 means "found Inf."
343  // (result & 2) != 0 means "found NaN."
344  // (result & 4) != 0 means "found zero diag."
345  // (result & 8) != 0 means "found zero row norm."
346  // Pack into a single int so the reduction is cheaper,
347  // esp. on GPU.
348  using value_type = int;
349 
350  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
351  {
352  dst = 0;
353  }
354 
355  KOKKOS_INLINE_FUNCTION void
356  join (volatile value_type& dst,
357  const volatile value_type& src) const
358  {
359  dst |= src;
360  }
361 
362  KOKKOS_INLINE_FUNCTION void
363  operator () (const LO lclRow, value_type& dst) const
364  {
365  using KAT = Kokkos::ArithTraits<val_type>;
366  using mag_type = typename KAT::mag_type;
367  using KAM = Kokkos::ArithTraits<mag_type>;
368 
369  const GO gblRow = rowMap_.getGlobalElement (lclRow);
370  // OK if invalid(); then we simply won't find the diagonal entry.
371  const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
372 
373  const auto curRow = A_lcl_.rowConst (lclRow);
374  const LO numEnt = curRow.length;
375  const bool assumeSymmetric = equib_.assumeSymmetric;
376 
377  mag_type rowNorm {0.0};
378  val_type diagVal {0.0};
379 
380  for (LO k = 0; k < numEnt; ++k) {
381  const val_type matrixVal = curRow.value (k);
382  if (KAT::isInf (matrixVal)) {
383  dst |= 1;
384  }
385  if (KAT::isNan (matrixVal)) {
386  dst |= 2;
387  }
388  const mag_type matrixAbsVal = KAT::abs (matrixVal);
389  rowNorm += matrixAbsVal;
390  const LO lclCol = curRow.colidx (k);
391  if (lclCol == lclDiagColInd) {
392  diagVal = curRow.value (k); // assume no repeats
393  }
394  if (! assumeSymmetric) {
395  Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
396  }
397  } // for each entry in row
398 
399  // This is a local result. If the matrix has an overlapping
400  // row Map, then the global result might differ.
401  if (diagVal == KAT::zero ()) {
402  dst |= 4;
403  }
404  if (rowNorm == KAM::zero ()) {
405  dst |= 8;
406  }
407  // NOTE (mfh 24 May 2018) We could actually compute local
408  // rowScaledColNorms in situ at this point, if ! assumeSymmetric
409  // and row Map is the same as range Map (so that the local row
410  // norms are the same as the global row norms).
411  equib_.rowDiagonalEntries[lclRow] = diagVal;
412  equib_.rowNorms[lclRow] = rowNorm;
413  if (! assumeSymmetric &&
414  lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
415  // Don't need an atomic update here, since this lclDiagColInd is
416  // a one-to-one function of lclRow.
417  equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
418  }
419  }
420 
421 private:
422  equib_info_type equib_;
423  local_matrix_type A_lcl_;
424  local_map_type rowMap_;
425  local_map_type colMap_;
426 };
427 
430 template<class SC, class LO, class GO, class NT>
431 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
433  const bool assumeSymmetric)
434 {
435  using execution_space = typename NT::device_type::execution_space;
436  using range_type = Kokkos::RangePolicy<execution_space, LO>;
437  using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
438  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
439  using device_type = typename NT::device_type;
440  using equib_info_type = EquilibrationInfo<val_type, device_type>;
441 
442  const LO lclNumRows = static_cast<LO> (A.getRowMap ()->getNodeNumElements ());
443  const LO lclNumCols = static_cast<LO> (A.getColMap ()->getNodeNumElements ());
444  equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
445 
446  functor_type functor (equib, A.getLocalMatrix (),
447  A.getRowMap ()->getLocalMap (),
448  A.getColMap ()->getLocalMap ());
449  int result = 0;
450  Kokkos::parallel_reduce ("computeLocalRowAndColumnOneNorms",
451  range_type (0, lclNumRows), functor,
452  result);
453  equib.foundInf = (result & 1) != 0;
454  equib.foundNan = (result & 2) != 0;
455  equib.foundZeroDiag = (result & 4) != 0;
456  equib.foundZeroRowNorm = (result & 8) != 0;
457  return equib;
458 }
459 
481 template<class SC, class LO, class GO, class NT>
482 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type, typename NT::device_type>
484  const bool assumeSymmetric)
485 {
486  using crs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NT>;
487  const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
488 
489  if (A_crs == nullptr) {
490  return computeLocalRowAndColumnOneNorms_RowMatrix (A, assumeSymmetric);
491  }
492  else {
493  return computeLocalRowAndColumnOneNorms_CrsMatrix (*A_crs, assumeSymmetric);
494  }
495 }
496 
497 template<class SC, class LO, class GO, class NT>
499 getLocalView_2d (const Tpetra::MultiVector<SC, LO, GO, NT>& X)
500 {
501  return X.template getLocalView<typename NT::device_type::memory_space> ();
502 }
503 
504 template<class SC, class LO, class GO, class NT>
505 auto getLocalView_1d (const Tpetra::MultiVector<SC, LO, GO, NT>& X,
506  const LO whichColumn)
507  -> decltype (Kokkos::subview (getLocalView_2d (X), Kokkos::ALL (), whichColumn))
508 {
509  if (X.isConstantStride ()) {
510  return Kokkos::subview (getLocalView_2d (X), Kokkos::ALL (), whichColumn);
511  }
512  else {
513  auto X_whichColumn = X.getVector (whichColumn);
514  return Kokkos::subview (getLocalView_2d (*X_whichColumn), Kokkos::ALL (), 0);
515  }
516 }
517 
518 template<class SC, class LO, class GO, class NT, class ViewValueType>
519 void
520 copy1DViewIntoMultiVectorColumn (Tpetra::MultiVector<SC, LO, GO, NT>& X,
521  const LO whichColumn,
522  const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
523 {
524  using dev_memory_space = typename NT::device_type::memory_space;
525  // MultiVector always starts sync'd to device.
526  X.template modify<dev_memory_space> ();
527  auto X_lcl = getLocalView_1d (X, whichColumn);
528  Tpetra::Details::copyConvert (X_lcl, view);
529 }
530 
531 template<class SC, class LO, class GO, class NT, class ViewValueType>
532 void
533 copyMultiVectorColumnInto1DView (const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
535  const LO whichColumn)
536 {
537  using dev_memory_space = typename NT::device_type::memory_space;
538  X.template sync<dev_memory_space> ();
539  auto X_lcl = getLocalView_1d (X, whichColumn);
540  Tpetra::Details::copyConvert (view, X_lcl);
541 }
542 
543 template<class OneDViewType, class IndexType>
544 class FindZero {
545 public:
546  static_assert (OneDViewType::Rank == 1,
547  "OneDViewType must be a rank-1 Kokkos::View.");
548  static_assert (std::is_integral<IndexType>::value,
549  "IndexType must be a built-in integer type.");
550  FindZero (const OneDViewType& x) : x_ (x) {}
551  // Kokkos historically didn't like bool reduction results on CUDA,
552  // so we use int as the reduction result type.
553  KOKKOS_INLINE_FUNCTION void
554  operator () (const IndexType i, int& result) const {
555  using val_type = typename OneDViewType::non_const_value_type;
556  result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
557  }
558 private:
559  OneDViewType x_;
560 };
561 
562 template<class OneDViewType>
563 bool findZero (const OneDViewType& x)
564 {
565  using view_type = typename OneDViewType::const_type;
566  using execution_space = typename view_type::execution_space;
567  using size_type = typename view_type::size_type;
568  using functor_type = FindZero<view_type, size_type>;
569 
570  Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
571  range.set (Kokkos::ChunkSize (500)); // adjust as needed
572 
573  int foundZero = 0;
574  Kokkos::parallel_reduce ("findZero", range, functor_type (x), foundZero);
575  return foundZero == 1;
576 }
577 
578 template<class SC, class LO, class GO, class NT>
579 void
580 globalizeRowOneNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
581  typename NT::device_type>& equib,
583 {
584  using mv_type = Tpetra::MultiVector<SC, LO, GO, NT>;
585 
586  auto G = A.getGraph ();
587  TEUCHOS_TEST_FOR_EXCEPTION
588  (G.get () == nullptr, std::invalid_argument,
589  "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
590  "(that is, getGraph() must return nonnull).");
591  TEUCHOS_TEST_FOR_EXCEPTION
592  (! G->isFillComplete (), std::invalid_argument,
593  "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
594 
595  auto exp = G->getExporter ();
596  if (! exp.is_null ()) {
597  // If the matrix has an overlapping row Map, first Export the
598  // local row norms with ADD CombineMode to a range Map Vector to
599  // get the global row norms, then reverse them back with REPLACE
600  // CombineMode to the row Map Vector. Ditto for the local row
601  // diagonal entries. Use SC instead of mag_type, so we can
602  // communicate both row norms and row diagonal entries at once.
603 
604  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
605  // avoid the local copy here.
606  mv_type rowMapMV (G->getRowMap (), 2, false);
607 
608  copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
609  copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
610  {
611  mv_type rangeMapMV (G->getRangeMap (), 2, true);
612  rangeMapMV.doExport (rowMapMV, *exp, Tpetra::ADD); // forward mode
613  rowMapMV.doImport (rangeMapMV, *exp, Tpetra::REPLACE); // reverse mode
614  }
615  copyMultiVectorColumnInto1DView (equib.rowNorms, rowMapMV, 0);
616  copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
617 
618  // It's not common for users to solve linear systems with a
619  // nontrival Export, so it's OK for this to cost an additional
620  // pass over rowDiagonalEntries.
621  equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
622  equib.foundZeroRowNorm = findZero (equib.rowNorms);
623  }
624 
625  constexpr int allReduceCount = 4;
626  int lclNaughtyMatrix[allReduceCount];
627  lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
628  lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
629  lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
630  lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
631 
632  using Teuchos::outArg;
633  using Teuchos::REDUCE_MAX;
634  using Teuchos::reduceAll;
635  auto comm = G->getComm ();
636  int gblNaughtyMatrix[allReduceCount];
637  reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
638  lclNaughtyMatrix, gblNaughtyMatrix);
639 
640  equib.foundInf = gblNaughtyMatrix[0] == 1;
641  equib.foundNan = gblNaughtyMatrix[1] == 1;
642  equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
643  equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
644 }
645 
646 template<class SC, class LO, class GO, class NT>
647 void
648 globalizeColumnOneNorms (EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
649  typename NT::device_type>& equib,
651  const bool assumeSymmetric) // if so, use row norms
652 {
653  using val_type = typename Kokkos::ArithTraits<SC>::val_type;
654  using mag_type = typename Kokkos::ArithTraits<val_type>::mag_type;
656  using device_type = typename NT::device_type;
657 
658  auto G = A.getGraph ();
659  TEUCHOS_TEST_FOR_EXCEPTION
660  (G.get () == nullptr, std::invalid_argument,
661  "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
662  "(that is, getGraph() must return nonnull).");
663  TEUCHOS_TEST_FOR_EXCEPTION
664  (! G->isFillComplete (), std::invalid_argument,
665  "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
666 
667  auto imp = G->getImporter ();
668  if (assumeSymmetric) {
669  const LO numCols = 2;
670  // Redistribute local row info to global column info.
671 
672  // Get the data into a MultiVector on the domain Map.
673  mv_type rowNorms_domMap (G->getDomainMap (), numCols, false);
674  const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
675  if (rowMapSameAsDomainMap) {
676  copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
677  copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 1, equib.rowDiagonalEntries);
678  }
679  else {
680  // This is not a common case; it would normally arise when the
681  // matrix has an overlapping row Map.
682  Tpetra::Export<LO, GO, NT> rowToDom (G->getRowMap (), G->getDomainMap ());
683  mv_type rowNorms_rowMap (G->getRowMap (), numCols, true);
684  copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
685  copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
686  rowNorms_domMap.doExport (rowNorms_rowMap, rowToDom, Tpetra::REPLACE);
687  }
688 
689  // Use the existing Import to redistribute the row norms from the
690  // domain Map to the column Map.
691  std::unique_ptr<mv_type> rowNorms_colMap;
692  if (imp.is_null ()) {
693  // Shallow copy of rowNorms_domMap, since the two must have the
694  // same Maps.
695  rowNorms_colMap =
696  std::unique_ptr<mv_type> (new mv_type (G->getColMap (),
697  rowNorms_domMap.getDualView ()));
698  }
699  else {
700  rowNorms_colMap =
701  std::unique_ptr<mv_type> (new mv_type (G->getColMap (), numCols, true));
702  rowNorms_colMap->doImport (rowNorms_domMap, *imp, Tpetra::REPLACE);
703  }
704 
705  // Make sure the result has allocations of the right size.
706  const LO lclNumCols =
707  static_cast<LO> (G->getColMap ()->getNodeNumElements ());
708  if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
709  equib.colNorms =
710  Kokkos::View<mag_type*, device_type> ("colNorms", lclNumCols);
711  }
712  if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
713  equib.colDiagonalEntries =
714  Kokkos::View<val_type*, device_type> ("colDiagonalEntries", lclNumCols);
715  }
716 
717  // Copy row norms and diagonal entries, appropriately
718  // redistributed, into column norms resp. diagonal entries.
719  copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
720  copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
721  }
722  else {
723  if (! imp.is_null ()) {
724  const LO numCols = 3;
725  // If the matrix has an overlapping column Map (this is usually
726  // the case), first Export (reverse-mode Import) the local info
727  // to a domain Map Vector to get the global info, then Import
728  // them back with REPLACE CombineMode to the column Map Vector.
729  // Ditto for the row-scaled column norms.
730 
731  // FIXME (mfh 16 May 2018) Clever DualView tricks could possibly
732  // avoid the local copy here.
733  mv_type colMapMV (G->getColMap (), numCols, false);
734 
735  copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
736  copy1DViewIntoMultiVectorColumn (colMapMV, 1, equib.colDiagonalEntries);
737  copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
738  {
739  mv_type domainMapMV (G->getDomainMap (), numCols, true);
740  domainMapMV.doExport (colMapMV, *imp, Tpetra::ADD); // reverse mode
741  colMapMV.doImport (domainMapMV, *imp, Tpetra::REPLACE); // forward mode
742  }
743  copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
744  copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
745  copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
746  }
747  }
748 }
749 
750 } // namespace Details
751 
752 template<class SC, class LO, class GO, class NT>
753 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
754  typename NT::device_type>
756  const bool assumeSymmetric)
757 {
758  TEUCHOS_TEST_FOR_EXCEPTION
759  (! A.isFillComplete (), std::invalid_argument,
760  "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
761  auto result = Details::computeLocalRowAndColumnOneNorms (A, assumeSymmetric);
762 
763  Details::globalizeRowOneNorms (result, A);
764  if (! assumeSymmetric) {
765  // Row-norm-scaled column norms are trivial if the matrix is
766  // symmetric, since the row norms and column norms are the same in
767  // that case.
768  Details::computeLocalRowScaledColumnNorms (result, A);
769  }
770  Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
771  return result;
772 }
773 
774 } // namespace Tpetra
775 
776 //
777 // Explicit instantiation macro
778 //
779 // Must be expanded from within the Tpetra namespace!
780 //
781 
782 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \
783  template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
784  computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
785  const bool assumeSymmetric);
786 
787 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
Tpetra::Details::computeLocalRowScaledColumnNorms_RowMatrix
void computeLocalRowScaledColumnNorms_RowMatrix(EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > &result, const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
For a given Tpetra::RowMatrix that is not a Tpetra::CrsMatrix, assume that result....
Definition: Tpetra_computeRowAndColumnOneNorms_def.hpp:122
Tpetra::Details::computeLocalRowAndColumnOneNorms_RowMatrix
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_RowMatrix(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::RowMatrix that is NOT a Tpetra::CrsM...
Definition: Tpetra_computeRowAndColumnOneNorms_def.hpp:153
Tpetra::Classes::CrsMatrix::getRowMap
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:799
Tpetra::Classes::RowMatrix::getColMap
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes the distribution of columns over processes.
Tpetra::REPLACE
Replace existing values with new values.
Definition: Tpetra_CombineMode.hpp:97
Tpetra::Classes::CrsMatrix::getColMap
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:806
Tpetra::Details::EquilibrationInfo
Struct storing results of Tpetra::computeRowAndColumnOneNorms.
Definition: Tpetra_Details_EquilibrationInfo.hpp:79
Details
Implementation details of Tpetra.
Tpetra::ADD
Sum new values into existing values.
Definition: Tpetra_CombineMode.hpp:95
Tpetra::Classes::CrsMatrix
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Definition: Tpetra_CrsMatrix_decl.hpp:424
Tpetra::Classes::RowMatrix::getRowMap
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Tpetra::Classes::RowMatrix::getLocalRowCopy
virtual void getLocalRowCopy(LocalOrdinal LocalRow, const Teuchos::ArrayView< LocalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given local row's entries.
Tpetra::Classes::CrsMatrix::getLocalMatrix
local_matrix_type getLocalMatrix() const
The local sparse matrix.
Definition: Tpetra_CrsMatrix_decl.hpp:2435
Tpetra::Classes::RowMatrix::getGraph
virtual Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const =0
The RowGraph associated with this matrix.
Tpetra::Details::computeLocalRowAndColumnOneNorms_CrsMatrix
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms_CrsMatrix(const Tpetra::CrsMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Implementation of computeLocalRowAndColumnOneNorms for a Tpetra::CrsMatrix.
Definition: Tpetra_computeRowAndColumnOneNorms_def.hpp:432
Tpetra::Classes::MultiVector
One or more distributed dense vectors.
Definition: Tpetra_MultiVector_decl.hpp:389
Tpetra::Classes::RowMatrix::isFillComplete
virtual bool isFillComplete() const =0
Whether fillComplete() has been called.
Tpetra::Classes::CrsMatrix::local_matrix_type
KokkosSparse::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Definition: Tpetra_CrsMatrix_decl.hpp:483
Tpetra::Details::copyConvert
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length....
Definition: Tpetra_Details_copyConvert.hpp:314
Tpetra::Classes::CrsMatrix::device_type
Node::device_type device_type
The Kokkos device type.
Definition: Tpetra_CrsMatrix_decl.hpp:454
Tpetra::computeRowAndColumnOneNorms
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A,...
Definition: Tpetra_computeRowAndColumnOneNorms_def.hpp:755
Tpetra::Details::computeLocalRowAndColumnOneNorms
EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeLocalRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute LOCAL row and column one-norms ("row sums" etc.) of the input sparse matrix A....
Definition: Tpetra_computeRowAndColumnOneNorms_def.hpp:483
Tpetra::Classes::RowMatrix::getNumEntriesInLocalRow
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Classes::Export
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Definition: Tpetra_Export_decl.hpp:124
Tpetra::Classes::RowMatrix
A read-only, row-oriented interface to a sparse matrix.
Definition: Tpetra_RowMatrix_decl.hpp:85