42 #ifndef TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
43 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP
52 #include "Tpetra_Details_copyConvert.hpp"
53 #include "Tpetra_CrsMatrix.hpp"
54 #include "Tpetra_Export.hpp"
55 #include "Tpetra_Map.hpp"
56 #include "Tpetra_MultiVector.hpp"
57 #include "Tpetra_RowMatrix.hpp"
58 #include "Kokkos_Core.hpp"
59 #include "Teuchos_CommHelpers.hpp"
65 template<
class SC,
class LO,
class GO,
class NT>
70 const LO lclNumRows = static_cast<LO> (rowMap.getNodeNumElements ());
72 std::size_t maxNumEnt {0};
73 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
75 maxNumEnt = numEnt > maxNumEnt ? numEnt : maxNumEnt;
80 template<
class SC,
class LO,
class GO,
class NT>
84 const std::size_t maxNumEnt,
85 std::function<
void (
const LO lclRow,
86 const Teuchos::ArrayView<LO>& ,
87 const Teuchos::ArrayView<SC>& ,
88 std::size_t )> doForEachRow)
90 Teuchos::Array<LO> indBuf (maxNumEnt);
91 Teuchos::Array<SC> valBuf (maxNumEnt);
93 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
95 Teuchos::ArrayView<LO> ind = indBuf.view (0, numEnt);
96 Teuchos::ArrayView<SC> val = valBuf.view (0, numEnt);
98 doForEachRow (lclRow, ind, val, numEnt);
102 template<
class SC,
class LO,
class GO,
class NT>
105 std::function<
void (
const LO lclRow,
106 const Teuchos::ArrayView<LO>& ,
107 const Teuchos::ArrayView<SC>& ,
108 std::size_t )> doForEachRow)
111 const LO lclNumRows = static_cast<LO> (rowMap.getNodeNumElements ());
112 const std::size_t maxNumEnt = lclMaxNumEntriesRowMatrix (A);
114 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A, lclNumRows, maxNumEnt, doForEachRow);
120 template<
class SC,
class LO,
class GO,
class NT>
123 typename NT::device_type>& result,
126 using KAT = Kokkos::ArithTraits<SC>;
127 using mag_type =
typename KAT::mag_type;
129 auto rowNorms_h = Kokkos::create_mirror_view (result.rowNorms);
131 auto rowScaledColNorms_h = Kokkos::create_mirror_view (result.rowScaledColNorms);
133 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
134 [&] (
const LO lclRow,
135 const Teuchos::ArrayView<LO>& ind,
136 const Teuchos::ArrayView<SC>& val,
137 std::size_t numEnt) {
138 const mag_type rowNorm = rowNorms_h[lclRow];
139 for (std::size_t k = 0; k < numEnt; ++k) {
140 const mag_type matrixAbsVal = KAT::abs (val[k]);
141 const LO lclCol = ind[k];
143 rowScaledColNorms_h[lclCol] += matrixAbsVal / rowNorm;
151 template<
class SC,
class LO,
class GO,
class NT>
152 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
154 const bool assumeSymmetric)
156 using KAT = Kokkos::ArithTraits<SC>;
157 using val_type =
typename KAT::val_type;
158 using mag_type =
typename KAT::mag_type;
159 using KAM = Kokkos::ArithTraits<mag_type>;
160 using device_type =
typename NT::device_type;
164 const LO lclNumRows = static_cast<LO> (rowMap.getNodeNumElements ());
165 const LO lclNumCols = static_cast<LO> (colMap.getNodeNumElements ());
168 (lclNumRows, lclNumCols, assumeSymmetric);
169 auto result_h = result.createMirrorView ();
171 forEachLocalRowMatrixRow<SC, LO, GO, NT> (A,
172 [&] (
const LO lclRow,
173 const Teuchos::ArrayView<LO>& ind,
174 const Teuchos::ArrayView<SC>& val,
175 std::size_t numEnt) {
176 mag_type rowNorm {0.0};
177 val_type diagVal {0.0};
178 const GO gblRow = rowMap.getGlobalElement (lclRow);
180 const GO lclDiagColInd = colMap.getLocalElement (gblRow);
182 for (std::size_t k = 0; k < numEnt; ++k) {
183 const val_type matrixVal = val[k];
184 if (KAT::isInf (matrixVal)) {
185 result_h.foundInf =
true;
187 if (KAT::isNan (matrixVal)) {
188 result_h.foundNan =
true;
190 const mag_type matrixAbsVal = KAT::abs (matrixVal);
191 rowNorm += matrixAbsVal;
192 const LO lclCol = ind[k];
193 if (lclCol == lclDiagColInd) {
196 if (! assumeSymmetric) {
197 result_h.colNorms[lclCol] += matrixAbsVal;
203 if (diagVal == KAT::zero ()) {
204 result_h.foundZeroDiag =
true;
206 if (rowNorm == KAM::zero ()) {
207 result_h.foundZeroRowNorm =
true;
213 result_h.rowDiagonalEntries[lclRow] += diagVal;
214 result_h.rowNorms[lclRow] = rowNorm;
215 if (! assumeSymmetric &&
216 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
217 result_h.colDiagonalEntries[lclDiagColInd] += diagVal;
221 result.assign (result_h);
225 template<
class SC,
class LO,
class GO,
class NT>
226 class ComputeLocalRowScaledColumnNorms {
229 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
230 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
233 ComputeLocalRowScaledColumnNorms (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
234 const Kokkos::View<const mag_type*, device_type>& rowNorms,
235 const crs_matrix_type& A) :
236 rowScaledColNorms_ (rowScaledColNorms),
237 rowNorms_ (rowNorms),
238 A_lcl_ (A.getLocalMatrix ())
241 KOKKOS_INLINE_FUNCTION
void operator () (
const LO lclRow)
const {
242 using KAT = Kokkos::ArithTraits<val_type>;
244 const auto curRow = A_lcl_.rowConst (lclRow);
245 const mag_type rowNorm = rowNorms_[lclRow];
246 const LO numEnt = curRow.length;
247 for (LO k = 0; k < numEnt; ++k) {
248 const mag_type matrixAbsVal = KAT::abs (curRow.value(k));
249 const LO lclCol = curRow.colidx(k);
251 Kokkos::atomic_add (&rowScaledColNorms_[lclCol], matrixAbsVal / rowNorm);
256 run (
const Kokkos::View<mag_type*, device_type>& rowScaledColNorms,
257 const Kokkos::View<const mag_type*, device_type>& rowNorms,
258 const crs_matrix_type& A)
260 using execution_space =
typename device_type::execution_space;
261 using range_type = Kokkos::RangePolicy<execution_space, LO>;
262 using functor_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
264 functor_type functor (rowScaledColNorms, rowNorms, A);
265 const LO lclNumRows =
266 static_cast<LO> (A.getRowMap ()->getNodeNumElements ());
267 Kokkos::parallel_for (
"computeLocalRowScaledColumnNorms",
268 range_type (0, lclNumRows), functor);
272 Kokkos::View<mag_type*, device_type> rowScaledColNorms_;
273 Kokkos::View<const mag_type*, device_type> rowNorms_;
276 local_matrix_type A_lcl_;
279 template<
class SC,
class LO,
class GO,
class NT>
281 computeLocalRowScaledColumnNorms_CrsMatrix (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
282 typename NT::device_type>& result,
285 using impl_type = ComputeLocalRowScaledColumnNorms<SC, LO, GO, NT>;
286 impl_type::run (result.rowScaledColNorms, result.rowNorms, A);
289 template<
class SC,
class LO,
class GO,
class NT>
291 computeLocalRowScaledColumnNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
292 typename NT::device_type>& result,
296 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
297 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
298 using device_type =
typename NT::device_type;
301 TEUCHOS_TEST_FOR_EXCEPTION
302 (colMapPtr.get () ==
nullptr, std::invalid_argument,
303 "computeLocalRowScaledColumnNorms: "
304 "Input matrix A must have a nonnull column Map.");
305 const LO lclNumCols = static_cast<LO> (colMapPtr->getNodeNumElements ());
306 if (static_cast<std::size_t> (result.rowScaledColNorms.extent (0)) !=
307 static_cast<std::size_t> (lclNumCols)) {
308 result.rowScaledColNorms =
309 Kokkos::View<mag_type*, device_type> (
"rowScaledColNorms", lclNumCols);
312 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
313 if (A_crs ==
nullptr) {
317 computeLocalRowScaledColumnNorms_CrsMatrix (result, *A_crs);
323 template<
class SC,
class LO,
class GO,
class NT>
324 class ComputeLocalRowAndColumnOneNorms {
326 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
327 using equib_info_type = EquilibrationInfo<val_type, typename NT::device_type>;
328 using local_matrix_type = typename ::Tpetra::CrsMatrix<SC, LO, GO, NT>::local_matrix_type;
329 using local_map_type = typename ::Tpetra::Map<LO, GO, NT>::local_map_type;
332 ComputeLocalRowAndColumnOneNorms (
const equib_info_type& equib,
333 const local_matrix_type& A_lcl,
334 const local_map_type& rowMap,
335 const local_map_type& colMap) :
348 using value_type = int;
350 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
355 KOKKOS_INLINE_FUNCTION
void
356 join (
volatile value_type& dst,
357 const volatile value_type& src)
const
362 KOKKOS_INLINE_FUNCTION
void
363 operator () (
const LO lclRow, value_type& dst)
const
365 using KAT = Kokkos::ArithTraits<val_type>;
366 using mag_type =
typename KAT::mag_type;
367 using KAM = Kokkos::ArithTraits<mag_type>;
369 const GO gblRow = rowMap_.getGlobalElement (lclRow);
371 const GO lclDiagColInd = colMap_.getLocalElement (gblRow);
373 const auto curRow = A_lcl_.rowConst (lclRow);
374 const LO numEnt = curRow.length;
375 const bool assumeSymmetric = equib_.assumeSymmetric;
377 mag_type rowNorm {0.0};
378 val_type diagVal {0.0};
380 for (LO k = 0; k < numEnt; ++k) {
381 const val_type matrixVal = curRow.value (k);
382 if (KAT::isInf (matrixVal)) {
385 if (KAT::isNan (matrixVal)) {
388 const mag_type matrixAbsVal = KAT::abs (matrixVal);
389 rowNorm += matrixAbsVal;
390 const LO lclCol = curRow.colidx (k);
391 if (lclCol == lclDiagColInd) {
392 diagVal = curRow.value (k);
394 if (! assumeSymmetric) {
395 Kokkos::atomic_add (&(equib_.colNorms[lclCol]), matrixAbsVal);
401 if (diagVal == KAT::zero ()) {
404 if (rowNorm == KAM::zero ()) {
411 equib_.rowDiagonalEntries[lclRow] = diagVal;
412 equib_.rowNorms[lclRow] = rowNorm;
413 if (! assumeSymmetric &&
414 lclDiagColInd != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
417 equib_.colDiagonalEntries[lclDiagColInd] += diagVal;
422 equib_info_type equib_;
423 local_matrix_type A_lcl_;
424 local_map_type rowMap_;
425 local_map_type colMap_;
430 template<
class SC,
class LO,
class GO,
class NT>
431 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
433 const bool assumeSymmetric)
435 using execution_space =
typename NT::device_type::execution_space;
436 using range_type = Kokkos::RangePolicy<execution_space, LO>;
437 using functor_type = ComputeLocalRowAndColumnOneNorms<SC, LO, GO, NT>;
438 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
439 using device_type =
typename NT::device_type;
442 const LO lclNumRows = static_cast<LO> (A.
getRowMap ()->getNodeNumElements ());
443 const LO lclNumCols = static_cast<LO> (A.
getColMap ()->getNodeNumElements ());
444 equib_info_type equib (lclNumRows, lclNumCols, assumeSymmetric);
450 Kokkos::parallel_reduce (
"computeLocalRowAndColumnOneNorms",
451 range_type (0, lclNumRows), functor,
453 equib.foundInf = (result & 1) != 0;
454 equib.foundNan = (result & 2) != 0;
455 equib.foundZeroDiag = (result & 4) != 0;
456 equib.foundZeroRowNorm = (result & 8) != 0;
481 template<
class SC,
class LO,
class GO,
class NT>
482 EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
typename NT::device_type>
484 const bool assumeSymmetric)
487 const crs_matrix_type* A_crs = dynamic_cast<const crs_matrix_type*> (&A);
489 if (A_crs ==
nullptr) {
497 template<
class SC,
class LO,
class GO,
class NT>
501 return X.template getLocalView<typename NT::device_type::memory_space> ();
504 template<
class SC,
class LO,
class GO,
class NT>
506 const LO whichColumn)
507 -> decltype (Kokkos::subview (getLocalView_2d (X), Kokkos::ALL (), whichColumn))
509 if (X.isConstantStride ()) {
510 return Kokkos::subview (getLocalView_2d (X), Kokkos::ALL (), whichColumn);
513 auto X_whichColumn = X.getVector (whichColumn);
514 return Kokkos::subview (getLocalView_2d (*X_whichColumn), Kokkos::ALL (), 0);
518 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
521 const LO whichColumn,
522 const Kokkos::View<ViewValueType*, typename NT::device_type>& view)
524 using dev_memory_space =
typename NT::device_type::memory_space;
526 X.template modify<dev_memory_space> ();
527 auto X_lcl = getLocalView_1d (X, whichColumn);
531 template<
class SC,
class LO,
class GO,
class NT,
class ViewValueType>
533 copyMultiVectorColumnInto1DView (
const Kokkos::View<ViewValueType*, typename NT::device_type>& view,
535 const LO whichColumn)
537 using dev_memory_space =
typename NT::device_type::memory_space;
538 X.template sync<dev_memory_space> ();
539 auto X_lcl = getLocalView_1d (X, whichColumn);
543 template<
class OneDViewType,
class IndexType>
546 static_assert (OneDViewType::Rank == 1,
547 "OneDViewType must be a rank-1 Kokkos::View.");
548 static_assert (std::is_integral<IndexType>::value,
549 "IndexType must be a built-in integer type.");
550 FindZero (
const OneDViewType& x) : x_ (x) {}
553 KOKKOS_INLINE_FUNCTION
void
554 operator () (
const IndexType i,
int& result)
const {
555 using val_type =
typename OneDViewType::non_const_value_type;
556 result = (x_(i) == Kokkos::ArithTraits<val_type>::zero ()) ? 1 : result;
562 template<
class OneDViewType>
563 bool findZero (
const OneDViewType& x)
565 using view_type =
typename OneDViewType::const_type;
566 using execution_space =
typename view_type::execution_space;
567 using size_type =
typename view_type::size_type;
568 using functor_type = FindZero<view_type, size_type>;
570 Kokkos::RangePolicy<execution_space, size_type> range (0, x.extent (0));
571 range.set (Kokkos::ChunkSize (500));
574 Kokkos::parallel_reduce (
"findZero", range, functor_type (x), foundZero);
575 return foundZero == 1;
578 template<
class SC,
class LO,
class GO,
class NT>
580 globalizeRowOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
581 typename NT::device_type>& equib,
587 TEUCHOS_TEST_FOR_EXCEPTION
588 (G.get () ==
nullptr, std::invalid_argument,
589 "globalizeRowOneNorms: Input RowMatrix A must have a nonnull graph "
590 "(that is, getGraph() must return nonnull).");
591 TEUCHOS_TEST_FOR_EXCEPTION
592 (! G->isFillComplete (), std::invalid_argument,
593 "globalizeRowOneNorms: Input CrsGraph G must be fillComplete.");
595 auto exp = G->getExporter ();
596 if (! exp.is_null ()) {
606 mv_type rowMapMV (G->getRowMap (), 2,
false);
608 copy1DViewIntoMultiVectorColumn (rowMapMV, 0, equib.rowNorms);
609 copy1DViewIntoMultiVectorColumn (rowMapMV, 1, equib.rowDiagonalEntries);
611 mv_type rangeMapMV (G->getRangeMap (), 2,
true);
615 copyMultiVectorColumnInto1DView (equib.rowNorms, rowMapMV, 0);
616 copyMultiVectorColumnInto1DView (equib.rowDiagonalEntries, rowMapMV, 1);
621 equib.foundZeroDiag = findZero (equib.rowDiagonalEntries);
622 equib.foundZeroRowNorm = findZero (equib.rowNorms);
625 constexpr
int allReduceCount = 4;
626 int lclNaughtyMatrix[allReduceCount];
627 lclNaughtyMatrix[0] = equib.foundInf ? 1 : 0;
628 lclNaughtyMatrix[1] = equib.foundNan ? 1 : 0;
629 lclNaughtyMatrix[2] = equib.foundZeroDiag ? 1 : 0;
630 lclNaughtyMatrix[3] = equib.foundZeroRowNorm ? 1 : 0;
632 using Teuchos::outArg;
633 using Teuchos::REDUCE_MAX;
634 using Teuchos::reduceAll;
635 auto comm = G->getComm ();
636 int gblNaughtyMatrix[allReduceCount];
637 reduceAll<int, int> (*comm, REDUCE_MAX, allReduceCount,
638 lclNaughtyMatrix, gblNaughtyMatrix);
640 equib.foundInf = gblNaughtyMatrix[0] == 1;
641 equib.foundNan = gblNaughtyMatrix[1] == 1;
642 equib.foundZeroDiag = gblNaughtyMatrix[2] == 1;
643 equib.foundZeroRowNorm = gblNaughtyMatrix[3] == 1;
646 template<
class SC,
class LO,
class GO,
class NT>
648 globalizeColumnOneNorms (EquilibrationInfo<
typename Kokkos::ArithTraits<SC>::val_type,
649 typename NT::device_type>& equib,
651 const bool assumeSymmetric)
653 using val_type =
typename Kokkos::ArithTraits<SC>::val_type;
654 using mag_type =
typename Kokkos::ArithTraits<val_type>::mag_type;
656 using device_type =
typename NT::device_type;
659 TEUCHOS_TEST_FOR_EXCEPTION
660 (G.get () ==
nullptr, std::invalid_argument,
661 "globalizeColumnOneNorms: Input RowMatrix A must have a nonnull graph "
662 "(that is, getGraph() must return nonnull).");
663 TEUCHOS_TEST_FOR_EXCEPTION
664 (! G->isFillComplete (), std::invalid_argument,
665 "globalizeColumnOneNorms: Input CrsGraph G must be fillComplete.");
667 auto imp = G->getImporter ();
668 if (assumeSymmetric) {
669 const LO numCols = 2;
673 mv_type rowNorms_domMap (G->getDomainMap (), numCols,
false);
674 const bool rowMapSameAsDomainMap = G->getRowMap ()->isSameAs (* (G->getDomainMap ()));
675 if (rowMapSameAsDomainMap) {
676 copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 0, equib.rowNorms);
677 copy1DViewIntoMultiVectorColumn (rowNorms_domMap, 1, equib.rowDiagonalEntries);
683 mv_type rowNorms_rowMap (G->getRowMap (), numCols,
true);
684 copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 0, equib.rowNorms);
685 copy1DViewIntoMultiVectorColumn (rowNorms_rowMap, 1, equib.rowDiagonalEntries);
691 std::unique_ptr<mv_type> rowNorms_colMap;
692 if (imp.is_null ()) {
696 std::unique_ptr<mv_type> (
new mv_type (G->getColMap (),
697 rowNorms_domMap.getDualView ()));
701 std::unique_ptr<mv_type> (
new mv_type (G->getColMap (), numCols,
true));
706 const LO lclNumCols =
707 static_cast<LO> (G->getColMap ()->getNodeNumElements ());
708 if (static_cast<LO> (equib.colNorms.extent (0)) != lclNumCols) {
710 Kokkos::View<mag_type*, device_type> (
"colNorms", lclNumCols);
712 if (static_cast<LO> (equib.colDiagonalEntries.extent (0)) != lclNumCols) {
713 equib.colDiagonalEntries =
714 Kokkos::View<val_type*, device_type> (
"colDiagonalEntries", lclNumCols);
719 copyMultiVectorColumnInto1DView (equib.colNorms, *rowNorms_colMap, 0);
720 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, *rowNorms_colMap, 1);
723 if (! imp.is_null ()) {
724 const LO numCols = 3;
733 mv_type colMapMV (G->getColMap (), numCols,
false);
735 copy1DViewIntoMultiVectorColumn (colMapMV, 0, equib.colNorms);
736 copy1DViewIntoMultiVectorColumn (colMapMV, 1, equib.colDiagonalEntries);
737 copy1DViewIntoMultiVectorColumn (colMapMV, 2, equib.rowScaledColNorms);
739 mv_type domainMapMV (G->getDomainMap (), numCols,
true);
740 domainMapMV.doExport (colMapMV, *imp,
Tpetra::ADD);
743 copyMultiVectorColumnInto1DView (equib.colNorms, colMapMV, 0);
744 copyMultiVectorColumnInto1DView (equib.colDiagonalEntries, colMapMV, 1);
745 copyMultiVectorColumnInto1DView (equib.rowScaledColNorms, colMapMV, 2);
752 template<
class SC,
class LO,
class GO,
class NT>
753 Details::EquilibrationInfo<typename Kokkos::ArithTraits<SC>::val_type,
754 typename NT::device_type>
756 const bool assumeSymmetric)
758 TEUCHOS_TEST_FOR_EXCEPTION
760 "computeRowAndColumnOneNorms: Input matrix A must be fillComplete.");
763 Details::globalizeRowOneNorms (result, A);
764 if (! assumeSymmetric) {
768 Details::computeLocalRowScaledColumnNorms (result, A);
770 Details::globalizeColumnOneNorms (result, A, assumeSymmetric);
782 #define TPETRA_COMPUTEROWANDCOLUMNONENORMS_INSTANT(SC,LO,GO,NT) \
783 template Details::EquilibrationInfo<Kokkos::ArithTraits<SC>::val_type, NT::device_type> \
784 computeRowAndColumnOneNorms (const Tpetra::RowMatrix<SC, LO, GO, NT>& A, \
785 const bool assumeSymmetric);
787 #endif // TPETRA_COMPUTEROWANDCOLUMNONENORMS_DEF_HPP