42 #ifndef TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP
50 #include "Teuchos_TimeMonitor.hpp"
51 #ifdef HAVE_TPETRA_DEBUG
53 #endif // HAVE_TPETRA_DEBUG
67 #if defined(__CUDACC__)
69 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
70 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
71 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
73 #elif defined(__GNUC__)
75 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
76 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
77 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
79 #else // some other compiler
82 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
83 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
84 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
85 #endif // defined(__CUDACC__), defined(__GNUC__)
89 namespace Experimental {
95 template<
class AlphaCoeffType,
97 class MatrixValuesType,
101 class BcrsApplyNoTrans1VecTeamFunctor {
103 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
104 "MatrixValuesType must be a Kokkos::View.");
105 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
106 "OutVecType must be a Kokkos::View.");
107 static_assert (Kokkos::Impl::is_view<InVecType>::value,
108 "InVecType must be a Kokkos::View.");
109 static_assert (std::is_same<MatrixValuesType,
110 typename MatrixValuesType::const_type>::value,
111 "MatrixValuesType must be a const Kokkos::View.");
112 static_assert (std::is_same<OutVecType,
113 typename OutVecType::non_const_type>::value,
114 "OutVecType must be a nonconst Kokkos::View.");
115 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
116 "InVecType must be a const Kokkos::View.");
117 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
118 "MatrixValuesType must be a rank-1 Kokkos::View.");
119 static_assert (static_cast<int> (InVecType::rank) == 1,
120 "InVecType must be a rank-1 Kokkos::View.");
121 static_assert (static_cast<int> (OutVecType::rank) == 1,
122 "OutVecType must be a rank-1 Kokkos::View.");
123 typedef typename MatrixValuesType::non_const_value_type scalar_type;
124 typedef typename GraphType::device_type device_type;
125 typedef typename device_type::execution_space execution_space;
126 typedef typename execution_space::scratch_memory_space shmem_space;
130 typedef typename std::remove_const<typename GraphType::data_type>::type
133 typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,
142 void setX (
const InVecType& X) { X_ = X; }
150 void setY (
const OutVecType& Y) { Y_ = Y; }
156 getScratchSizePerTeam ()
const
160 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
161 return blockSize_ *
sizeof (y_value_type);
168 getScratchSizePerThread ()
const
172 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
173 return blockSize_ *
sizeof (y_value_type);
178 getNumLclMeshRows ()
const
180 return ptr_.extent (0) == 0 ?
181 static_cast<local_ordinal_type> (0) :
196 BcrsApplyNoTrans1VecTeamFunctor (
const typename std::decay<AlphaCoeffType>::type& alpha,
197 const GraphType& graph,
198 const MatrixValuesType& val,
199 const local_ordinal_type blockSize,
201 const typename std::decay<BetaCoeffType>::type& beta,
203 const local_ordinal_type rowsPerTeam = defaultRowsPerTeam) :
205 ptr_ (graph.row_map),
206 ind_ (graph.entries),
208 blockSize_ (blockSize),
212 rowsPerTeam_ (rowsPerTeam)
219 KOKKOS_INLINE_FUNCTION
void
220 operator () (
const typename policy_type::member_type& member)
const
226 using Kokkos::Details::ArithTraits;
232 using Kokkos::parallel_for;
233 using Kokkos::subview;
235 typedef typename decltype (ptr_)::non_const_value_type offset_type;
236 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
237 shmem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
239 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
242 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
244 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
247 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
250 const LO leagueRank = member.league_rank();
255 shared_array_type threadLocalMem =
256 shared_array_type (member.thread_scratch (1), blockSize_ * rowsPerTeam_);
261 const LO numLclMeshRows = getNumLclMeshRows ();
262 const LO rowBeg = leagueRank * rowsPerTeam_;
263 const LO rowTmp = rowBeg + rowsPerTeam_;
264 const LO rowEnd = rowTmp < numLclMeshRows ? rowTmp : numLclMeshRows;
275 parallel_for (Kokkos::TeamThreadRange (member, rowBeg, rowEnd),
276 [&] (
const LO& lclRow) {
278 out_little_vec_type Y_tlm (threadLocalMem.data () + blockSize_ * member.team_rank (), blockSize_);
280 const offset_type Y_ptBeg = lclRow * blockSize_;
281 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
283 subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
284 if (beta_ == ArithTraits<BetaCoeffType>::zero ()) {
285 FILL (Y_tlm, ArithTraits<BetaCoeffType>::zero ());
287 else if (beta_ == ArithTraits<BetaCoeffType>::one ()) {
295 if (alpha_ != ArithTraits<AlphaCoeffType>::zero ()) {
296 const offset_type blkBeg = ptr_[lclRow];
297 const offset_type blkEnd = ptr_[lclRow+1];
299 const offset_type bs2 = blockSize_ * blockSize_;
300 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
302 little_block_type A_cur (val_.data () + absBlkOff * bs2,
303 blockSize_, blockSize_);
304 const offset_type X_blkCol = ind_[absBlkOff];
305 const offset_type X_ptBeg = X_blkCol * blockSize_;
306 const offset_type X_ptEnd = X_ptBeg + blockSize_;
308 subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
310 GEMV (alpha_, A_cur, X_cur, Y_tlm);
318 typename std::decay<AlphaCoeffType>::type alpha_;
319 typename GraphType::row_map_type::const_type ptr_;
320 typename GraphType::entries_type::const_type ind_;
321 MatrixValuesType val_;
324 typename std::decay<BetaCoeffType>::type beta_;
330 template<
class AlphaCoeffType,
332 class MatrixValuesType,
336 class BcrsApplyNoTrans1VecFunctor {
338 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
339 "MatrixValuesType must be a Kokkos::View.");
340 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
341 "OutVecType must be a Kokkos::View.");
342 static_assert (Kokkos::Impl::is_view<InVecType>::value,
343 "InVecType must be a Kokkos::View.");
344 static_assert (std::is_same<MatrixValuesType,
345 typename MatrixValuesType::const_type>::value,
346 "MatrixValuesType must be a const Kokkos::View.");
347 static_assert (std::is_same<OutVecType,
348 typename OutVecType::non_const_type>::value,
349 "OutVecType must be a nonconst Kokkos::View.");
350 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
351 "InVecType must be a const Kokkos::View.");
352 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
353 "MatrixValuesType must be a rank-1 Kokkos::View.");
354 static_assert (static_cast<int> (InVecType::rank) == 1,
355 "InVecType must be a rank-1 Kokkos::View.");
356 static_assert (static_cast<int> (OutVecType::rank) == 1,
357 "OutVecType must be a rank-1 Kokkos::View.");
358 typedef typename MatrixValuesType::non_const_value_type scalar_type;
361 typedef typename GraphType::device_type device_type;
364 typedef typename std::remove_const<typename GraphType::data_type>::type
367 typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,
368 typename device_type::execution_space,
376 void setX (
const InVecType& X) { X_ = X; }
384 void setY (
const OutVecType& Y) { Y_ = Y; }
386 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
387 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
390 BcrsApplyNoTrans1VecFunctor (
const alpha_coeff_type& alpha,
391 const GraphType& graph,
392 const MatrixValuesType& val,
393 const local_ordinal_type blockSize,
395 const beta_coeff_type& beta,
396 const OutVecType& Y) :
398 ptr_ (graph.row_map),
399 ind_ (graph.entries),
401 blockSize_ (blockSize),
407 KOKKOS_INLINE_FUNCTION
void
408 operator () (
const local_ordinal_type& lclRow)
const
414 using Kokkos::Details::ArithTraits;
420 using Kokkos::parallel_for;
421 using Kokkos::subview;
422 typedef typename decltype (ptr_)::non_const_value_type offset_type;
423 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
426 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
429 const offset_type Y_ptBeg = lclRow * blockSize_;
430 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
431 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
435 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
436 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
438 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
442 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
443 const offset_type blkBeg = ptr_[lclRow];
444 const offset_type blkEnd = ptr_[lclRow+1];
446 const offset_type bs2 = blockSize_ * blockSize_;
447 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
449 little_block_type A_cur (val_.data () + absBlkOff * bs2,
450 blockSize_, blockSize_);
451 const offset_type X_blkCol = ind_[absBlkOff];
452 const offset_type X_ptBeg = X_blkCol * blockSize_;
453 const offset_type X_ptEnd = X_ptBeg + blockSize_;
454 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
456 GEMV (alpha_, A_cur, X_cur, Y_cur);
462 alpha_coeff_type alpha_;
463 typename GraphType::row_map_type::const_type ptr_;
464 typename GraphType::entries_type::const_type ind_;
465 MatrixValuesType val_;
468 beta_coeff_type beta_;
472 template<
class AlphaCoeffType,
474 class MatrixValuesType,
475 class InMultiVecType,
477 class OutMultiVecType>
479 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
480 const GraphType& graph,
481 const MatrixValuesType& val,
482 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
483 const InMultiVecType& X,
484 const BetaCoeffType& beta,
485 const OutMultiVecType& Y
487 ,
const typename std::remove_const<typename GraphType::data_type>::type rowsPerTeam = 20
491 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
492 "MatrixValuesType must be a Kokkos::View.");
493 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
494 "OutMultiVecType must be a Kokkos::View.");
495 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
496 "InMultiVecType must be a Kokkos::View.");
497 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
498 "MatrixValuesType must be a rank-1 Kokkos::View.");
499 static_assert (static_cast<int> (OutMultiVecType::rank) == 2,
500 "OutMultiVecType must be a rank-2 Kokkos::View.");
501 static_assert (static_cast<int> (InMultiVecType::rank) == 2,
502 "InMultiVecType must be a rank-2 Kokkos::View.");
504 typedef typename MatrixValuesType::const_type matrix_values_type;
505 typedef typename OutMultiVecType::non_const_type out_multivec_type;
506 typedef typename InMultiVecType::const_type in_multivec_type;
507 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
508 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
509 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
511 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
512 static_cast<LO> (0) :
513 static_cast<LO> (graph.row_map.extent (0) - 1);
514 const LO numVecs = Y.extent (1);
515 if (numLocalMeshRows == 0 || numVecs == 0) {
522 in_multivec_type X_in = X;
523 out_multivec_type Y_out = Y;
528 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
529 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
531 typedef BcrsApplyNoTrans1VecTeamFunctor<alpha_type, GraphType,
532 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
534 typedef BcrsApplyNoTrans1VecFunctor<alpha_type, GraphType,
535 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
537 typedef typename functor_type::policy_type policy_type;
539 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
540 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
542 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0, rowsPerTeam);
543 const LO numTeams = functor.getNumTeams ();
544 policy_type policy (numTeams, Kokkos::AUTO ());
550 const LO scratchSizePerTeam = functor.getScratchSizePerTeam ();
551 const LO scratchSizePerThread = functor.getScratchSizePerThread ();
553 policy.set_scratch_size (level,
554 Kokkos::PerTeam (scratchSizePerTeam),
555 Kokkos::PerThread (scratchSizePerThread));
558 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
559 policy_type policy (0, numLocalMeshRows);
563 Kokkos::parallel_for (policy, functor);
566 for (LO j = 1; j < numVecs; ++j) {
567 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
568 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
571 Kokkos::parallel_for (policy, functor);
581 template<
class Scalar,
class LO,
class GO,
class Node>
582 class GetLocalDiagCopy {
584 typedef typename Node::device_type device_type;
585 typedef size_t diag_offset_type;
586 typedef Kokkos::View<
const size_t*, device_type,
587 Kokkos::MemoryUnmanaged> diag_offsets_type;
588 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
590 typedef typename local_graph_type::row_map_type row_offsets_type;
591 typedef typename ::Tpetra::Experimental::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
592 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
593 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
596 GetLocalDiagCopy (
const diag_type& diag,
597 const values_type& val,
598 const diag_offsets_type& diagOffsets,
599 const row_offsets_type& ptr,
600 const LO blockSize) :
602 diagOffsets_ (diagOffsets),
604 blockSize_ (blockSize),
605 offsetPerBlock_ (blockSize_*blockSize_),
609 KOKKOS_INLINE_FUNCTION
void
610 operator() (
const LO& lclRowInd)
const
615 const size_t absOffset = ptr_[lclRowInd];
618 const size_t relOffset = diagOffsets_[lclRowInd];
621 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
625 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
626 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
627 const_little_block_type;
628 const_little_block_type D_in (val_.data () + pointOffset,
629 blockSize_, blockSize_);
630 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
636 diag_offsets_type diagOffsets_;
637 row_offsets_type ptr_;
644 template<
class Scalar,
class LO,
class GO,
class Node>
646 BlockCrsMatrix<Scalar, LO, GO, Node>::
647 markLocalErrorAndGetStream ()
649 * (this->localError_) =
true;
650 if ((*errs_).is_null ()) {
651 *errs_ = Teuchos::rcp (
new std::ostringstream ());
656 template<
class Scalar,
class LO,
class GO,
class Node>
660 graph_ (Teuchos::rcp (new
map_type ()), 0),
661 blockSize_ (static_cast<LO> (0)),
662 X_colMap_ (new Teuchos::RCP<
BMV> ()),
663 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
664 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
666 localError_ (new bool (false)),
667 errs_ (new Teuchos::RCP<std::ostringstream> ())
671 template<
class Scalar,
class LO,
class GO,
class Node>
674 const LO blockSize) :
677 rowMeshMap_ (* (graph.getRowMap ())),
678 blockSize_ (blockSize),
679 X_colMap_ (new Teuchos::RCP<
BMV> ()),
680 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
681 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
682 offsetPerBlock_ (blockSize * blockSize),
683 localError_ (new bool (false)),
684 errs_ (new Teuchos::RCP<std::ostringstream> ())
686 TEUCHOS_TEST_FOR_EXCEPTION(
687 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::"
688 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
689 "rows (isSorted() is false). This class assumes sorted rows.");
691 graphRCP_ = Teuchos::rcpFromRef(graph_);
697 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
698 TEUCHOS_TEST_FOR_EXCEPTION(
699 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::"
700 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
701 " <= 0. The block size must be positive.");
707 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
708 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
711 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
716 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
717 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
720 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
726 val_ = decltype (val_) (
"val", numValEnt);
729 template<
class Scalar,
class LO,
class GO,
class Node>
734 const LO blockSize) :
737 rowMeshMap_ (* (graph.getRowMap ())),
738 domainPointMap_ (domainPointMap),
739 rangePointMap_ (rangePointMap),
740 blockSize_ (blockSize),
741 X_colMap_ (new Teuchos::RCP<
BMV> ()),
742 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
743 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
744 offsetPerBlock_ (blockSize * blockSize),
745 localError_ (new bool (false)),
746 errs_ (new Teuchos::RCP<std::ostringstream> ())
748 TEUCHOS_TEST_FOR_EXCEPTION(
749 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::"
750 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
751 "rows (isSorted() is false). This class assumes sorted rows.");
753 graphRCP_ = Teuchos::rcpFromRef(graph_);
759 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
760 TEUCHOS_TEST_FOR_EXCEPTION(
761 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::"
762 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
763 " <= 0. The block size must be positive.");
766 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
767 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
770 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
775 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
776 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
779 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
785 val_ = decltype (val_) (
"val", numValEnt);
788 template<
class Scalar,
class LO,
class GO,
class Node>
789 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
794 return Teuchos::rcp (
new map_type (domainPointMap_));
797 template<
class Scalar,
class LO,
class GO,
class Node>
798 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
803 return Teuchos::rcp (
new map_type (rangePointMap_));
806 template<
class Scalar,
class LO,
class GO,
class Node>
807 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
811 return graph_.getRowMap();
814 template<
class Scalar,
class LO,
class GO,
class Node>
815 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
819 return graph_.getColMap();
822 template<
class Scalar,
class LO,
class GO,
class Node>
827 return graph_.getGlobalNumRows();
830 template<
class Scalar,
class LO,
class GO,
class Node>
835 return graph_.getNodeNumRows();
838 template<
class Scalar,
class LO,
class GO,
class Node>
843 return graph_.getNodeMaxNumRowEntries();
846 template<
class Scalar,
class LO,
class GO,
class Node>
851 Teuchos::ETransp mode,
856 TEUCHOS_TEST_FOR_EXCEPTION(
857 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
858 std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::apply: "
859 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
860 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
864 const LO blockSize = getBlockSize ();
866 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
867 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
869 catch (std::invalid_argument& e) {
870 TEUCHOS_TEST_FOR_EXCEPTION(
871 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
872 "apply: Either the input MultiVector X or the output MultiVector Y "
873 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
874 "graph. BlockMultiVector's constructor threw the following exception: "
883 const_cast<this_type*> (
this)->applyBlock (X_view, Y_view, mode, alpha, beta);
884 }
catch (std::invalid_argument& e) {
885 TEUCHOS_TEST_FOR_EXCEPTION(
886 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
887 "apply: The implementation method applyBlock complained about having "
888 "an invalid argument. It reported the following: " << e.what ());
889 }
catch (std::logic_error& e) {
890 TEUCHOS_TEST_FOR_EXCEPTION(
891 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
892 "apply: The implementation method applyBlock complained about a "
893 "possible bug in its implementation. It reported the following: "
895 }
catch (std::exception& e) {
896 TEUCHOS_TEST_FOR_EXCEPTION(
897 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
898 "apply: The implementation method applyBlock threw an exception which "
899 "is neither std::invalid_argument nor std::logic_error, but is a "
900 "subclass of std::exception. It reported the following: "
903 TEUCHOS_TEST_FOR_EXCEPTION(
904 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::"
905 "apply: The implementation method applyBlock threw an exception which "
906 "is not an instance of a subclass of std::exception. This probably "
907 "indicates a bug. Please report this to the Tpetra developers.");
911 template<
class Scalar,
class LO,
class GO,
class Node>
916 Teuchos::ETransp mode,
920 TEUCHOS_TEST_FOR_EXCEPTION(
922 "Tpetra::Experimental::BlockCrsMatrix::applyBlock: "
923 "X and Y have different block sizes. X.getBlockSize() = "
927 if (mode == Teuchos::NO_TRANS) {
928 applyBlockNoTrans (X, Y, alpha, beta);
929 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
930 applyBlockTrans (X, Y, mode, alpha, beta);
932 TEUCHOS_TEST_FOR_EXCEPTION(
933 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::"
934 "applyBlock: Invalid 'mode' argument. Valid values are "
935 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
939 template<
class Scalar,
class LO,
class GO,
class Node>
944 #ifdef HAVE_TPETRA_DEBUG
945 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::setAllToScalar: ";
946 #endif // HAVE_TPETRA_DEBUG
948 if (this->
template need_sync<device_type> ()) {
952 #ifdef HAVE_TPETRA_DEBUG
953 TEUCHOS_TEST_FOR_EXCEPTION
954 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
955 prefix <<
"The matrix's values need sync on both device and host.");
956 #endif // HAVE_TPETRA_DEBUG
957 this->
template modify<Kokkos::HostSpace> ();
960 else if (this->
template need_sync<Kokkos::HostSpace> ()) {
964 #ifdef HAVE_TPETRA_DEBUG
965 TEUCHOS_TEST_FOR_EXCEPTION
966 (this->
template need_sync<device_type> (), std::runtime_error,
967 prefix <<
"The matrix's values need sync on both host and device.");
968 #endif // HAVE_TPETRA_DEBUG
969 this->
template modify<device_type> ();
973 this->
template modify<device_type> ();
978 template<
class Scalar,
class LO,
class GO,
class Node>
984 const LO numColInds)
const
986 #ifdef HAVE_TPETRA_DEBUG
987 const char prefix[] =
988 "Tpetra::Experimental::BlockCrsMatrix::replaceLocalValues: ";
989 #endif // HAVE_TPETRA_DEBUG
991 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
996 return static_cast<LO> (0);
999 reinterpret_cast<const impl_scalar_type*> (vals);
1000 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1001 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1002 const LO perBlockSize = this->offsetPerBlock ();
1007 #ifdef HAVE_TPETRA_DEBUG
1008 TEUCHOS_TEST_FOR_EXCEPTION
1009 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1010 prefix <<
"The matrix's data were last modified on device, but have "
1011 "not been sync'd to host. Please sync to host (by calling "
1012 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1014 #endif // HAVE_TPETRA_DEBUG
1020 auto vals_host_out =
1021 const_cast<this_type*> (
this)->template getValues<Kokkos::HostSpace> ();
1024 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1025 const LO relBlockOffset =
1026 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1027 if (relBlockOffset != LINV) {
1036 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1040 vals_host_out_raw + absBlockOffset * perBlockSize;
1045 for (LO i = 0; i < perBlockSize; ++i) {
1046 A_old[i] = A_new[i];
1048 hint = relBlockOffset + 1;
1055 template <
class Scalar,
class LO,
class GO,
class Node>
1059 Kokkos::MemoryUnmanaged>& offsets)
const
1061 graph_.getLocalDiagOffsets (offsets);
1064 template <
class Scalar,
class LO,
class GO,
class Node>
1065 void TPETRA_DEPRECATED
1074 const size_t lclNumRows = graph_.getNodeNumRows ();
1075 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
1076 offsets.resize (lclNumRows);
1082 typedef typename device_type::memory_space
memory_space;
1083 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
1088 Kokkos::MemoryUnmanaged> output_type;
1089 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1090 graph_.getLocalDiagOffsets (offsetsOut);
1093 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
1094 graph_.getLocalDiagOffsets (offsetsTmp);
1095 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
1096 Kokkos::MemoryUnmanaged> output_type;
1097 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1102 template <
class Scalar,
class LO,
class GO,
class Node>
1108 Kokkos::MemoryUnmanaged>& D_inv,
1109 const Scalar& omega,
1114 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1116 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1117 const LO numLocalMeshRows =
1118 static_cast<LO> (rowMeshMap_.getNodeNumElements ());
1125 const LO blockSize = getBlockSize ();
1126 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1127 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1131 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1132 if (direction == Forward) {
1134 rowEnd = numLocalMeshRows+1;
1137 else if (direction == Backward) {
1138 rowBegin = numLocalMeshRows;
1142 else if (direction == Symmetric) {
1143 this->localGaussSeidel (B, X, D_inv, omega, Forward);
1144 this->localGaussSeidel (B, X, D_inv, omega, Backward);
1148 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1149 const Scalar minus_omega = -omega;
1152 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1153 const LO actlRow = lclRow - 1;
1156 COPY (B_cur, X_lcl);
1157 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1159 const size_t meshBeg = ptrHost_[actlRow];
1160 const size_t meshEnd = ptrHost_[actlRow+1];
1161 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1162 const LO meshCol = indHost_[absBlkOff];
1164 getConstLocalBlockFromAbsOffset (absBlkOff);
1168 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1170 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1176 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1178 FILL (X_update, zero);
1179 GEMV (one, D_lcl, X_lcl, X_update);
1183 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1184 for (LO j = 0; j < numVecs; ++j) {
1185 LO actlRow = lclRow-1;
1188 COPY (B_cur, X_lcl);
1189 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1191 const size_t meshBeg = ptrHost_[actlRow];
1192 const size_t meshEnd = ptrHost_[actlRow+1];
1193 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1194 const LO meshCol = indHost_[absBlkOff];
1196 getConstLocalBlockFromAbsOffset (absBlkOff);
1200 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1201 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1204 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1206 FILL (X_update, zero);
1207 GEMV (one, D_lcl, X_lcl, X_update);
1213 template <
class Scalar,
class LO,
class GO,
class Node>
1219 const Scalar& dampingFactor,
1221 const int numSweeps,
1222 const bool zeroInitialGuess)
const
1226 TEUCHOS_TEST_FOR_EXCEPTION(
1227 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::"
1228 "gaussSeidelCopy: Not implemented.");
1231 template <
class Scalar,
class LO,
class GO,
class Node>
1237 const Teuchos::ArrayView<LO>& rowIndices,
1238 const Scalar& dampingFactor,
1240 const int numSweeps,
1241 const bool zeroInitialGuess)
const
1245 TEUCHOS_TEST_FOR_EXCEPTION(
1246 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::"
1247 "reorderedGaussSeidelCopy: Not implemented.");
1250 template <
class Scalar,
class LO,
class GO,
class Node>
1251 void TPETRA_DEPRECATED
1254 const Teuchos::ArrayView<const size_t>& offsets)
const
1256 using Teuchos::ArrayRCP;
1257 using Teuchos::ArrayView;
1258 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1260 const size_t myNumRows = rowMeshMap_.getNodeNumElements();
1261 const LO* columnIndices;
1264 Teuchos::Array<LO> cols(1);
1267 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_,
ZERO);
1268 for (
size_t i = 0; i < myNumRows; ++i) {
1270 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1274 getLocalRowView (i, columnIndices, vals, numColumns);
1275 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
1281 template <
class Scalar,
class LO,
class GO,
class Node>
1285 Kokkos::MemoryUnmanaged>& diag,
1287 Kokkos::MemoryUnmanaged>& offsets)
const
1289 using Kokkos::parallel_for;
1291 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1293 const LO lclNumMeshRows = static_cast<LO> (rowMeshMap_.getNodeNumElements ());
1294 const LO blockSize = this->getBlockSize ();
1295 TEUCHOS_TEST_FOR_EXCEPTION
1296 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1297 static_cast<LO> (diag.extent (1)) < blockSize ||
1298 static_cast<LO> (diag.extent (2)) < blockSize,
1299 std::invalid_argument, prefix <<
1300 "The input Kokkos::View is not big enough to hold all the data.");
1301 TEUCHOS_TEST_FOR_EXCEPTION
1302 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1303 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
1304 "diagonal blocks " << lclNumMeshRows <<
".");
1306 #ifdef HAVE_TPETRA_DEBUG
1307 TEUCHOS_TEST_FOR_EXCEPTION
1308 (this->
template need_sync<device_type> (), std::runtime_error,
1309 prefix <<
"The matrix's data were last modified on host, but have "
1310 "not been sync'd to device. Please sync to device (by calling "
1311 "sync<device_type>() on this matrix) before calling this method.");
1312 #endif // HAVE_TPETRA_DEBUG
1314 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1315 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1323 const_cast<this_type*> (
this)->template getValues<device_type> ();
1325 parallel_for (policy_type (0, lclNumMeshRows),
1326 functor_type (diag, vals_dev, offsets,
1327 graph_.getLocalGraph ().row_map, blockSize_));
1331 template <
class Scalar,
class LO,
class GO,
class Node>
1335 Kokkos::MemoryUnmanaged>& diag,
1336 const Teuchos::ArrayView<const size_t>& offsets)
const
1339 using Kokkos::parallel_for;
1341 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1343 const LO lclNumMeshRows = static_cast<LO> (rowMeshMap_.getNodeNumElements ());
1344 const LO blockSize = this->getBlockSize ();
1345 TEUCHOS_TEST_FOR_EXCEPTION
1346 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1347 static_cast<LO> (diag.extent (1)) < blockSize ||
1348 static_cast<LO> (diag.extent (2)) < blockSize,
1349 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1350 "The input Kokkos::View is not big enough to hold all the data.");
1351 TEUCHOS_TEST_FOR_EXCEPTION
1352 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
1353 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1354 "offsets.size() = " << offsets.size () <<
" < local number of diagonal "
1355 "blocks " << lclNumMeshRows <<
".");
1359 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1360 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1361 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1362 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1368 template<
class Scalar,
class LO,
class GO,
class Node>
1373 const Scalar vals[],
1374 const LO numColInds)
const
1376 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1381 return static_cast<LO> (0);
1384 reinterpret_cast<const impl_scalar_type*> (vals);
1385 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1386 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1387 const LO perBlockSize = this->offsetPerBlock ();
1392 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1393 const LO relBlockOffset =
1394 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1395 if (relBlockOffset != LINV) {
1396 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1398 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1400 getConstLocalBlockFromInput (vIn, pointOffset);
1402 ::Tpetra::Experimental::Impl::absMax (A_old, A_new);
1403 hint = relBlockOffset + 1;
1411 template<
class Scalar,
class LO,
class GO,
class Node>
1416 const Scalar vals[],
1417 const LO numColInds)
const
1419 #ifdef HAVE_TPETRA_DEBUG
1420 const char prefix[] =
1421 "Tpetra::Experimental::BlockCrsMatrix::sumIntoLocalValues: ";
1422 #endif // HAVE_TPETRA_DEBUG
1424 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1429 return static_cast<LO> (0);
1433 reinterpret_cast<const impl_scalar_type*> (vals);
1434 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1435 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1436 const LO perBlockSize = this->offsetPerBlock ();
1441 #ifdef HAVE_TPETRA_DEBUG
1442 TEUCHOS_TEST_FOR_EXCEPTION
1443 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1444 prefix <<
"The matrix's data were last modified on device, but have not "
1445 "been sync'd to host. Please sync to host (by calling "
1446 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1447 #endif // HAVE_TPETRA_DEBUG
1453 auto vals_host_out =
1454 const_cast<this_type*> (
this)->template getValues<Kokkos::HostSpace> ();
1457 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1458 const LO relBlockOffset =
1459 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1460 if (relBlockOffset != LINV) {
1469 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1473 vals_host_out_raw + absBlockOffset * perBlockSize;
1478 for (LO i = 0; i < perBlockSize; ++i) {
1479 A_old[i] += A_new[i];
1481 hint = relBlockOffset + 1;
1488 template<
class Scalar,
class LO,
class GO,
class Node>
1496 #ifdef HAVE_TPETRA_DEBUG
1497 const char prefix[] =
1498 "Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: ";
1499 #endif // HAVE_TPETRA_DEBUG
1501 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1505 return Teuchos::OrdinalTraits<LO>::invalid ();
1508 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1509 colInds = indHost_.data () + absBlockOffsetStart;
1511 #ifdef HAVE_TPETRA_DEBUG
1512 TEUCHOS_TEST_FOR_EXCEPTION
1513 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1514 prefix <<
"The matrix's data were last modified on device, but have "
1515 "not been sync'd to host. Please sync to host (by calling "
1516 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1518 #endif // HAVE_TPETRA_DEBUG
1524 auto vals_host_out =
1525 const_cast<this_type*> (
this)->template getValues<Kokkos::HostSpace> ();
1528 absBlockOffsetStart * offsetPerBlock ();
1529 vals = reinterpret_cast<Scalar*> (vOut);
1531 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1536 template<
class Scalar,
class LO,
class GO,
class Node>
1540 const Teuchos::ArrayView<LO>& Indices,
1541 const Teuchos::ArrayView<Scalar>& Values,
1542 size_t &NumEntries)
const
1547 getLocalRowView(LocalRow,colInds,vals,numInds);
1548 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1549 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1550 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1551 << numInds <<
" row entries");
1553 for (LO i=0; i<numInds; ++i) {
1554 Indices[i] = colInds[i];
1556 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1557 Values[i] = vals[i];
1559 NumEntries = numInds;
1562 template<
class Scalar,
class LO,
class GO,
class Node>
1566 ptrdiff_t offsets[],
1568 const LO numColInds)
const
1570 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1575 return static_cast<LO> (0);
1578 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1582 for (LO k = 0; k < numColInds; ++k) {
1583 const LO relBlockOffset =
1584 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1585 if (relBlockOffset != LINV) {
1586 offsets[k] = static_cast<ptrdiff_t> (relBlockOffset);
1587 hint = relBlockOffset + 1;
1595 template<
class Scalar,
class LO,
class GO,
class Node>
1599 const ptrdiff_t offsets[],
1600 const Scalar vals[],
1601 const LO numOffsets)
const
1603 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1608 return static_cast<LO> (0);
1610 const impl_scalar_type*
const vIn = reinterpret_cast<const impl_scalar_type*> (vals);
1612 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1613 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ());
1614 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1615 size_t pointOffset = 0;
1618 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1619 const size_t relBlockOffset = offsets[k];
1620 if (relBlockOffset != STINV) {
1621 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1623 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1625 getConstLocalBlockFromInput (vIn, pointOffset);
1626 COPY (A_new, A_old);
1634 template<
class Scalar,
class LO,
class GO,
class Node>
1638 const ptrdiff_t offsets[],
1639 const Scalar vals[],
1640 const LO numOffsets)
const
1642 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1647 return static_cast<LO> (0);
1649 const impl_scalar_type*
const vIn = reinterpret_cast<const impl_scalar_type*> (vals);
1651 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1652 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ());
1653 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1654 size_t pointOffset = 0;
1657 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1658 const size_t relBlockOffset = offsets[k];
1659 if (relBlockOffset != STINV) {
1660 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1662 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1664 getConstLocalBlockFromInput (vIn, pointOffset);
1665 ::Tpetra::Experimental::Impl::absMax (A_old, A_new);
1673 template<
class Scalar,
class LO,
class GO,
class Node>
1677 const ptrdiff_t offsets[],
1678 const Scalar vals[],
1679 const LO numOffsets)
const
1681 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1686 return static_cast<LO> (0);
1689 const impl_scalar_type*
const vIn = reinterpret_cast<const impl_scalar_type*> (vals);
1691 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1692 const size_t perBlockSize = static_cast<LO> (offsetPerBlock ());
1693 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1694 size_t pointOffset = 0;
1697 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1698 const size_t relBlockOffset = offsets[k];
1699 if (relBlockOffset != STINV) {
1700 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1702 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1704 getConstLocalBlockFromInput (vIn, pointOffset);
1706 AXPY (ONE, A_new, A_old);
1714 template<
class Scalar,
class LO,
class GO,
class Node>
1719 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
1720 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1721 return static_cast<LO> (0);
1723 return static_cast<LO> (numEntInGraph);
1727 template<
class Scalar,
class LO,
class GO,
class Node>
1732 const Teuchos::ETransp mode,
1742 TEUCHOS_TEST_FOR_EXCEPTION(
1743 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::apply: "
1744 "transpose and conjugate transpose modes are not yet implemented.");
1747 template<
class Scalar,
class LO,
class GO,
class Node>
1757 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1758 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1759 const Scalar zero = STS::zero ();
1760 const Scalar one = STS::one ();
1761 RCP<const import_type>
import = graph_.getImporter ();
1763 RCP<const export_type> theExport = graph_.getExporter ();
1764 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1766 if (alpha == zero) {
1770 else if (beta != one) {
1775 const BMV* X_colMap = NULL;
1776 if (
import.is_null ()) {
1780 catch (std::exception& e) {
1781 TEUCHOS_TEST_FOR_EXCEPTION
1782 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1783 "operator= threw an exception: " << std::endl << e.what ());
1793 if ((*X_colMap_).is_null () ||
1796 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1799 #ifdef HAVE_TPETRA_BCRS_DO_POINT_IMPORT
1800 if (pointImporter_->is_null ()) {
1803 const auto domainPointMap = rcp (
new typename BMV::map_type (domainPointMap_));
1804 const auto colPointMap = rcp (
new typename BMV::map_type (
1805 BMV::makePointMap (*graph_.getColMap(),
1807 *pointImporter_ = rcp (
new typename crs_graph_type::import_type (
1808 domainPointMap, colPointMap));
1817 X_colMap = &(**X_colMap_);
1819 catch (std::exception& e) {
1820 TEUCHOS_TEST_FOR_EXCEPTION
1821 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1822 "operator= threw an exception: " << std::endl << e.what ());
1826 BMV* Y_rowMap = NULL;
1827 if (theExport.is_null ()) {
1830 else if ((*Y_rowMap_).is_null () ||
1833 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1836 Y_rowMap = &(**Y_rowMap_);
1838 catch (std::exception& e) {
1839 TEUCHOS_TEST_FOR_EXCEPTION(
1840 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1841 "operator= threw an exception: " << std::endl << e.what ());
1846 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1848 catch (std::exception& e) {
1849 TEUCHOS_TEST_FOR_EXCEPTION
1850 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1851 "an exception: " << e.what ());
1854 TEUCHOS_TEST_FOR_EXCEPTION
1855 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1856 "an exception not a subclass of std::exception.");
1859 if (! theExport.is_null ()) {
1865 template<
class Scalar,
class LO,
class GO,
class Node>
1867 BlockCrsMatrix<Scalar, LO, GO, Node>::
1868 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1869 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1873 using ::Tpetra::Experimental::Classes::Impl::bcrsLocalApplyNoTrans;
1875 const impl_scalar_type alpha_impl = alpha;
1876 const auto graph = this->graph_.getLocalGraph ();
1877 const impl_scalar_type beta_impl = beta;
1878 const LO blockSize = this->getBlockSize ();
1880 auto X_mv = X.getMultiVectorView ();
1881 auto Y_mv = Y.getMultiVectorView ();
1882 Y_mv.template modify<device_type> ();
1884 auto X_lcl = X_mv.template getLocalView<device_type> ();
1885 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1886 auto val = this->val_.template view<device_type> ();
1888 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1892 template<
class Scalar,
class LO,
class GO,
class Node>
1894 BlockCrsMatrix<Scalar, LO, GO, Node>::
1895 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1896 const LO colIndexToFind,
1897 const LO hint)
const
1899 const size_t absStartOffset = ptrHost_[localRowIndex];
1900 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1901 const LO numEntriesInRow = static_cast<LO> (absEndOffset - absStartOffset);
1903 const LO*
const curInd = indHost_.data () + absStartOffset;
1906 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1913 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1918 const LO maxNumEntriesForLinearSearch = 32;
1919 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1924 const LO* beg = curInd;
1925 const LO* end = curInd + numEntriesInRow;
1926 std::pair<const LO*, const LO*> p =
1927 std::equal_range (beg, end, colIndexToFind);
1928 if (p.first != p.second) {
1930 relOffset = static_cast<LO> (p.first - beg);
1934 for (LO k = 0; k < numEntriesInRow; ++k) {
1935 if (colIndexToFind == curInd[k]) {
1945 template<
class Scalar,
class LO,
class GO,
class Node>
1947 BlockCrsMatrix<Scalar, LO, GO, Node>::
1948 offsetPerBlock ()
const
1950 return offsetPerBlock_;
1953 template<
class Scalar,
class LO,
class GO,
class Node>
1955 BlockCrsMatrix<Scalar, LO, GO, Node>::
1956 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1957 const size_t pointOffset)
const
1960 const LO rowStride = blockSize_;
1961 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1964 template<
class Scalar,
class LO,
class GO,
class Node>
1966 BlockCrsMatrix<Scalar, LO, GO, Node>::
1967 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1968 const size_t pointOffset)
const
1971 const LO rowStride = blockSize_;
1972 return little_block_type (val + pointOffset, blockSize_, rowStride);
1975 template<
class Scalar,
class LO,
class GO,
class Node>
1977 BlockCrsMatrix<Scalar, LO, GO, Node>::
1978 getConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
1980 #ifdef HAVE_TPETRA_DEBUG
1981 const char prefix[] =
1982 "Tpetra::Experimental::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
1983 #endif // HAVE_TPETRA_DEBUG
1985 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
1989 return const_little_block_type ();
1992 #ifdef HAVE_TPETRA_DEBUG
1993 TEUCHOS_TEST_FOR_EXCEPTION
1994 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1995 prefix <<
"The matrix's data were last modified on device, but have "
1996 "not been sync'd to host. Please sync to host (by calling "
1997 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1999 #endif // HAVE_TPETRA_DEBUG
2000 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2005 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2007 const_cast<this_type*> (
this)->template getValues<Kokkos::HostSpace> ();
2008 const impl_scalar_type* vals_host_raw = vals_host.data ();
2010 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2014 template<
class Scalar,
class LO,
class GO,
class Node>
2016 BlockCrsMatrix<Scalar, LO, GO, Node>::
2017 getConstLocalBlockFromRelOffset (
const LO lclMeshRow,
2018 const size_t relMeshOffset)
const
2020 typedef impl_scalar_type IST;
2022 const LO* lclColInds = NULL;
2023 Scalar* lclVals = NULL;
2026 LO err = this->getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
2031 return const_little_block_type ();
2034 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
2035 IST* lclValsImpl = reinterpret_cast<IST*> (lclVals);
2036 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
2041 template<
class Scalar,
class LO,
class GO,
class Node>
2043 BlockCrsMatrix<Scalar, LO, GO, Node>::
2044 getNonConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
2046 #ifdef HAVE_TPETRA_DEBUG
2047 const char prefix[] =
2048 "Tpetra::Experimental::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
2049 #endif // HAVE_TPETRA_DEBUG
2051 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
2055 return little_block_type ();
2058 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2059 #ifdef HAVE_TPETRA_DEBUG
2060 TEUCHOS_TEST_FOR_EXCEPTION
2061 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
2062 prefix <<
"The matrix's data were last modified on device, but have "
2063 "not been sync'd to host. Please sync to host (by calling "
2064 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
2066 #endif // HAVE_TPETRA_DEBUG
2070 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2072 const_cast<this_type*> (
this)->template getValues<Kokkos::HostSpace> ();
2073 impl_scalar_type* vals_host_raw = vals_host.data ();
2074 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2078 template<
class Scalar,
class LO,
class GO,
class Node>
2080 BlockCrsMatrix<Scalar, LO, GO, Node>::
2081 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const
2083 const size_t absRowBlockOffset = ptrHost_[localRowInd];
2084 const LO relBlockOffset =
2085 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
2087 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
2088 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
2089 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
2092 return little_block_type ();
2106 template<
class Scalar,
class LO,
class GO,
class Node>
2108 BlockCrsMatrix<Scalar, LO, GO, Node>::
2109 checkSizes (const ::Tpetra::SrcDistObject& source)
2112 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2113 const this_type* src = dynamic_cast<const this_type* > (&source);
2116 std::ostream& err = markLocalErrorAndGetStream ();
2117 err <<
"checkSizes: The source object of the Import or Export "
2118 "must be a BlockCrsMatrix with the same template parameters as the "
2119 "target object." << endl;
2124 if (src->getBlockSize () != this->getBlockSize ()) {
2125 std::ostream& err = markLocalErrorAndGetStream ();
2126 err <<
"checkSizes: The source and target objects of the Import or "
2127 <<
"Export must have the same block sizes. The source's block "
2128 <<
"size = " << src->getBlockSize () <<
" != the target's block "
2129 <<
"size = " << this->getBlockSize () <<
"." << endl;
2131 if (! src->graph_.isFillComplete ()) {
2132 std::ostream& err = markLocalErrorAndGetStream ();
2133 err <<
"checkSizes: The source object of the Import or Export is "
2134 "not fill complete. Both source and target objects must be fill "
2135 "complete." << endl;
2137 if (! this->graph_.isFillComplete ()) {
2138 std::ostream& err = markLocalErrorAndGetStream ();
2139 err <<
"checkSizes: The target object of the Import or Export is "
2140 "not fill complete. Both source and target objects must be fill "
2141 "complete." << endl;
2143 if (src->graph_.getColMap ().is_null ()) {
2144 std::ostream& err = markLocalErrorAndGetStream ();
2145 err <<
"checkSizes: The source object of the Import or Export does "
2146 "not have a column Map. Both source and target objects must have "
2147 "column Maps." << endl;
2149 if (this->graph_.getColMap ().is_null ()) {
2150 std::ostream& err = markLocalErrorAndGetStream ();
2151 err <<
"checkSizes: The target object of the Import or Export does "
2152 "not have a column Map. Both source and target objects must have "
2153 "column Maps." << endl;
2157 return ! (* (this->localError_));
2160 template<
class Scalar,
class LO,
class GO,
class Node>
2162 BlockCrsMatrix<Scalar, LO, GO, Node>::
2163 copyAndPermute (const ::Tpetra::SrcDistObject& source,
2165 const Teuchos::ArrayView<const LO>& permuteToLIDs,
2166 const Teuchos::ArrayView<const LO>& permuteFromLIDs)
2169 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2170 const bool debug =
false;
2173 std::ostringstream os;
2174 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2175 os <<
"Proc " << myRank <<
": copyAndPermute: "
2176 <<
"numSameIDs: " << numSameIDs
2177 <<
", permuteToLIDs.size(): " << permuteToLIDs.size ()
2178 <<
", permuteFromLIDs.size(): " << permuteFromLIDs.size ()
2180 std::cerr << os.str ();
2186 if (* (this->localError_)) {
2187 std::ostream& err = this->markLocalErrorAndGetStream ();
2188 err <<
"copyAndPermute: The target object of the Import or Export is "
2189 "already in an error state." << endl;
2192 if (permuteToLIDs.size () != permuteFromLIDs.size ()) {
2193 std::ostream& err = this->markLocalErrorAndGetStream ();
2194 err <<
"copyAndPermute: permuteToLIDs.size() = " << permuteToLIDs.size ()
2195 <<
" != permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
"."
2200 const this_type* src = dynamic_cast<const this_type* > (&source);
2202 std::ostream& err = this->markLocalErrorAndGetStream ();
2203 err <<
"copyAndPermute: The source object of the Import or Export is "
2204 "either not a BlockCrsMatrix, or does not have the right template "
2205 "parameters. checkSizes() should have caught this. "
2206 "Please report this bug to the Tpetra developers." << endl;
2209 if (* (src->localError_)) {
2210 std::ostream& err = this->markLocalErrorAndGetStream ();
2211 err <<
"copyAndPermute: The source object of the Import or Export is "
2212 "already in an error state." << endl;
2216 bool lclErr =
false;
2217 #ifdef HAVE_TPETRA_DEBUG
2218 std::set<LO> invalidSrcCopyRows;
2219 std::set<LO> invalidDstCopyRows;
2220 std::set<LO> invalidDstCopyCols;
2221 std::set<LO> invalidDstPermuteCols;
2222 std::set<LO> invalidPermuteFromRows;
2223 #endif // HAVE_TPETRA_DEBUG
2232 #ifdef HAVE_TPETRA_DEBUG
2233 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2234 #endif // HAVE_TPETRA_DEBUG
2235 const map_type& dstRowMap = * (this->graph_.getRowMap ());
2236 const map_type& srcColMap = * (src->graph_.getColMap ());
2237 const map_type& dstColMap = * (this->graph_.getColMap ());
2238 const bool canUseLocalColumnIndices = srcColMap.locallySameAs (dstColMap);
2239 const size_t numPermute = static_cast<size_t> (permuteFromLIDs.size ());
2242 std::ostringstream os;
2243 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2244 os <<
"Proc " << myRank <<
": copyAndPermute: "
2245 <<
"canUseLocalColumnIndices: "
2246 << (canUseLocalColumnIndices ?
"true" :
"false")
2247 <<
", numPermute: " << numPermute
2249 std::cerr << os.str ();
2252 if (canUseLocalColumnIndices) {
2254 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2255 #ifdef HAVE_TPETRA_DEBUG
2256 if (! srcRowMap.isNodeLocalElement (localRow)) {
2258 invalidSrcCopyRows.insert (localRow);
2261 #endif // HAVE_TPETRA_DEBUG
2263 const LO* lclSrcCols;
2268 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2271 #ifdef HAVE_TPETRA_DEBUG
2272 (void) invalidSrcCopyRows.insert (localRow);
2273 #endif // HAVE_TPETRA_DEBUG
2276 err = this->replaceLocalValues (localRow, lclSrcCols, vals, numEntries);
2277 if (err != numEntries) {
2279 if (! dstRowMap.isNodeLocalElement (localRow)) {
2280 #ifdef HAVE_TPETRA_DEBUG
2281 invalidDstCopyRows.insert (localRow);
2282 #endif // HAVE_TPETRA_DEBUG
2290 for (LO k = 0; k < numEntries; ++k) {
2291 if (! dstColMap.isNodeLocalElement (lclSrcCols[k])) {
2293 #ifdef HAVE_TPETRA_DEBUG
2294 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2295 #endif // HAVE_TPETRA_DEBUG
2304 for (
size_t k = 0; k < numPermute; ++k) {
2305 const LO srcLclRow = static_cast<LO> (permuteFromLIDs[k]);
2306 const LO dstLclRow = static_cast<LO> (permuteToLIDs[k]);
2308 const LO* lclSrcCols;
2311 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2314 #ifdef HAVE_TPETRA_DEBUG
2315 invalidPermuteFromRows.insert (srcLclRow);
2316 #endif // HAVE_TPETRA_DEBUG
2319 err = this->replaceLocalValues (dstLclRow, lclSrcCols, vals, numEntries);
2320 if (err != numEntries) {
2322 #ifdef HAVE_TPETRA_DEBUG
2323 for (LO c = 0; c < numEntries; ++c) {
2324 if (! dstColMap.isNodeLocalElement (lclSrcCols[c])) {
2325 invalidDstPermuteCols.insert (lclSrcCols[c]);
2328 #endif // HAVE_TPETRA_DEBUG
2335 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2336 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2339 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2340 const LO* lclSrcCols;
2347 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2348 }
catch (std::exception& e) {
2350 std::ostringstream os;
2351 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2352 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2353 << localRow <<
", src->getLocalRowView() threw an exception: "
2355 std::cerr << os.str ();
2362 #ifdef HAVE_TPETRA_DEBUG
2363 invalidSrcCopyRows.insert (localRow);
2364 #endif // HAVE_TPETRA_DEBUG
2367 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2370 std::ostringstream os;
2371 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2372 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2373 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
2374 << maxNumEnt << endl;
2375 std::cerr << os.str ();
2381 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2382 for (LO j = 0; j < numEntries; ++j) {
2383 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2384 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2386 #ifdef HAVE_TPETRA_DEBUG
2387 invalidDstCopyCols.insert (lclDstColsView[j]);
2388 #endif // HAVE_TPETRA_DEBUG
2392 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2393 }
catch (std::exception& e) {
2395 std::ostringstream os;
2396 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2397 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2398 << localRow <<
", this->replaceLocalValues() threw an exception: "
2400 std::cerr << os.str ();
2404 if (err != numEntries) {
2407 std::ostringstream os;
2408 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2409 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
2410 "localRow " << localRow <<
", this->replaceLocalValues "
2411 "returned " << err <<
" instead of numEntries = "
2412 << numEntries << endl;
2413 std::cerr << os.str ();
2421 for (
size_t k = 0; k < numPermute; ++k) {
2422 const LO srcLclRow = static_cast<LO> (permuteFromLIDs[k]);
2423 const LO dstLclRow = static_cast<LO> (permuteToLIDs[k]);
2425 const LO* lclSrcCols;
2430 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2431 }
catch (std::exception& e) {
2433 std::ostringstream os;
2434 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2435 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
2436 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2437 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2438 std::cerr << os.str ();
2445 #ifdef HAVE_TPETRA_DEBUG
2446 invalidPermuteFromRows.insert (srcLclRow);
2447 #endif // HAVE_TPETRA_DEBUG
2450 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2456 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2457 for (LO j = 0; j < numEntries; ++j) {
2458 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2459 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2461 #ifdef HAVE_TPETRA_DEBUG
2462 invalidDstPermuteCols.insert (lclDstColsView[j]);
2463 #endif // HAVE_TPETRA_DEBUG
2466 err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2467 if (err != numEntries) {
2476 std::ostream& err = this->markLocalErrorAndGetStream ();
2477 #ifdef HAVE_TPETRA_DEBUG
2478 err <<
"copyAndPermute: The graph structure of the source of the "
2479 "Import or Export must be a subset of the graph structure of the "
2481 err <<
"invalidSrcCopyRows = [";
2482 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2483 it != invalidSrcCopyRows.end (); ++it) {
2485 typename std::set<LO>::const_iterator itp1 = it;
2487 if (itp1 != invalidSrcCopyRows.end ()) {
2491 err <<
"], invalidDstCopyRows = [";
2492 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2493 it != invalidDstCopyRows.end (); ++it) {
2495 typename std::set<LO>::const_iterator itp1 = it;
2497 if (itp1 != invalidDstCopyRows.end ()) {
2501 err <<
"], invalidDstCopyCols = [";
2502 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2503 it != invalidDstCopyCols.end (); ++it) {
2505 typename std::set<LO>::const_iterator itp1 = it;
2507 if (itp1 != invalidDstCopyCols.end ()) {
2511 err <<
"], invalidDstPermuteCols = [";
2512 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2513 it != invalidDstPermuteCols.end (); ++it) {
2515 typename std::set<LO>::const_iterator itp1 = it;
2517 if (itp1 != invalidDstPermuteCols.end ()) {
2521 err <<
"], invalidPermuteFromRows = [";
2522 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2523 it != invalidPermuteFromRows.end (); ++it) {
2525 typename std::set<LO>::const_iterator itp1 = it;
2527 if (itp1 != invalidPermuteFromRows.end ()) {
2531 err <<
"]" << std::endl;
2533 err <<
"copyAndPermute: The graph structure of the source of the "
2534 "Import or Export must be a subset of the graph structure of the "
2535 "target." << std::endl;
2536 #endif // HAVE_TPETRA_DEBUG
2540 std::ostringstream os;
2541 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2542 const bool lclSuccess = ! (* (this->localError_));
2543 os <<
"*** Proc " << myRank <<
": copyAndPermute "
2544 << (lclSuccess ?
"succeeded" :
"FAILED");
2548 os <<
": error messages: " << this->errorMessages ();
2550 std::cerr << os.str ();
2574 template<
class LO,
class GO,
class D>
2576 packRowCount (
const size_t numEnt,
2577 const size_t numBytesPerValue,
2578 const size_t blkSize)
2580 using ::Tpetra::Details::PackTraits;
2590 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2591 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2592 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2593 return numEntLen + gidsLen + valsLen;
2607 template<
class ST,
class LO,
class GO,
class D>
2609 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO, D>::input_buffer_type& imports,
2610 const size_t offset,
2611 const size_t numBytes,
2612 const size_t numBytesPerValue)
2614 using Kokkos::subview;
2615 using ::Tpetra::Details::PackTraits;
2617 if (numBytes == 0) {
2619 return static_cast<size_t> (0);
2623 #ifdef HAVE_TPETRA_DEBUG
2624 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2625 TEUCHOS_TEST_FOR_EXCEPTION(
2626 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2627 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2629 #endif // HAVE_TPETRA_DEBUG
2630 const char*
const inBuf = imports.data () + offset;
2631 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2632 #ifdef HAVE_TPETRA_DEBUG
2633 TEUCHOS_TEST_FOR_EXCEPTION(
2634 actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2635 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2638 (void) actualNumBytes;
2639 #endif // HAVE_TPETRA_DEBUG
2640 return static_cast<size_t> (numEntLO);
2651 template<
class ST,
class LO,
class GO,
class D>
2653 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO, D>::output_buffer_type& exports,
2654 const size_t offset,
2655 const size_t numEnt,
2656 const typename ::Tpetra::Details::PackTraits<GO, D>::input_array_type& gidsIn,
2657 const typename ::Tpetra::Details::PackTraits<ST, D>::input_array_type& valsIn,
2658 const size_t numBytesPerValue,
2659 const size_t blockSize)
2661 using Kokkos::subview;
2662 using ::Tpetra::Details::PackTraits;
2668 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2671 const LO numEntLO = static_cast<size_t> (numEnt);
2673 const size_t numEntBeg = offset;
2674 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2675 const size_t gidsBeg = numEntBeg + numEntLen;
2676 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2677 const size_t valsBeg = gidsBeg + gidsLen;
2678 const size_t valsLen = numScalarEnt * numBytesPerValue;
2680 char*
const numEntOut = exports.data () + numEntBeg;
2681 char*
const gidsOut = exports.data () + gidsBeg;
2682 char*
const valsOut = exports.data () + valsBeg;
2684 size_t numBytesOut = 0;
2686 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2689 Kokkos::pair<int, size_t> p;
2690 p = PackTraits<GO, D>::packArray (gidsOut, gidsIn.data (), numEnt);
2691 errorCode += p.first;
2692 numBytesOut += p.second;
2694 p = PackTraits<ST, D>::packArray (valsOut, valsIn.data (), numScalarEnt);
2695 errorCode += p.first;
2696 numBytesOut += p.second;
2699 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2700 TEUCHOS_TEST_FOR_EXCEPTION(
2701 numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
2702 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
2703 << expectedNumBytes <<
".");
2705 TEUCHOS_TEST_FOR_EXCEPTION(
2706 errorCode != 0, std::runtime_error,
"packRow: "
2707 "PackTraits::packArray returned a nonzero error code");
2713 template<
class ST,
class LO,
class GO,
class D>
2715 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO, D>::output_array_type& gidsOut,
2716 const typename ::Tpetra::Details::PackTraits<ST, D>::output_array_type& valsOut,
2717 const typename ::Tpetra::Details::PackTraits<int, D>::input_buffer_type& imports,
2718 const size_t offset,
2719 const size_t numBytes,
2720 const size_t numEnt,
2721 const size_t numBytesPerValue,
2722 const size_t blockSize)
2724 using ::Tpetra::Details::PackTraits;
2726 if (numBytes == 0) {
2730 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2731 TEUCHOS_TEST_FOR_EXCEPTION(
2732 static_cast<size_t> (imports.extent (0)) <= offset,
2733 std::logic_error,
"unpackRow: imports.extent(0) = "
2734 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2735 TEUCHOS_TEST_FOR_EXCEPTION(
2736 static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2737 std::logic_error,
"unpackRow: imports.extent(0) = "
2738 << imports.extent (0) <<
" < offset + numBytes = "
2739 << (offset + numBytes) <<
".");
2744 const size_t numEntBeg = offset;
2745 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2746 const size_t gidsBeg = numEntBeg + numEntLen;
2747 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2748 const size_t valsBeg = gidsBeg + gidsLen;
2749 const size_t valsLen = numScalarEnt * numBytesPerValue;
2751 const char*
const numEntIn = imports.data () + numEntBeg;
2752 const char*
const gidsIn = imports.data () + gidsBeg;
2753 const char*
const valsIn = imports.data () + valsBeg;
2755 size_t numBytesOut = 0;
2758 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2759 TEUCHOS_TEST_FOR_EXCEPTION(
2760 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2761 "unpackRow: Expected number of entries " << numEnt
2762 <<
" != actual number of entries " << numEntOut <<
".");
2765 Kokkos::pair<int, size_t> p;
2766 p = PackTraits<GO, D>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2767 errorCode += p.first;
2768 numBytesOut += p.second;
2770 p = PackTraits<ST, D>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2771 errorCode += p.first;
2772 numBytesOut += p.second;
2775 TEUCHOS_TEST_FOR_EXCEPTION(
2776 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
2777 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
2779 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2780 TEUCHOS_TEST_FOR_EXCEPTION(
2781 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
2782 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
2783 << expectedNumBytes <<
".");
2785 TEUCHOS_TEST_FOR_EXCEPTION(
2786 errorCode != 0, std::runtime_error,
"unpackRow: "
2787 "PackTraits::unpackArray returned a nonzero error code");
2793 template<
class Scalar,
class LO,
class GO,
class Node>
2795 BlockCrsMatrix<Scalar, LO, GO, Node>::
2796 packAndPrepare (const ::Tpetra::SrcDistObject& source,
2797 const Teuchos::ArrayView<const LO>& exportLIDs,
2798 Teuchos::Array<packet_type>& exports,
2799 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
2800 size_t& constantNumPackets,
2804 using ::Tpetra::Details::PackTraits;
2805 using Kokkos::MemoryUnmanaged;
2806 using Kokkos::subview;
2808 typedef typename ::Tpetra::MultiVector<Scalar, LO, GO, Node>::impl_scalar_type ST;
2809 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
2810 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2811 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
2812 const bool debug =
false;
2815 std::ostringstream os;
2816 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2817 os <<
"Proc " << myRank <<
": packAndPrepare: exportLIDs.size() = "
2818 << exportLIDs.size () <<
", numPacketsPerLID.size() = "
2819 << numPacketsPerLID.size () << endl;
2820 std::cerr << os.str ();
2823 if (* (this->localError_)) {
2824 std::ostream& err = this->markLocalErrorAndGetStream ();
2825 err <<
"packAndPrepare: The target object of the Import or Export is "
2826 "already in an error state." << endl;
2830 const this_type* src = dynamic_cast<const this_type* > (&source);
2833 std::ostream& err = this->markLocalErrorAndGetStream ();
2834 err <<
"packAndPrepare: The source (input) object of the Import or "
2835 "Export is either not a BlockCrsMatrix, or does not have the right "
2836 "template parameters. checkSizes() should have caught this. "
2837 "Please report this bug to the Tpetra developers." << endl;
2841 const crs_graph_type& srcGraph = src->graph_;
2842 const size_t blockSize = static_cast<size_t> (src->getBlockSize ());
2843 const size_type numExportLIDs = exportLIDs.size ();
2845 if (numExportLIDs != numPacketsPerLID.size ()) {
2846 std::ostream& err = this->markLocalErrorAndGetStream ();
2847 err <<
"packAndPrepare: exportLIDs.size() = " << numExportLIDs
2848 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.size ()
2861 constantNumPackets = 0;
2866 size_t totalNumBytes = 0;
2867 size_t totalNumEntries = 0;
2868 size_t maxRowLength = 0;
2869 for (size_type i = 0; i < numExportLIDs; ++i) {
2870 const LO lclRow = exportLIDs[i];
2871 size_t numEnt = srcGraph.getNumEntriesInLocalRow (lclRow);
2878 if (numEnt == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2881 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2885 size_t numBytesPerValue = 0;
2891 Scalar* valsRaw = NULL;
2892 const LO* lidsRaw = NULL;
2893 LO actualNumEnt = 0;
2895 src->getLocalRowView (lclRow, lidsRaw, valsRaw, actualNumEnt);
2897 if (numEnt != static_cast<size_t> (actualNumEnt)) {
2898 std::ostream& err = this->markLocalErrorAndGetStream ();
2899 err <<
"packAndPrepare: Local row " << i <<
" claims to have " <<
2900 numEnt <<
"entry/ies, but the View returned by getLocalRowView() "
2901 "has " << actualNumEnt <<
" entry/ies. This should never happen. "
2902 "Please report this bug to the Tpetra developers." << endl;
2905 if (errCode == Teuchos::OrdinalTraits<LO>::invalid ()) {
2906 std::ostream& err = this->markLocalErrorAndGetStream ();
2907 err <<
"packAndPrepare: Local row " << i <<
" is not in the row Map "
2908 "of the source object on the calling process." << endl;
2912 const ST* valsRawST =
2913 const_cast<const ST*> (reinterpret_cast<ST*> (valsRaw));
2914 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2919 numBytesPerValue = PackTraits<ST, HES>::packValueCount (vals(0));
2922 const size_t numBytes =
2923 packRowCount<LO, GO, HES> (numEnt, numBytesPerValue, blockSize);
2924 numPacketsPerLID[i] = numBytes;
2925 totalNumBytes += numBytes;
2926 totalNumEntries += numEnt;
2927 maxRowLength = std::max (maxRowLength, numEnt);
2931 const int myRank = graph_.getComm ()->getRank ();
2932 std::ostringstream os;
2933 os <<
"Proc " << myRank <<
": packAndPrepare: totalNumBytes = "
2934 << totalNumBytes << endl;
2935 std::cerr << os.str ();
2941 exports.resize (totalNumBytes);
2942 if (totalNumEntries > 0) {
2943 View<char*, HES, MemoryUnmanaged> exportsK (exports.getRawPtr (),
2955 const map_type& srcColMap = * (srcGraph.getColMap ());
2958 View<GO*, HES> gblColInds;
2961 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowLength,
"gids");
2965 for (size_type i = 0; i < numExportLIDs; ++i) {
2966 const LO lclRowInd = exportLIDs[i];
2967 const LO* lclColIndsRaw;
2973 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
2974 const size_t numEnt = static_cast<size_t> (numEntLO);
2975 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2976 View<const LO*, HES, MemoryUnmanaged> lclColInds (lclColIndsRaw, numEnt);
2977 const ST* valsRawST = const_cast<const ST*> (reinterpret_cast<ST*> (valsRaw));
2978 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2983 const size_t numBytesPerValue = numEnt == 0 ?
2984 static_cast<size_t> (0) :
2985 PackTraits<ST, HES>::packValueCount (vals(0));
2988 for (
size_t j = 0; j < numEnt; ++j) {
2989 gblColInds(j) = srcColMap.getGlobalElement (lclColInds(j));
2993 const size_t numBytes =
2994 packRowForBlockCrs<ST, LO, GO, HES> (exportsK, offset, numEnt, gblColInds,
2995 vals, numBytesPerValue, blockSize);
3000 if (offset != totalNumBytes) {
3001 std::ostream& err = this->markLocalErrorAndGetStream ();
3002 err <<
"packAndPreapre: At end of method, the final offset (in bytes) "
3003 << offset <<
" does not equal the total number of bytes packed "
3004 << totalNumBytes <<
". "
3005 <<
"Please report this bug to the Tpetra developers." << endl;
3011 std::ostringstream os;
3012 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3013 const bool lclSuccess = ! (* (this->localError_));
3014 os <<
"*** Proc " << myRank <<
": packAndPrepare "
3015 << (lclSuccess ?
"succeeded" :
"FAILED")
3016 <<
" (totalNumEntries = " << totalNumEntries <<
") ***" << endl;
3017 std::cerr << os.str ();
3022 template<
class Scalar,
class LO,
class GO,
class Node>
3024 BlockCrsMatrix<Scalar, LO, GO, Node>::
3025 unpackAndCombine (
const Teuchos::ArrayView<const LO>& importLIDs,
3026 const Teuchos::ArrayView<const packet_type>& imports,
3027 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
3033 using ::Tpetra::Details::PackTraits;
3034 using Kokkos::MemoryUnmanaged;
3035 using Kokkos::subview;
3037 typedef typename ::Tpetra::MultiVector<Scalar, LO, GO, Node>::impl_scalar_type ST;
3038 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
3039 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
3040 typedef std::pair<typename View<int*, HES>::size_type,
3041 typename View<int*, HES>::size_type> pair_type;
3042 typedef View<GO*, HES, MemoryUnmanaged> gids_out_type;
3043 typedef View<LO*, HES, MemoryUnmanaged> lids_out_type;
3044 typedef View<ST*, HES, MemoryUnmanaged> vals_out_type;
3045 typedef typename PackTraits<GO, HES>::input_buffer_type input_buffer_type;
3046 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::unpackAndCombine: ";
3047 const bool debug =
false;
3050 std::ostringstream os;
3051 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3052 os <<
"Proc " << myRank <<
": unpackAndCombine" << endl;
3053 std::cerr << os.str ();
3059 if (* (this->localError_)) {
3060 std::ostream& err = this->markLocalErrorAndGetStream ();
3061 err << prefix <<
"The target object of the Import or Export is "
3062 "already in an error state." << endl;
3065 if (importLIDs.size () != numPacketsPerLID.size ()) {
3066 std::ostream& err = this->markLocalErrorAndGetStream ();
3067 err << prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != "
3068 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
"." << endl;
3072 std::ostream& err = this->markLocalErrorAndGetStream ();
3073 err << prefix <<
"Invalid CombineMode value " << CM <<
". Valid "
3074 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
3082 const map_type& tgtColMap = * (this->graph_.getColMap ());
3084 const size_type numImportLIDs = importLIDs.size ();
3085 if (CM ==
ZERO || numImportLIDs == 0) {
3087 std::ostringstream os;
3088 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3089 os <<
"Proc " << myRank <<
": unpackAndCombine: Nothing to do" << endl;
3090 std::cerr << os.str ();
3096 std::ostringstream os;
3097 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3098 os <<
"Proc " << myRank <<
": unpackAndCombine: Getting ready" << endl;
3099 std::cerr << os.str ();
3102 input_buffer_type importsK (imports.getRawPtr (), imports.size ());
3103 const size_t blockSize = this->getBlockSize ();
3104 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
3105 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3108 size_t numBytesPerValue;
3115 if (this->val_.h_view.extent (0) != 0) {
3116 const ST& val = this->val_.h_view[0];
3117 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
3127 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
3134 View<GO*, HES> gblColInds;
3135 View<LO*, HES> lclColInds;
3136 View<ST*, HES> vals;
3150 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowNumEnt,
"gids");
3151 lclColInds = PackTraits<LO, HES>::allocateArray (lid, maxRowNumEnt,
"lids");
3152 vals = PackTraits<ST, HES>::allocateArray (val, maxRowNumScalarEnt,
"vals");
3156 bool errorDuringUnpack =
false;
3157 for (size_type i = 0; i < numImportLIDs; ++i) {
3158 const size_t numBytes = numPacketsPerLID[i];
3159 if (numBytes == 0) {
3162 const size_t numEnt =
3163 unpackRowCount<ST, LO, GO, HES> (importsK, offset, numBytes,
3165 if (numEnt > maxRowNumEnt) {
3166 errorDuringUnpack =
true;
3167 #ifdef HAVE_TPETRA_DEBUG
3168 std::ostream& err = this->markLocalErrorAndGetStream ();
3169 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3170 <<
" > maxRowNumEnt = " << maxRowNumEnt << endl;
3171 #endif // HAVE_TPETRA_DEBUG
3175 const size_t numScalarEnt = numEnt * blockSize * blockSize;
3176 const LO lclRow = importLIDs[i];
3178 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
3179 vals_out_type valsOut = subview (vals, pair_type (0, numScalarEnt));
3181 const size_t numBytesOut =
3182 unpackRowForBlockCrs<ST, LO, GO, HES> (gidsOut, valsOut, importsK,
3183 offset, numBytes, numEnt,
3184 numBytesPerValue, blockSize);
3185 if (numBytes != numBytesOut) {
3186 errorDuringUnpack =
true;
3187 #ifdef HAVE_TPETRA_DEBUG
3188 std::ostream& err = this->markLocalErrorAndGetStream ();
3189 err << prefix <<
"At i = " << i <<
", numBytes = " << numBytes
3190 <<
" != numBytesOut = " << numBytesOut <<
".";
3191 #endif // HAVE_TPETRA_DEBUG
3196 lids_out_type lidsOut = subview (lclColInds, pair_type (0, numEnt));
3197 for (
size_t k = 0; k < numEnt; ++k) {
3198 lidsOut(k) = tgtColMap.getLocalElement (gidsOut(k));
3199 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3200 errorDuringUnpack =
true;
3201 #ifdef HAVE_TPETRA_DEBUG
3202 std::ostream& err = this->markLocalErrorAndGetStream ();
3203 err << prefix <<
"At i = " << i <<
", GID " << gidsOut(k)
3204 <<
" is not owned by the calling process.";
3205 #endif // HAVE_TPETRA_DEBUG
3212 const LO*
const lidsRaw = const_cast<const LO*> (lidsOut.data ());
3213 const Scalar*
const valsRaw =
3214 reinterpret_cast<const Scalar*> (const_cast<const ST*> (valsOut.data ()));
3216 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3218 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3219 }
else if (CM ==
ABSMAX) {
3220 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3223 if (static_cast<LO> (numEnt) != numCombd) {
3224 errorDuringUnpack =
true;
3225 #ifdef HAVE_TPETRA_DEBUG
3226 std::ostream& err = this->markLocalErrorAndGetStream ();
3227 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3228 <<
" != numCombd = " << numCombd <<
".";
3229 #endif // HAVE_TPETRA_DEBUG
3237 if (errorDuringUnpack) {
3238 std::ostream& err = this->markLocalErrorAndGetStream ();
3239 err << prefix <<
"Unpacking failed.";
3240 #ifndef HAVE_TPETRA_DEBUG
3241 err <<
" Please run again with a debug build to get more verbose "
3242 "diagnostic output.";
3243 #endif // ! HAVE_TPETRA_DEBUG
3248 std::ostringstream os;
3249 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
3250 const bool lclSuccess = ! (* (this->localError_));
3251 os <<
"*** Proc " << myRank <<
": unpackAndCombine "
3252 << (lclSuccess ?
"succeeded" :
"FAILED")
3254 std::cerr << os.str ();
3259 template<
class Scalar,
class LO,
class GO,
class Node>
3263 using Teuchos::TypeNameTraits;
3264 std::ostringstream os;
3265 os <<
"\"Tpetra::BlockCrsMatrix\": { "
3266 <<
"Template parameters: { "
3267 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3268 <<
"LO: " << TypeNameTraits<LO>::name ()
3269 <<
"GO: " << TypeNameTraits<GO>::name ()
3270 <<
"Node: " << TypeNameTraits<Node>::name ()
3272 <<
", Label: \"" << this->getObjectLabel () <<
"\""
3273 <<
", Global dimensions: ["
3274 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3275 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
3276 <<
", Block size: " << getBlockSize ()
3282 template<
class Scalar,
class LO,
class GO,
class Node>
3286 const Teuchos::EVerbosityLevel verbLevel)
const
3288 using Teuchos::ArrayRCP;
3289 using Teuchos::CommRequest;
3290 using Teuchos::FancyOStream;
3291 using Teuchos::getFancyOStream;
3292 using Teuchos::ireceive;
3293 using Teuchos::isend;
3294 using Teuchos::outArg;
3295 using Teuchos::TypeNameTraits;
3296 using Teuchos::VERB_DEFAULT;
3297 using Teuchos::VERB_NONE;
3298 using Teuchos::VERB_LOW;
3299 using Teuchos::VERB_MEDIUM;
3301 using Teuchos::VERB_EXTREME;
3303 using Teuchos::wait;
3305 #ifdef HAVE_TPETRA_DEBUG
3306 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::describe: ";
3307 #endif // HAVE_TPETRA_DEBUG
3310 const Teuchos::EVerbosityLevel vl =
3311 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3313 if (vl == VERB_NONE) {
3318 Teuchos::OSTab tab0 (out);
3320 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3321 Teuchos::OSTab tab1 (out);
3323 out <<
"Template parameters:" << endl;
3325 Teuchos::OSTab tab2 (out);
3326 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3327 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3328 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3329 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3331 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3332 <<
"Global dimensions: ["
3333 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3334 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3336 const LO blockSize = getBlockSize ();
3337 out <<
"Block size: " << blockSize << endl;
3340 if (vl >= VERB_MEDIUM) {
3341 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3342 const int myRank = comm.getRank ();
3344 out <<
"Row Map:" << endl;
3346 getRowMap()->describe (out, vl);
3348 if (! getColMap ().is_null ()) {
3349 if (getColMap() == getRowMap()) {
3351 out <<
"Column Map: same as row Map" << endl;
3356 out <<
"Column Map:" << endl;
3358 getColMap ()->describe (out, vl);
3361 if (! getDomainMap ().is_null ()) {
3362 if (getDomainMap () == getRowMap ()) {
3364 out <<
"Domain Map: same as row Map" << endl;
3367 else if (getDomainMap () == getColMap ()) {
3369 out <<
"Domain Map: same as column Map" << endl;
3374 out <<
"Domain Map:" << endl;
3376 getDomainMap ()->describe (out, vl);
3379 if (! getRangeMap ().is_null ()) {
3380 if (getRangeMap () == getDomainMap ()) {
3382 out <<
"Range Map: same as domain Map" << endl;
3385 else if (getRangeMap () == getRowMap ()) {
3387 out <<
"Range Map: same as row Map" << endl;
3392 out <<
"Range Map: " << endl;
3394 getRangeMap ()->describe (out, vl);
3399 if (vl >= VERB_EXTREME) {
3405 const_cast<this_type*> (
this)->template sync<Kokkos::HostSpace> ();
3407 #ifdef HAVE_TPETRA_DEBUG
3408 TEUCHOS_TEST_FOR_EXCEPTION
3409 (this->
template need_sync<Kokkos::HostSpace> (), std::logic_error,
3410 prefix <<
"Right after sync to host, the matrix claims that it needs "
3411 "sync to host. Please report this bug to the Tpetra developers.");
3412 #endif // HAVE_TPETRA_DEBUG
3414 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3415 const int myRank = comm.getRank ();
3416 const int numProcs = comm.getSize ();
3419 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3420 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3421 FancyOStream& os = *osPtr;
3422 os <<
"Process " << myRank <<
":" << endl;
3423 Teuchos::OSTab tab2 (os);
3425 const map_type& meshRowMap = * (graph_.getRowMap ());
3426 const map_type& meshColMap = * (graph_.getColMap ());
3431 os <<
"Row " << meshGblRow <<
": {";
3433 const LO* lclColInds = NULL;
3434 Scalar* vals = NULL;
3436 this->getLocalRowView (meshLclRow, lclColInds, vals, numInds);
3438 for (LO k = 0; k < numInds; ++k) {
3441 os <<
"Col " << gblCol <<
": [";
3442 for (LO i = 0; i < blockSize; ++i) {
3443 for (LO j = 0; j < blockSize; ++j) {
3444 os << vals[blockSize*blockSize*k + i*blockSize + j];
3445 if (j + 1 < blockSize) {
3449 if (i + 1 < blockSize) {
3454 if (k + 1 < numInds) {
3464 out << lclOutStrPtr->str ();
3465 lclOutStrPtr = Teuchos::null;
3468 const int sizeTag = 1337;
3469 const int dataTag = 1338;
3471 ArrayRCP<char> recvDataBuf;
3475 for (
int p = 1; p < numProcs; ++p) {
3478 ArrayRCP<size_t> recvSize (1);
3480 RCP<CommRequest<int> > recvSizeReq =
3481 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3482 wait<int> (comm, outArg (recvSizeReq));
3483 const size_t numCharsToRecv = recvSize[0];
3490 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3491 recvDataBuf.resize (numCharsToRecv + 1);
3493 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3495 RCP<CommRequest<int> > recvDataReq =
3496 ireceive<int, char> (recvData, p, dataTag, comm);
3497 wait<int> (comm, outArg (recvDataReq));
3502 recvDataBuf[numCharsToRecv] =
'\0';
3503 out << recvDataBuf.getRawPtr ();
3505 else if (myRank == p) {
3509 const std::string stringToSend = lclOutStrPtr->str ();
3510 lclOutStrPtr = Teuchos::null;
3513 const size_t numCharsToSend = stringToSend.size ();
3514 ArrayRCP<size_t> sendSize (1);
3515 sendSize[0] = numCharsToSend;
3516 RCP<CommRequest<int> > sendSizeReq =
3517 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3518 wait<int> (comm, outArg (sendSizeReq));
3526 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3527 RCP<CommRequest<int> > sendDataReq =
3528 isend<int, char> (sendData, 0, dataTag, comm);
3529 wait<int> (comm, outArg (sendDataReq));
3535 template<
class Scalar,
class LO,
class GO,
class Node>
3536 Teuchos::RCP<const Teuchos::Comm<int> >
3540 return graph_.getComm();
3543 template<
class Scalar,
class LO,
class GO,
class Node>
3548 return graph_.getNode();
3552 template<
class Scalar,
class LO,
class GO,
class Node>
3557 return graph_.getGlobalNumCols();
3560 template<
class Scalar,
class LO,
class GO,
class Node>
3565 return graph_.getNodeNumCols();
3568 template<
class Scalar,
class LO,
class GO,
class Node>
3573 return graph_.getIndexBase();
3576 template<
class Scalar,
class LO,
class GO,
class Node>
3581 return graph_.getGlobalNumEntries();
3584 template<
class Scalar,
class LO,
class GO,
class Node>
3589 return graph_.getNodeNumEntries();
3592 template<
class Scalar,
class LO,
class GO,
class Node>
3597 return graph_.getNumEntriesInGlobalRow(globalRow);
3600 template<
class Scalar,
class LO,
class GO,
class Node>
3606 return dynamic_cast<const HDM&> (this->graph_).getGlobalNumDiagsImpl ();
3609 template<
class Scalar,
class LO,
class GO,
class Node>
3615 return dynamic_cast<const HDM&> (this->graph_).getNodeNumDiagsImpl ();
3618 template<
class Scalar,
class LO,
class GO,
class Node>
3623 return graph_.getGlobalMaxNumRowEntries();
3626 template<
class Scalar,
class LO,
class GO,
class Node>
3631 return graph_.hasColMap();
3634 template<
class Scalar,
class LO,
class GO,
class Node>
3640 return dynamic_cast<const HDM&> (this->graph_).isLowerTriangularImpl ();
3643 template<
class Scalar,
class LO,
class GO,
class Node>
3649 return dynamic_cast<const HDM&> (this->graph_).isUpperTriangularImpl ();
3652 template<
class Scalar,
class LO,
class GO,
class Node>
3657 return graph_.isLocallyIndexed();
3660 template<
class Scalar,
class LO,
class GO,
class Node>
3665 return graph_.isGloballyIndexed();
3668 template<
class Scalar,
class LO,
class GO,
class Node>
3673 return graph_.isFillComplete ();
3676 template<
class Scalar,
class LO,
class GO,
class Node>
3685 template<
class Scalar,
class LO,
class GO,
class Node>
3689 const Teuchos::ArrayView<GO> &Indices,
3690 const Teuchos::ArrayView<Scalar> &Values,
3691 size_t &NumEntries)
const
3693 TEUCHOS_TEST_FOR_EXCEPTION(
3694 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowCopy: "
3695 "This class doesn't support global matrix indexing.");
3699 template<
class Scalar,
class LO,
class GO,
class Node>
3703 Teuchos::ArrayView<const GO> &indices,
3704 Teuchos::ArrayView<const Scalar> &values)
const
3706 TEUCHOS_TEST_FOR_EXCEPTION(
3707 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: "
3708 "This class doesn't support global matrix indexing.");
3712 template<
class Scalar,
class LO,
class GO,
class Node>
3716 Teuchos::ArrayView<const LO>& indices,
3717 Teuchos::ArrayView<const Scalar>& values)
const
3719 TEUCHOS_TEST_FOR_EXCEPTION(
3720 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: "
3721 "This class doesn't support local matrix indexing.");
3724 template<
class Scalar,
class LO,
class GO,
class Node>
3729 #ifdef HAVE_TPETRA_DEBUG
3730 const char prefix[] =
3731 "Tpetra::Experimental::BlockCrsMatrix::getLocalDiagCopy: ";
3732 #endif // HAVE_TPETRA_DEBUG
3734 const size_t lclNumMeshRows = graph_.getNodeNumRows ();
3736 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3737 graph_.getLocalDiagOffsets (diagOffsets);
3740 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3743 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3745 #ifdef HAVE_TPETRA_DEBUG
3746 TEUCHOS_TEST_FOR_EXCEPTION
3747 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
3748 prefix <<
"The matrix's data were last modified on device, but have "
3749 "not been sync'd to host. Please sync to host (by calling "
3750 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
3752 #endif // HAVE_TPETRA_DEBUG
3758 auto vals_host_out =
3759 const_cast<this_type*> (
this)->template getValues<Kokkos::HostSpace> ();
3760 Scalar* vals_host_out_raw =
3761 reinterpret_cast<Scalar*> (vals_host_out.data ());
3764 size_t rowOffset = 0;
3766 LO bs = getBlockSize();
3767 for(
size_t r=0; r<getNodeNumRows(); r++)
3770 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3771 for(
int b=0; b<bs; b++)
3776 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3779 diag.template sync<memory_space> ();
3782 template<
class Scalar,
class LO,
class GO,
class Node>
3787 TEUCHOS_TEST_FOR_EXCEPTION(
3788 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::leftScale: "
3789 "not implemented.");
3793 template<
class Scalar,
class LO,
class GO,
class Node>
3798 TEUCHOS_TEST_FOR_EXCEPTION(
3799 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::rightScale: "
3800 "not implemented.");
3804 template<
class Scalar,
class LO,
class GO,
class Node>
3805 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3812 template<
class Scalar,
class LO,
class GO,
class Node>
3813 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3817 TEUCHOS_TEST_FOR_EXCEPTION(
3818 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getFrobeniusNorm: "
3819 "not implemented.");
3831 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3832 namespace Experimental { \
3833 namespace Classes { \
3834 template class BlockCrsMatrix< S, LO, GO, NODE >; \
3838 #endif // TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP