42 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
43 #define TPETRA_MULTIVECTOR_DEF_HPP
54 #include "Tpetra_Vector.hpp"
58 #include "Tpetra_Details_gathervPrint.hpp"
64 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
68 #include "KokkosCompat_View.hpp"
69 #include "KokkosBlas.hpp"
70 #include "KokkosKernels_Utils.hpp"
71 #include "Kokkos_Random.hpp"
73 #ifdef HAVE_TPETRA_INST_FLOAT128
76 template<
class Generator>
77 struct rand<Generator, __float128> {
78 static KOKKOS_INLINE_FUNCTION __float128 max ()
80 return static_cast<__float128> (1.0);
82 static KOKKOS_INLINE_FUNCTION __float128
87 const __float128 scalingFactor =
88 static_cast<__float128> (std::numeric_limits<double>::min ()) /
89 static_cast<__float128> (2.0);
90 const __float128 higherOrderTerm = static_cast<__float128> (gen.drand ());
91 const __float128 lowerOrderTerm =
92 static_cast<__float128> (gen.drand ()) * scalingFactor;
93 return higherOrderTerm + lowerOrderTerm;
95 static KOKKOS_INLINE_FUNCTION __float128
96 draw (Generator& gen,
const __float128& range)
99 const __float128 scalingFactor =
100 static_cast<__float128> (std::numeric_limits<double>::min ()) /
101 static_cast<__float128> (2.0);
102 const __float128 higherOrderTerm =
103 static_cast<__float128> (gen.drand (range));
104 const __float128 lowerOrderTerm =
105 static_cast<__float128> (gen.drand (range)) * scalingFactor;
106 return higherOrderTerm + lowerOrderTerm;
108 static KOKKOS_INLINE_FUNCTION __float128
109 draw (Generator& gen,
const __float128& start,
const __float128& end)
112 const __float128 scalingFactor =
113 static_cast<__float128> (std::numeric_limits<double>::min ()) /
114 static_cast<__float128> (2.0);
115 const __float128 higherOrderTerm =
116 static_cast<__float128> (gen.drand (start, end));
117 const __float128 lowerOrderTerm =
118 static_cast<__float128> (gen.drand (start, end)) * scalingFactor;
119 return higherOrderTerm + lowerOrderTerm;
123 #endif // HAVE_TPETRA_INST_FLOAT128
141 template<
class ST,
class LO,
class GO,
class NT>
143 allocDualView (
const size_t lclNumRows,
144 const size_t numCols,
145 const bool zeroOut =
true,
146 const bool allowPadding =
false)
148 using ::Tpetra::Details::Behavior;
149 using Kokkos::AllowPadding;
150 using Kokkos::view_alloc;
151 using Kokkos::WithoutInitializing;
153 typedef typename dual_view_type::t_dev dev_view_type;
158 const std::string label (
"MV::DualView");
159 const bool debug = Behavior::debug ();
169 dev_view_type d_view;
172 d_view = dev_view_type (view_alloc (label, AllowPadding),
173 lclNumRows, numCols);
176 d_view = dev_view_type (view_alloc (label),
177 lclNumRows, numCols);
182 d_view = dev_view_type (view_alloc (label,
185 lclNumRows, numCols);
188 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
189 lclNumRows, numCols);
200 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
201 KokkosBlas::fill (d_view, nan);
205 TEUCHOS_TEST_FOR_EXCEPTION
206 (static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
207 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
208 "allocDualView: d_view's dimensions actual dimensions do not match "
209 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
210 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
211 <<
". Please report this bug to the Tpetra developers.");
214 typename dual_view_type::t_host h_view = Kokkos::create_mirror_view (d_view);
216 dual_view_type dv (d_view, h_view);
221 dv.template modify<typename dev_view_type::memory_space> ();
230 template<
class T,
class ExecSpace>
231 struct MakeUnmanagedView {
244 typedef typename Kokkos::Impl::if_c<
245 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
246 typename ExecSpace::memory_space,
247 Kokkos::HostSpace>::value,
248 typename ExecSpace::device_type,
249 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
250 typedef Kokkos::LayoutLeft array_layout;
251 typedef Kokkos::View<T*, array_layout, host_exec_space,
252 Kokkos::MemoryUnmanaged> view_type;
254 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
256 const size_t numEnt = static_cast<size_t> (x_in.size ());
260 return view_type (x_in.getRawPtr (), numEnt);
268 template<
class DualViewType>
270 takeSubview (
const DualViewType& X,
271 const Kokkos::Impl::ALL_t&,
272 const std::pair<size_t, size_t>& colRng)
274 if (X.extent (0) == 0 && X.extent (1) != 0) {
275 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
278 return subview (X, Kokkos::ALL (), colRng);
285 template<
class DualViewType>
287 takeSubview (
const DualViewType& X,
288 const std::pair<size_t, size_t>& rowRng,
289 const std::pair<size_t, size_t>& colRng)
291 if (X.extent (0) == 0 && X.extent (1) != 0) {
292 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
295 return subview (X, rowRng, colRng);
304 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
306 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
307 vectorIndexOutOfRange (
const size_t VectorIndex)
const {
308 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
311 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
317 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
320 const size_t numVecs,
321 const bool zeroOut) :
326 const size_t lclNumRows = this->getLocalLength ();
327 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
331 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
335 view_ (source.view_),
336 origView_ (source.origView_),
337 whichVectors_ (source.whichVectors_)
340 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
343 const Teuchos::DataAccess copyOrView) :
345 view_ (source.view_),
346 origView_ (source.origView_),
347 whichVectors_ (source.whichVectors_)
350 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
351 "const Teuchos::DataAccess): ";
355 if (copyOrView == Teuchos::Copy) {
359 this->view_ = cpy.view_;
360 this->origView_ = cpy.origView_;
361 this->whichVectors_ = cpy.whichVectors_;
363 else if (copyOrView == Teuchos::View) {
366 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
367 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
368 "invalid value " << copyOrView <<
". Valid values include "
369 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
370 << Teuchos::View <<
".");
374 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
382 const char tfecfFuncName[] =
"MultiVector(map,view): ";
387 origView_.stride (stride);
388 const size_t LDA = (origView_.extent (1) > 1) ? stride[1] :
389 origView_.extent (0);
390 const size_t lclNumRows = this->getLocalLength ();
391 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
392 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
393 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
394 <<
". This may also mean that the input view's first dimension (number "
395 "of rows = " << view.extent (0) <<
") does not not match the number "
396 "of entries in the Map on the calling process.");
400 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
403 const typename dual_view_type::t_dev& d_view) :
406 using Teuchos::ArrayRCP;
408 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
415 d_view.stride (stride);
416 const size_t LDA = (d_view.extent (1) > 1) ? stride[1] :
418 const size_t lclNumRows = this->getLocalLength ();
419 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
420 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::View's "
421 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
422 <<
". This may also mean that the input view's first dimension (number "
423 "of rows = " << d_view.extent (0) <<
") does not not match the "
424 "number of entries in the Map on the calling process.");
433 this->
template modify<device_type> ();
437 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
446 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
451 origView_.stride (stride);
452 const size_t LDA = (origView_.extent (1) > 1) ? stride[1] :
453 origView_.extent (0);
454 const size_t lclNumRows = this->getLocalLength ();
455 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
456 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
457 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
458 <<
". This may also mean that the input origView's first dimension (number "
459 "of rows = " << origView.extent (0) <<
") does not not match the number "
460 "of entries in the Map on the calling process.");
464 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
468 const Teuchos::ArrayView<const size_t>& whichVectors) :
472 whichVectors_ (whichVectors.begin (), whichVectors.end ())
475 using Kokkos::subview;
476 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
478 const size_t lclNumRows = map.is_null () ? size_t (0) :
479 map->getNodeNumElements ();
486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
487 view.extent (1) != 0 && static_cast<size_t> (view.extent (0)) < lclNumRows,
488 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
489 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
490 if (whichVectors.size () != 0) {
491 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
492 view.extent (1) != 0 && view.extent (1) == 0,
493 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
494 " = " << whichVectors.size () <<
" > 0.");
495 size_t maxColInd = 0;
496 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
497 for (size_type k = 0; k < whichVectors.size (); ++k) {
498 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
499 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
500 std::invalid_argument,
"whichVectors[" << k <<
"] = "
501 "Teuchos::OrdinalTraits<size_t>::invalid().");
502 maxColInd = std::max (maxColInd, whichVectors[k]);
504 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
505 view.extent (1) != 0 && static_cast<size_t> (view.extent (1)) <= maxColInd,
506 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
507 <<
" <= max(whichVectors) = " << maxColInd <<
".");
513 origView_.stride (stride);
514 const size_t LDA = (origView_.extent (1) > 1) ? stride[1] :
515 origView_.extent (0);
516 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
517 LDA < lclNumRows, std::invalid_argument,
518 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
520 if (whichVectors.size () == 1) {
529 const std::pair<size_t, size_t> colRng (whichVectors[0],
530 whichVectors[0] + 1);
531 view_ = takeSubview (view_, ALL (), colRng);
532 origView_ = takeSubview (origView_, ALL (), colRng);
534 whichVectors_.clear ();
538 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
543 const Teuchos::ArrayView<const size_t>& whichVectors) :
546 origView_ (origView),
547 whichVectors_ (whichVectors.begin (), whichVectors.end ())
550 using Kokkos::subview;
551 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
553 const size_t lclNumRows = this->getLocalLength ();
560 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
561 view.extent (1) != 0 && static_cast<size_t> (view.extent (0)) < lclNumRows,
562 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
563 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
564 if (whichVectors.size () != 0) {
565 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
566 view.extent (1) != 0 && view.extent (1) == 0,
567 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
568 " = " << whichVectors.size () <<
" > 0.");
569 size_t maxColInd = 0;
570 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
571 for (size_type k = 0; k < whichVectors.size (); ++k) {
572 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
573 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
574 std::invalid_argument,
"whichVectors[" << k <<
"] = "
575 "Teuchos::OrdinalTraits<size_t>::invalid().");
576 maxColInd = std::max (maxColInd, whichVectors[k]);
578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
579 view.extent (1) != 0 && static_cast<size_t> (view.extent (1)) <= maxColInd,
580 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
581 <<
" <= max(whichVectors) = " << maxColInd <<
".");
586 origView_.stride (stride);
587 const size_t LDA = (origView_.extent (1) > 1) ? stride[1] :
588 origView_.extent (0);
589 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
590 LDA < lclNumRows, std::invalid_argument,
"Input DualView's column stride"
591 " = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
593 if (whichVectors.size () == 1) {
602 const std::pair<size_t, size_t> colRng (whichVectors[0],
603 whichVectors[0] + 1);
604 view_ = takeSubview (view_, ALL (), colRng);
605 origView_ = takeSubview (origView_, ALL (), colRng);
607 whichVectors_.clear ();
611 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
614 const Teuchos::ArrayView<const Scalar>& data,
616 const size_t numVecs) :
619 typedef LocalOrdinal LO;
620 typedef GlobalOrdinal GO;
621 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
627 const size_t lclNumRows =
628 map.is_null () ? size_t (0) : map->getNodeNumElements ();
629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
630 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
631 "map->getNodeNumElements() = " << lclNumRows <<
".");
633 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
634 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
635 (static_cast<size_t> (data.size ()) < minNumEntries,
636 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
637 "entries, given the input Map and number of vectors in the MultiVector."
638 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
639 "map->getNodeNumElements () = " << minNumEntries <<
".");
642 this->view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
643 this->
template modify<device_type> ();
644 auto X_out = this->
template getLocalView<device_type> ();
651 reinterpret_cast<const impl_scalar_type*> (data.getRawPtr ());
655 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
656 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
657 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
662 size_t outStrides[8];
663 X_out.stride (outStrides);
664 const size_t outStride = (X_out.extent (1) > 1) ? outStrides[1] :
666 if (LDA == outStride) {
672 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
674 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
676 for (
size_t j = 0; j < numVecs; ++j) {
677 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
678 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
684 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
687 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
688 const size_t numVecs) :
692 typedef LocalOrdinal LO;
693 typedef GlobalOrdinal GO;
694 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
697 const size_t lclNumRows =
698 map.is_null () ? size_t (0) : map->getNodeNumElements ();
699 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
700 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
701 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
702 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
703 for (
size_t j = 0; j < numVecs; ++j) {
704 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
705 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
706 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
707 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
708 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
712 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
713 this->
template modify<device_type> ();
714 auto X_out = this->getLocalView<device_type> ();
718 typedef typename decltype (X_out)::array_layout array_layout;
719 typedef typename Kokkos::View<
const IST*,
722 Kokkos::MemoryUnmanaged> input_col_view_type;
724 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
725 for (
size_t j = 0; j < numVecs; ++j) {
726 Teuchos::ArrayView<const IST> X_j_av =
727 Teuchos::av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
728 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
729 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
735 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
742 return whichVectors_.empty ();
745 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
750 if (this->getMap ().is_null ()) {
751 return static_cast<size_t> (0);
753 return this->getMap ()->getNodeNumElements ();
757 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
762 if (this->getMap ().is_null ()) {
763 return static_cast<size_t> (0);
765 return this->getMap ()->getGlobalNumElements ();
769 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
774 if (isConstantStride ()) {
778 origView_.stride (stride);
779 const size_t LDA = (origView_.extent (1) > 1) ? stride[1] :
780 origView_.extent (0);
784 return static_cast<size_t> (0);
788 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
797 const this_type* src = dynamic_cast<const this_type*> (&sourceObj);
807 return src->getNumVectors () == this->getNumVectors ();
811 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
815 return this->getNumVectors ();
818 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
822 const size_t numSameIDs,
823 const Kokkos::DualView<const LocalOrdinal*, device_type>& permuteToLIDs,
824 const Kokkos::DualView<const LocalOrdinal*, device_type>& permuteFromLIDs)
826 using ::Tpetra::Details::Behavior;
829 using ::Tpetra::Details::ProfilingRegion;
830 using KokkosRefactor::Details::permute_array_multi_column;
831 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
832 using Kokkos::Compat::create_const_view;
833 typedef typename device_type::memory_space DMS;
834 typedef Kokkos::HostSpace HMS;
836 const char tfecfFuncName[] =
"copyAndPermuteNew: ";
837 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
843 const bool debug = Behavior::verbose ();
845 if (debug && ! this->getMap ().is_null () &&
846 ! this->getMap ()->getComm ().is_null ()) {
847 myRank = this->getMap ()->getComm ()->getRank ();
850 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
851 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
852 std::runtime_error,
"permuteToLIDs.extent(0) = "
853 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
854 << permuteFromLIDs.extent (0) <<
".");
857 const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
858 const size_t numCols = this->getNumVectors ();
866 const bool copyOnHost = sourceMV.template need_sync<DMS> ();
868 std::ostringstream os;
869 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
870 "copyOnHost=" << (copyOnHost ?
"true" :
"false") << std::endl;
871 std::cerr << os.str ();
875 if(this->
template need_sync<HMS> ()) this->
template sync<HMS> ();
876 this->
template modify<HMS> ();
879 if(this->
template need_sync<DMS> ()) this->
template sync<DMS> ();
880 this->
template modify<DMS> ();
884 std::ostringstream os;
885 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
886 "Copy source to target" << std::endl;
887 std::cerr << os.str ();
909 if (numSameIDs > 0) {
910 const std::pair<size_t, size_t> rows (0, numSameIDs);
912 auto tgt_h = this->
template getLocalView<HMS> ();
913 auto src_h = create_const_view (sourceMV.template getLocalView<HMS> ());
915 for (
size_t j = 0; j < numCols; ++j) {
916 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
917 const size_t srcCol =
918 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
920 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
921 auto src_j = Kokkos::subview (src_h, rows, srcCol);
926 auto tgt_d = this->
template getLocalView<DMS> ();
927 auto src_d = create_const_view (sourceMV.template getLocalView<DMS> ());
929 for (
size_t j = 0; j < numCols; ++j) {
930 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
931 const size_t srcCol =
932 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
934 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
935 auto src_j = Kokkos::subview (src_d, rows, srcCol);
951 if (permuteFromLIDs.extent (0) == 0 ||
952 permuteToLIDs.extent (0) == 0) {
954 std::ostringstream os;
955 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
956 "No permutations; done." << std::endl;
957 std::cerr << os.str ();
963 std::ostringstream os;
964 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
965 "Permute source to target" << std::endl;
966 std::cerr << os.str ();
973 Kokkos::DualView<LocalOrdinal*, device_type> permuteToLIDs_nc =
975 Kokkos::DualView<LocalOrdinal*, device_type> permuteFromLIDs_nc =
980 const bool nonConstStride =
981 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
984 std::ostringstream os;
985 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
986 "nonConstStride=" << (nonConstStride ?
"true" :
"false") << std::endl;
987 std::cerr << os.str ();
994 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
995 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
996 if (nonConstStride) {
997 if (this->whichVectors_.size () == 0) {
998 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
999 tmpTgt.template modify<HMS> ();
1000 for (
size_t j = 0; j < numCols; ++j) {
1001 tmpTgt.h_view(j) = j;
1004 tmpTgt.template sync<DMS> ();
1006 tgtWhichVecs = tmpTgt;
1009 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1011 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1016 (static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1017 this->getNumVectors (),
1018 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1019 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1020 this->getNumVectors () <<
".");
1022 if (sourceMV.whichVectors_.size () == 0) {
1023 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1024 tmpSrc.template modify<HMS> ();
1025 for (
size_t j = 0; j < numCols; ++j) {
1026 tmpSrc.h_view(j) = j;
1029 tmpSrc.template sync<DMS> ();
1031 srcWhichVecs = tmpSrc;
1034 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1035 sourceMV.whichVectors_ ();
1037 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1041 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1042 (static_cast<size_t> (srcWhichVecs.extent (0)) !=
1043 sourceMV.getNumVectors (), std::logic_error,
1044 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1045 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1051 std::ostringstream os;
1052 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
1053 "Set up permutation arrays on host" << std::endl;
1054 std::cerr << os.str ();
1056 auto tgt_h = this->
template getLocalView<HMS> ();
1057 auto src_h = create_const_view (sourceMV.template getLocalView<HMS> ());
1058 permuteToLIDs_nc.template sync<HMS> ();
1059 auto permuteToLIDs_h =
1060 create_const_view (permuteToLIDs_nc.template view<HMS> ());
1061 permuteFromLIDs_nc.template sync<HMS> ();
1062 auto permuteFromLIDs_h =
1063 create_const_view (permuteFromLIDs_nc.template view<HMS> ());
1066 std::ostringstream os;
1067 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
1068 "Call permute kernel on host" << std::endl;
1069 std::cerr << os.str ();
1071 if (nonConstStride) {
1074 auto tgtWhichVecs_h =
1075 create_const_view (tgtWhichVecs.template view<HMS> ());
1076 auto srcWhichVecs_h =
1077 create_const_view (srcWhichVecs.template view<HMS> ());
1078 permute_array_multi_column_variable_stride (tgt_h, src_h,
1082 srcWhichVecs_h, numCols);
1085 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1086 permuteFromLIDs_h, numCols);
1091 std::ostringstream os;
1092 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
1093 "Set up permutation arrays on device" << std::endl;
1094 std::cerr << os.str ();
1096 auto tgt_d = this->
template getLocalView<DMS> ();
1097 auto src_d = create_const_view (sourceMV.template getLocalView<DMS> ());
1098 permuteToLIDs_nc.template sync<DMS> ();
1099 auto permuteToLIDs_d =
1100 create_const_view (permuteToLIDs_nc.template view<DMS> ());
1101 permuteFromLIDs_nc.template sync<DMS> ();
1102 auto permuteFromLIDs_d =
1103 create_const_view (permuteFromLIDs_nc.template view<DMS> ());
1106 std::ostringstream os;
1107 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: "
1108 "Call permute kernel on device" << std::endl;
1109 std::cerr << os.str ();
1111 if (nonConstStride) {
1114 auto tgtWhichVecs_d =
1115 create_const_view (tgtWhichVecs.template view<DMS> ());
1116 auto srcWhichVecs_d =
1117 create_const_view (srcWhichVecs.template view<DMS> ());
1118 permute_array_multi_column_variable_stride (tgt_d, src_d,
1122 srcWhichVecs_d, numCols);
1125 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1126 permuteFromLIDs_d, numCols);
1131 std::ostringstream os;
1132 os <<
"(Proc " << myRank <<
") MV::copyAndPermuteNew: Done" << std::endl;
1133 std::cerr << os.str ();
1137 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1139 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1140 packAndPrepareNew (
const SrcDistObject& sourceObj,
1141 const Kokkos::DualView<const local_ordinal_type*, device_type>& exportLIDs,
1142 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1143 const Kokkos::DualView<size_t*, buffer_device_type>& ,
1144 size_t& constantNumPackets,
1147 using ::Tpetra::Details::Behavior;
1148 using ::Tpetra::Details::ProfilingRegion;
1149 using Kokkos::Compat::create_const_view;
1150 using Kokkos::Compat::getKokkosViewDeepCopy;
1151 typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1152 typedef impl_scalar_type IST;
1153 typedef Kokkos::HostSpace host_memory_space;
1154 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::memory_space
1156 typedef typename Kokkos::DualView<IST*, device_type>::t_host::execution_space
1157 host_execution_space;
1158 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::execution_space
1159 dev_execution_space;
1160 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1168 const bool debugCheckIndices = Behavior::debug ();
1173 const bool printDebugOutput = Behavior::verbose ();
1176 if (printDebugOutput && ! this->getMap ().is_null () &&
1177 ! this->getMap ()->getComm ().is_null ()) {
1178 myRank = this->getMap ()->getComm ()->getRank ();
1181 if (printDebugOutput) {
1182 std::ostringstream os;
1183 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew" << std::endl;
1184 std::cerr << os.str ();
1187 const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
1202 const bool packOnHost =
1203 exportLIDs.modified_host () > exportLIDs.modified_device ();
1204 auto src_dev = sourceMV.template getLocalView<dev_memory_space> ();
1205 auto src_host = sourceMV.template getLocalView<host_memory_space> ();
1207 if (sourceMV.template need_sync<Kokkos::HostSpace> ()) {
1208 if (printDebugOutput) {
1209 std::ostringstream os;
1210 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: "
1211 "Pack on host, need sync to host" << std::endl;
1212 std::cerr << os.str ();
1216 src_host = decltype (src_host) (
"MV::DualView::h_view",
1218 src_dev.extent (1));
1223 if (sourceMV.template need_sync<device_type> ()) {
1224 if (printDebugOutput) {
1225 std::ostringstream os;
1226 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: "
1227 "Pack on device, need sync to device" << std::endl;
1228 std::cerr << os.str ();
1232 src_dev = decltype (src_dev) (
"MV::DualView::d_view",
1233 src_host.extent (0),
1234 src_host.extent (1));
1239 const size_t numCols = sourceMV.getNumVectors ();
1244 constantNumPackets = numCols;
1248 if (exportLIDs.extent (0) == 0) {
1249 if (printDebugOutput) {
1250 std::ostringstream os;
1251 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: "
1252 "No exports on this proc, DONE" << std::endl;
1253 std::cerr << os.str ();
1273 const size_t numExportLIDs = exportLIDs.extent (0);
1274 const size_t newExportsSize = numCols * numExportLIDs;
1275 if (printDebugOutput) {
1276 std::ostringstream os;
1277 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: realloc: "
1278 <<
"numExportLIDs = " << numExportLIDs
1279 <<
", exports.extent(0) = " << exports.extent (0)
1280 <<
", newExportsSize = " << newExportsSize << std::endl;
1281 std::cerr << os.str ();
1288 typedef typename std::decay<decltype (exports) >::type::t_host::memory_space EHMS;
1289 typedef typename std::decay<decltype (exports) >::type::t_dev::memory_space EDMS;
1295 exports.template modify<EHMS> ();
1298 exports.template modify<EDMS> ();
1314 if (sourceMV.isConstantStride ()) {
1315 using KokkosRefactor::Details::pack_array_single_column;
1316 if (printDebugOutput) {
1317 std::ostringstream os;
1318 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: "
1319 "pack numCols=1 const stride" << std::endl;
1320 std::cerr << os.str ();
1323 pack_array_single_column (exports.template view<EHMS> (),
1324 create_const_view (src_host),
1325 exportLIDs.template view<host_memory_space> (),
1330 pack_array_single_column (exports.template view<EDMS> (),
1331 create_const_view (src_dev),
1332 exportLIDs.template view<dev_memory_space> (),
1338 using KokkosRefactor::Details::pack_array_single_column;
1339 if (printDebugOutput) {
1340 std::ostringstream os;
1341 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: "
1342 "pack numCols=1 nonconst stride" << std::endl;
1343 std::cerr << os.str ();
1346 pack_array_single_column (exports.template view<EHMS> (),
1347 create_const_view (src_host),
1348 exportLIDs.template view<host_memory_space> (),
1349 sourceMV.whichVectors_[0],
1353 pack_array_single_column (exports.template view<EDMS> (),
1354 create_const_view (src_dev),
1355 exportLIDs.template view<dev_memory_space> (),
1356 sourceMV.whichVectors_[0],
1362 if (sourceMV.isConstantStride ()) {
1363 using KokkosRefactor::Details::pack_array_multi_column;
1364 if (printDebugOutput) {
1365 std::ostringstream os;
1366 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: "
1367 "pack numCols>1 const stride" << std::endl;
1368 std::cerr << os.str ();
1371 pack_array_multi_column (exports.template view<EHMS> (),
1372 create_const_view (src_host),
1373 exportLIDs.template view<host_memory_space> (),
1378 pack_array_multi_column (exports.template view<EDMS> (),
1379 create_const_view (src_dev),
1380 exportLIDs.template view<dev_memory_space> (),
1386 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1387 if (printDebugOutput) {
1388 std::ostringstream os;
1389 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: "
1390 "pack numCols>1 nonconst stride" << std::endl;
1391 std::cerr << os.str ();
1394 pack_array_multi_column_variable_stride (exports.template view<EHMS> (),
1395 create_const_view (src_host),
1396 exportLIDs.template view<host_memory_space> (),
1397 getKokkosViewDeepCopy<host_execution_space> (sourceMV.whichVectors_ ()),
1402 pack_array_multi_column_variable_stride (exports.template view<EDMS> (),
1403 create_const_view (src_dev),
1404 exportLIDs.template view<dev_memory_space> (),
1405 getKokkosViewDeepCopy<dev_execution_space> (sourceMV.whichVectors_ ()),
1412 if (printDebugOutput) {
1413 std::ostringstream os;
1414 os <<
"Proc " << myRank <<
": MV::packAndPrepareNew: DONE" << std::endl;
1415 std::cerr << os.str ();
1420 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1422 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1423 unpackAndCombineNew (
const Kokkos::DualView<const local_ordinal_type*, device_type>& importLIDs,
1424 const Kokkos::DualView<const impl_scalar_type*, buffer_device_type>& imports,
1425 const Kokkos::DualView<const size_t*, buffer_device_type>& ,
1426 const size_t constantNumPackets,
1430 using ::Tpetra::Details::Behavior;
1431 using ::Tpetra::Details::ProfilingRegion;
1433 using KokkosRefactor::Details::unpack_array_multi_column;
1434 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1435 using Kokkos::Compat::getKokkosViewDeepCopy;
1436 typedef impl_scalar_type IST;
1437 typedef typename Kokkos::DualView<IST*,
1438 device_type>::t_dev::memory_space DMS;
1443 typedef Kokkos::HostSpace HMS;
1444 const char tfecfFuncName[] =
"unpackAndCombineNew: ";
1445 ProfilingRegion regionUAC (
"Tpetra::MultiVector::unpackAndCombine");
1453 const bool debugCheckIndices = Behavior::debug ();
1459 const bool printDebugOutput = Behavior::verbose ();
1461 if (printDebugOutput && ! this->getMap ().is_null () &&
1462 ! this->getMap ()->getComm ().is_null ()) {
1463 myRank = this->getMap ()->getComm ()->getRank ();
1467 if (importLIDs.extent (0) == 0) {
1471 const size_t numVecs = getNumVectors ();
1472 if (debugCheckIndices) {
1473 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1474 (static_cast<size_t> (imports.extent (0)) !=
1475 numVecs * importLIDs.extent (0),
1477 "imports.extent(0) = " << imports.extent (0)
1478 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1479 <<
" * " << importLIDs.extent (0) <<
" = "
1480 << numVecs * importLIDs.extent (0) <<
".");
1482 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1483 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1484 "constantNumPackets input argument must be nonzero.");
1486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1487 (static_cast<size_t> (numVecs) !=
1488 static_cast<size_t> (constantNumPackets),
1489 std::runtime_error,
"constantNumPackets must equal numVecs.");
1497 const bool unpackOnHost =
1498 imports.modified_host () != 0 &&
1499 imports.modified_host () > imports.modified_device ();
1501 if (printDebugOutput) {
1502 std::ostringstream os;
1503 os <<
"(Proc " << myRank <<
") MV::unpackAndCombine: unpackOnHost: "
1504 << (unpackOnHost ?
"true" :
"false") << std::endl;
1505 std::cerr << os.str ();
1513 if (unpackOnHost && importLIDs.modified_host () < importLIDs.modified_device ()) {
1514 if (printDebugOutput) {
1515 std::ostringstream os;
1516 os <<
"(Proc " << myRank <<
") MV::unpackAndCombine: sync importLIDs "
1517 "to host" << std::endl;
1518 std::cerr << os.str ();
1524 theImportLIDs.template sync<HMS> ();
1526 else if (! unpackOnHost && importLIDs.modified_host () > importLIDs.modified_device ()) {
1527 if (printDebugOutput) {
1528 std::ostringstream os;
1529 os <<
"(Proc " << myRank <<
") MV::unpackAndCombine: sync importLIDs "
1530 "to device" << std::endl;
1531 std::cerr << os.str ();
1537 theImportLIDs.template sync<DMS> ();
1539 else if (printDebugOutput) {
1540 std::ostringstream os;
1541 os <<
"(Proc " << myRank <<
") MV::unpackAndCombine: no need to sync "
1542 "importLIDs" << std::endl;
1543 std::cerr << os.str ();
1551 if(this->
template need_sync<HMS> ()) this->
template sync<HMS> ();
1552 this->
template modify<HMS> ();
1555 if(this->
template need_sync<DMS> ()) this->
template sync<DMS> ();
1556 this->
template modify<DMS> ();
1558 auto X_d = this->
template getLocalView<DMS> ();
1559 auto X_h = this->
template getLocalView<HMS> ();
1562 imports.template view<typename buffer_device_type::memory_space> ();
1563 auto imports_h = imports.template view<Kokkos::HostSpace> ();
1564 auto importLIDs_d = importLIDs.template view<DMS> ();
1565 auto importLIDs_h = importLIDs.template view<HMS> ();
1567 Kokkos::DualView<size_t*, device_type> whichVecs;
1568 if (! isConstantStride ()) {
1569 Kokkos::View<
const size_t*, HMS,
1570 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1572 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1574 whichVecs.template modify<HMS> ();
1578 whichVecs.template modify<DMS> ();
1582 auto whichVecs_d = whichVecs.template view<DMS> ();
1583 auto whichVecs_h = whichVecs.template view<HMS> ();
1593 if (printDebugOutput) {
1594 std::ostringstream os;
1595 os <<
"(Proc " << myRank <<
") MV::unpackAndCombine: unpacking"
1597 std::cerr << os.str ();
1600 if (numVecs > 0 && importLIDs.extent (0) > 0) {
1601 typedef typename Kokkos::DualView<IST*,
1602 device_type>::t_dev::execution_space dev_exec_space;
1603 typedef typename Kokkos::DualView<IST*,
1604 device_type>::t_host::execution_space host_exec_space;
1611 KokkosRefactor::Details::InsertOp<execution_space> op;
1613 if (isConstantStride ()) {
1621 unpack_array_multi_column (host_exec_space (),
1622 X_h, imports_h, importLIDs_h, op,
1623 numVecs, debugCheckIndices);
1627 unpack_array_multi_column (dev_exec_space (),
1628 X_d, imports_d, importLIDs_d, op,
1629 numVecs, debugCheckIndices);
1641 unpack_array_multi_column_variable_stride (host_exec_space (),
1649 unpack_array_multi_column_variable_stride (dev_exec_space (),
1658 else if (CM ==
ADD) {
1659 KokkosRefactor::Details::AddOp<execution_space> op;
1661 if (isConstantStride ()) {
1663 unpack_array_multi_column (host_exec_space (),
1664 X_h, imports_h, importLIDs_h, op,
1665 numVecs, debugCheckIndices);
1668 unpack_array_multi_column (dev_exec_space (),
1669 X_d, imports_d, importLIDs_d, op,
1670 numVecs, debugCheckIndices);
1675 unpack_array_multi_column_variable_stride (host_exec_space (),
1683 unpack_array_multi_column_variable_stride (dev_exec_space (),
1693 KokkosRefactor::Details::AbsMaxOp<execution_space> op;
1695 if (isConstantStride ()) {
1697 unpack_array_multi_column (host_exec_space (),
1698 X_h, imports_h, importLIDs_h, op,
1699 numVecs, debugCheckIndices);
1702 unpack_array_multi_column (dev_exec_space (),
1703 X_d, imports_d, importLIDs_d, op,
1704 numVecs, debugCheckIndices);
1709 unpack_array_multi_column_variable_stride (host_exec_space (),
1717 unpack_array_multi_column_variable_stride (dev_exec_space (),
1728 if (printDebugOutput) {
1729 std::ostringstream os;
1730 os <<
"(Proc " << myRank <<
") MV::unpackAndCombine: Done!"
1732 std::cerr << os.str ();
1736 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1738 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1739 getNumVectors ()
const
1741 if (isConstantStride ()) {
1742 return static_cast<size_t> (view_.extent (1));
1744 return static_cast<size_t> (whichVectors_.size ());
1752 gblDotImpl (
const RV& dotsOut,
1753 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1754 const bool distributed)
1756 using Teuchos::REDUCE_MAX;
1757 using Teuchos::REDUCE_SUM;
1758 using Teuchos::reduceAll;
1759 typedef typename RV::non_const_value_type dot_type;
1761 const size_t numVecs = dotsOut.extent (0);
1776 if (distributed && ! comm.is_null ()) {
1779 const int nv = static_cast<int> (numVecs);
1780 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
1782 if (commIsInterComm) {
1786 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
1788 const dot_type*
const lclSum = lclDots.data ();
1789 dot_type*
const gblSum = dotsOut.data ();
1790 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1793 dot_type*
const inout = dotsOut.data ();
1794 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
1800 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1802 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1804 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
1806 using ::Tpetra::Details::Behavior;
1807 using Kokkos::create_mirror_view;
1808 using Kokkos::subview;
1809 using Teuchos::Comm;
1810 using Teuchos::null;
1813 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
1815 typedef typename dual_view_type::t_dev XMV;
1816 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
1817 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
1821 const size_t numVecs = this->getNumVectors ();
1825 const size_t lclNumRows = this->getLocalLength ();
1826 const size_t numDots = static_cast<size_t> (dots.extent (0));
1827 const bool debug = Behavior::debug ();
1830 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
1831 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1832 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
1833 "compatible with the input MultiVector A. We only test for this "
1844 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1846 "MultiVectors do not have the same local length. "
1847 "this->getLocalLength() = " << lclNumRows <<
" != "
1849 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1851 "MultiVectors must have the same number of columns (vectors). "
1852 "this->getNumVectors() = " << numVecs <<
" != "
1854 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1855 numDots != numVecs, std::runtime_error,
1856 "The output array 'dots' must have the same number of entries as the "
1857 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
1858 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1860 const std::pair<size_t, size_t> colRng (0, numVecs);
1861 RV dotsOut = subview (dots, colRng);
1862 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
1863 this->getMap ()->getComm ();
1866 if (this->
template need_sync<dev_memory_space> ()) const_cast<MV*>(
this)->template sync<dev_memory_space> ();
1867 if (A.template need_sync<dev_memory_space> ()) const_cast<MV&>(A).template sync<dev_memory_space> ();
1869 auto thisView = this->
template getLocalView<dev_memory_space> ();
1870 auto A_view = A.template getLocalView<dev_memory_space> ();
1872 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1873 this->whichVectors_.getRawPtr (),
1876 gblDotImpl (dotsOut, comm, this->isDistributed ());
1880 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1885 using Teuchos::Comm;
1888 typedef typename MV::dot_type dot_type;
1889 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
1892 RCP<const typename MV::map_type> map = x.
getMap ();
1893 RCP<const Comm<int> > comm = map.is_null () ? Teuchos::null : map->getComm ();
1894 if (comm.is_null ()) {
1895 return Kokkos::ArithTraits<dot_type>::zero ();
1898 typedef LocalOrdinal LO;
1902 const LO lclNumRows = static_cast<LO> (std::min (x.
getLocalLength (),
1904 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
1905 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
1906 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
1909 if (x.template need_sync<dev_memory_space> ()) x.template sync<dev_memory_space> ();
1910 if (y.template need_sync<dev_memory_space> ()) const_cast<MV&>(y).template sync<dev_memory_space> ();
1912 x.template modify<dev_memory_space> ();
1913 auto x_2d = x.template getLocalView<dev_memory_space> ();
1914 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
1915 auto y_2d = y.template getLocalView<dev_memory_space> ();
1916 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
1917 lclDot = KokkosBlas::dot (x_1d, y_1d);
1920 using Teuchos::outArg;
1921 using Teuchos::REDUCE_SUM;
1922 using Teuchos::reduceAll;
1923 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
1935 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1937 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1939 const Teuchos::ArrayView<dot_type>& dots)
const
1942 const char tfecfFuncName[] =
"dot: ";
1945 const size_t numVecs = this->getNumVectors ();
1946 const size_t lclNumRows = this->getLocalLength ();
1947 const size_t numDots = static_cast<size_t> (dots.size ());
1957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1959 "MultiVectors do not have the same local length. "
1960 "this->getLocalLength() = " << lclNumRows <<
" != "
1962 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1964 "MultiVectors must have the same number of columns (vectors). "
1965 "this->getNumVectors() = " << numVecs <<
" != "
1967 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1968 (numDots != numVecs, std::runtime_error,
1969 "The output array 'dots' must have the same number of entries as the "
1970 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
1971 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1974 const dot_type gblDot = multiVectorSingleColumnDot (const_cast<MV&> (*
this), A);
1978 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
1983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1986 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
1988 typedef Kokkos::View<mag_type*, Kokkos::HostSpace> host_norms_view_type;
1992 const size_t numNorms = static_cast<size_t> (norms.size ());
1993 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1994 this->norm2 (normsHostView);
1998 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2001 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2003 this->normImpl (norms, NORM_TWO);
2007 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2008 void TPETRA_DEPRECATED
2011 const Teuchos::ArrayView<mag_type> &norms)
const
2014 using Kokkos::subview;
2015 using Teuchos::Comm;
2016 using Teuchos::null;
2018 using Teuchos::reduceAll;
2019 using Teuchos::REDUCE_SUM;
2020 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2021 typedef Kokkos::Details::ArithTraits<mag_type> ATM;
2022 typedef Kokkos::View<mag_type*, Kokkos::HostSpace> norms_view_type;
2024 const char tfecfFuncName[] =
"normWeighted: ";
2026 const size_t numVecs = this->getNumVectors ();
2027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2028 static_cast<size_t> (norms.size ()) != numVecs, std::runtime_error,
2029 "norms.size() = " << norms.size () <<
" != this->getNumVectors() = "
2033 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2034 ! OneW && weights.
getNumVectors () != numVecs, std::runtime_error,
2035 "The input MultiVector of weights must contain either one column, "
2036 "or must have the same number of columns as *this. "
2038 <<
" and this->getNumVectors() = " << numVecs <<
".");
2043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2044 (! this->getMap ()->isCompatible (*weights.
getMap ()), std::runtime_error,
2045 "MultiVectors do not have compatible Maps:" << std::endl
2046 <<
"this->getMap(): " << std::endl << *this->getMap()
2047 <<
"weights.getMap(): " << std::endl << *weights.
getMap() << std::endl);
2050 const size_t lclNumRows = this->getLocalLength ();
2051 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2053 "MultiVectors do not have the same local length.");
2056 norms_view_type lclNrms (
"Tpetra::MV::lclNrms", numVecs);
2059 const_cast<MV*> (
this)->template sync<device_type> ();
2060 const_cast<MV&> (weights).template sync<device_type> ();
2062 auto X_lcl = this->
template getLocalView<device_type> ();
2063 auto W_lcl = weights.template getLocalView<device_type> ();
2065 if (isConstantStride () && ! OneW) {
2066 KokkosBlas::nrm2w_squared (lclNrms, X_lcl, W_lcl);
2069 for (
size_t j = 0; j < numVecs; ++j) {
2070 const size_t X_col = this->isConstantStride () ? j :
2071 this->whichVectors_[j];
2072 const size_t W_col = OneW ? static_cast<size_t> (0) :
2074 KokkosBlas::nrm2w_squared (subview (lclNrms, j),
2075 subview (X_lcl, ALL (), X_col),
2076 subview (W_lcl, ALL (), W_col));
2081 ATM::one () / static_cast<mag_type> (this->getGlobalLength ());
2082 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
2083 Teuchos::null : this->getMap ()->getComm ();
2085 if (! comm.is_null () && this->isDistributed ()) {
2087 reduceAll<int, mag_type> (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2088 lclNrms.data (), norms.getRawPtr ());
2089 for (
size_t k = 0; k < numVecs; ++k) {
2090 norms[k] = ATM::sqrt (norms[k] * OneOverN);
2094 for (
size_t k = 0; k < numVecs; ++k) {
2095 norms[k] = ATM::sqrt (ATS::magnitude (lclNrms(k)) * OneOverN);
2101 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2104 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2106 typedef typename Kokkos::View<mag_type*, Kokkos::HostSpace> host_norms_view_type;
2108 const size_t numNorms = static_cast<size_t> (norms.size ());
2109 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
2110 this->norm1 (normsHostView);
2114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2117 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2119 this->normImpl (norms, NORM_ONE);
2122 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2125 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2127 typedef Kokkos::View<mag_type*, Kokkos::HostSpace> host_norms_view_type;
2129 const size_t numNorms = static_cast<size_t> (norms.size ());
2130 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
2131 this->normInf (normsHostView);
2135 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2138 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2140 this->normImpl (norms, NORM_INF);
2152 template<
class RV,
class XMV>
2154 lclNormImpl (
const RV& normsOut,
2156 const size_t lclNumRows,
2157 const size_t numVecs,
2158 const Teuchos::ArrayView<const size_t>& whichVecs,
2159 const bool constantStride,
2160 const EWhichNormImpl whichNorm)
2163 using Kokkos::subview;
2164 typedef typename RV::non_const_value_type mag_type;
2166 static_assert (Kokkos::Impl::is_view<RV>::value,
2167 "Tpetra::MultiVector::lclNormImpl: "
2168 "The first argument RV is not a Kokkos::View.");
2169 static_assert (RV::rank == 1,
"Tpetra::MultiVector::lclNormImpl: "
2170 "The first argument normsOut must have rank 1.");
2171 static_assert (Kokkos::Impl::is_view<XMV>::value,
2172 "Tpetra::MultiVector::lclNormImpl: "
2173 "The second argument X_lcl is not a Kokkos::View.");
2174 static_assert (XMV::rank == 2,
"Tpetra::MultiVector::lclNormImpl: "
2175 "The second argument X_lcl must have rank 2.");
2180 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2181 const std::pair<size_t, size_t> colRng (0, numVecs);
2182 RV theNorms = subview (normsOut, colRng);
2183 XMV X = subview (X_lcl, rowRng, Kokkos::ALL());
2188 TEUCHOS_TEST_FOR_EXCEPTION(
2189 lclNumRows != 0 && constantStride && ( \
2190 ( X.extent (0) != lclNumRows ) ||
2191 ( X.extent (1) != numVecs ) ),
2192 std::logic_error,
"Constant Stride X's dimensions are " << X.extent (0) <<
" x "
2193 << X.extent (1) <<
", which differ from the local dimensions "
2194 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to "
2195 "the Tpetra developers.");
2197 TEUCHOS_TEST_FOR_EXCEPTION(
2198 lclNumRows != 0 && !constantStride && ( \
2199 ( X.extent (0) != lclNumRows ) ||
2200 ( X.extent (1) < numVecs ) ),
2201 std::logic_error,
"Strided X's dimensions are " << X.extent (0) <<
" x "
2202 << X.extent (1) <<
", which are incompatible with the local dimensions "
2203 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to "
2204 "the Tpetra developers.");
2206 if (lclNumRows == 0) {
2207 const mag_type zeroMag = Kokkos::Details::ArithTraits<mag_type>::zero ();
2211 if (constantStride) {
2212 if (whichNorm == IMPL_NORM_INF) {
2213 KokkosBlas::nrminf (theNorms, X);
2215 else if (whichNorm == IMPL_NORM_ONE) {
2216 KokkosBlas::nrm1 (theNorms, X);
2218 else if (whichNorm == IMPL_NORM_TWO) {
2219 KokkosBlas::nrm2_squared (theNorms, X);
2222 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"Should never get here!");
2231 for (
size_t k = 0; k < numVecs; ++k) {
2232 const size_t X_col = constantStride ? k : whichVecs[k];
2233 if (whichNorm == IMPL_NORM_INF) {
2234 KokkosBlas::nrminf (subview (theNorms, k),
2235 subview (X, ALL (), X_col));
2237 else if (whichNorm == IMPL_NORM_ONE) {
2238 KokkosBlas::nrm1 (subview (theNorms, k),
2239 subview (X, ALL (), X_col));
2241 else if (whichNorm == IMPL_NORM_TWO) {
2242 KokkosBlas::nrm2_squared (subview (theNorms, k),
2243 subview (X, ALL (), X_col));
2246 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"Should never get here!");
2255 template<
class ViewType>
2256 class SquareRootFunctor {
2258 typedef typename ViewType::execution_space execution_space;
2259 typedef typename ViewType::size_type size_type;
2261 SquareRootFunctor (
const ViewType& theView) :
2265 KOKKOS_INLINE_FUNCTION
void
2266 operator() (
const size_type& i)
const
2268 typedef typename ViewType::non_const_value_type value_type;
2269 typedef Kokkos::Details::ArithTraits<value_type> KAT;
2270 theView_(i) = KAT::sqrt (theView_(i));
2279 gblNormImpl (
const RV& normsOut,
2280 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2281 const bool distributed,
2282 const EWhichNormImpl whichNorm)
2284 using Teuchos::REDUCE_MAX;
2285 using Teuchos::REDUCE_SUM;
2286 using Teuchos::reduceAll;
2287 typedef typename RV::non_const_value_type mag_type;
2289 const size_t numVecs = normsOut.extent (0);
2304 if (distributed && ! comm.is_null ()) {
2310 RV lclNorms (
"MV::normImpl lcl", numVecs);
2312 const mag_type*
const lclSum = lclNorms.data ();
2313 mag_type*
const gblSum = normsOut.data ();
2314 const int nv = static_cast<int> (numVecs);
2315 if (whichNorm == IMPL_NORM_INF) {
2316 reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
2318 reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2322 if (whichNorm == IMPL_NORM_TWO) {
2328 const bool inHostMemory =
2329 Kokkos::Impl::is_same<
typename RV::memory_space,
2330 typename RV::host_mirror_space::memory_space>::value;
2332 for (
size_t j = 0; j < numVecs; ++j) {
2333 normsOut(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (normsOut(j));
2341 SquareRootFunctor<RV> f (normsOut);
2342 typedef typename RV::execution_space execution_space;
2343 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2344 Kokkos::parallel_for (range_type (0, numVecs), f);
2351 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2353 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2354 normImpl (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms,
2357 using Kokkos::create_mirror_view;
2358 using Kokkos::subview;
2359 using Teuchos::Comm;
2360 using Teuchos::null;
2363 typedef Kokkos::View<mag_type*, Kokkos::HostSpace> RV;
2365 const size_t numVecs = this->getNumVectors ();
2369 const size_t lclNumRows = this->getLocalLength ();
2370 const size_t numNorms = static_cast<size_t> (norms.extent (0));
2371 TEUCHOS_TEST_FOR_EXCEPTION(
2372 numNorms < numVecs, std::runtime_error,
"Tpetra::MultiVector::normImpl: "
2373 "'norms' must have at least as many entries as the number of vectors in "
2374 "*this. norms.extent(0) = " << numNorms <<
" < this->getNumVectors()"
2375 " = " << numVecs <<
".");
2377 const std::pair<size_t, size_t> colRng (0, numVecs);
2378 RV normsOut = subview (norms, colRng);
2380 EWhichNormImpl lclNormType;
2381 if (whichNorm == NORM_ONE) {
2382 lclNormType = IMPL_NORM_ONE;
2383 }
else if (whichNorm == NORM_TWO) {
2384 lclNormType = IMPL_NORM_TWO;
2386 lclNormType = IMPL_NORM_INF;
2389 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2390 this->getMap ()->getComm ();
2393 const bool useHostVersion = this->
template need_sync<device_type> ();
2394 if (useHostVersion) {
2397 typedef typename dual_view_type::t_host XMV;
2398 typedef typename XMV::memory_space cur_memory_space;
2400 auto thisView = this->
template getLocalView<cur_memory_space> ();
2401 lclNormImpl<RV, XMV> (normsOut, thisView, lclNumRows, numVecs,
2402 this->whichVectors_, this->isConstantStride (),
2404 gblNormImpl (normsOut, comm, this->isDistributed (), lclNormType);
2408 typedef typename dual_view_type::t_dev XMV;
2409 typedef typename XMV::memory_space cur_memory_space;
2411 auto thisView = this->
template getLocalView<cur_memory_space> ();
2412 lclNormImpl<RV, XMV> (normsOut, thisView, lclNumRows, numVecs,
2413 this->whichVectors_, this->isConstantStride (),
2415 gblNormImpl (normsOut, comm, this->isDistributed (), lclNormType);
2419 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2422 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2426 using Kokkos::subview;
2427 using Teuchos::Comm;
2429 using Teuchos::reduceAll;
2430 using Teuchos::REDUCE_SUM;
2431 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2433 const size_t lclNumRows = this->getLocalLength ();
2434 const size_t numVecs = this->getNumVectors ();
2435 const size_t numMeans = static_cast<size_t> (means.size ());
2437 TEUCHOS_TEST_FOR_EXCEPTION(
2438 numMeans != numVecs, std::runtime_error,
2439 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2440 <<
" != this->getNumVectors() = " << numVecs <<
".");
2442 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2443 const std::pair<size_t, size_t> colRng (0, numVecs);
2448 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2450 typename local_view_type::HostMirror::array_layout,
2452 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2453 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2455 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2456 this->getMap ()->getComm ();
2459 const bool useHostVersion = this->
template need_sync<device_type> ();
2460 if (useHostVersion) {
2462 auto X_lcl = subview (this->
template getLocalView<Kokkos::HostSpace> (),
2463 rowRng, Kokkos::ALL ());
2465 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2466 if (isConstantStride ()) {
2467 KokkosBlas::sum (lclSums, X_lcl);
2470 for (
size_t j = 0; j < numVecs; ++j) {
2471 const size_t col = whichVectors_[j];
2472 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2479 if (! comm.is_null () && this->isDistributed ()) {
2480 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2481 lclSums.data (), meansOut.data ());
2489 auto X_lcl = subview (this->
template getLocalView<device_type> (),
2490 rowRng, Kokkos::ALL ());
2493 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2494 if (isConstantStride ()) {
2495 KokkosBlas::sum (lclSums, X_lcl);
2498 for (
size_t j = 0; j < numVecs; ++j) {
2499 const size_t col = whichVectors_[j];
2500 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2508 if (! comm.is_null () && this->isDistributed ()) {
2509 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2510 lclSums.data (), meansOut.data ());
2521 ATS::one () / static_cast<mag_type> (this->getGlobalLength ());
2522 for (
size_t k = 0; k < numMeans; ++k) {
2523 meansOut(k) = meansOut(k) * OneOverN;
2528 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2534 typedef Kokkos::Details::ArithTraits<IST> ATS;
2535 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2536 typedef typename pool_type::generator_type generator_type;
2538 const IST max = Kokkos::rand<generator_type, IST>::max ();
2539 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2541 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2545 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2551 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2562 const uint64_t myRank =
2563 static_cast<uint64_t> (this->getMap ()->getComm ()->getRank ());
2564 uint64_t seed64 = static_cast<uint64_t> (std::rand ()) + myRank + 17311uLL;
2565 unsigned int seed = static_cast<unsigned int> (seed64&0xffffffff);
2567 pool_type rand_pool (seed);
2568 const IST max = static_cast<IST> (maxVal);
2569 const IST min = static_cast<IST> (minVal);
2574 this->view_.modified_device () = 0;
2575 this->view_.modified_host () = 0;
2577 this->
template modify<device_type> ();
2578 auto thisView = this->getLocalView<device_type> ();
2580 if (isConstantStride ()) {
2581 Kokkos::fill_random (thisView, rand_pool, min, max);
2584 const size_t numVecs = getNumVectors ();
2585 for (
size_t k = 0; k < numVecs; ++k) {
2586 const size_t col = whichVectors_[k];
2587 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2588 Kokkos::fill_random (X_k, rand_pool, min, max);
2593 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2598 using ::Tpetra::Details::ProfilingRegion;
2599 using ::Tpetra::Details::Blas::fill;
2600 typedef typename dual_view_type::t_dev::memory_space DMS;
2602 typedef Kokkos::HostSpace HMS;
2603 typedef typename dual_view_type::t_dev::execution_space DES;
2604 typedef typename dual_view_type::t_host::execution_space HES;
2605 typedef LocalOrdinal LO;
2606 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2611 const LO lclNumRows = static_cast<LO> (this->getLocalLength ());
2612 const LO numVecs = static_cast<LO> (this->getNumVectors ());
2618 const bool useHostVersion = this->
template need_sync<DMS> ();
2620 if (! useHostVersion) {
2621 this->
template modify<DMS> ();
2622 auto X = this->
template getLocalView<DMS> ();
2623 if (this->isConstantStride ()) {
2624 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2627 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2628 this->whichVectors_.getRawPtr ());
2632 this->
template modify<HMS> ();
2633 auto X = this->
template getLocalView<HMS> ();
2634 if (this->isConstantStride ()) {
2635 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2638 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2639 this->whichVectors_.getRawPtr ());
2645 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2650 using Teuchos::ArrayRCP;
2651 using Teuchos::Comm;
2658 TEUCHOS_TEST_FOR_EXCEPTION(
2659 ! this->isConstantStride (), std::logic_error,
2660 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2661 "if the MultiVector is a column view of another MultiVector (that is, if "
2662 "isConstantStride() == false).");
2692 #ifdef HAVE_TEUCHOS_DEBUG
2706 #endif // HAVE_TEUCHOS_DEBUG
2708 if (this->getMap ().is_null ()) {
2713 TEUCHOS_TEST_FOR_EXCEPTION(
2714 newMap.is_null (), std::invalid_argument,
2715 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2716 "This probably means that the input Map is incorrect.");
2720 const size_t newNumRows = newMap->getNodeNumElements ();
2721 const size_t origNumRows = view_.extent (0);
2722 const size_t numCols = this->getNumVectors ();
2724 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2725 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
2728 else if (newMap.is_null ()) {
2731 const size_t newNumRows = static_cast<size_t> (0);
2732 const size_t numCols = this->getNumVectors ();
2733 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
2736 this->map_ = newMap;
2739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2745 if (theAlpha == Kokkos::Details::ArithTraits<impl_scalar_type>::one ()) {
2748 const size_t lclNumRows = this->getLocalLength ();
2749 const size_t numVecs = this->getNumVectors ();
2750 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2751 const std::pair<size_t, size_t> colRng (0, numVecs);
2759 const bool useHostVersion = this->
template need_sync<device_type> ();
2760 if (useHostVersion) {
2762 Kokkos::subview (this->
template getLocalView<Kokkos::HostSpace> (),
2763 rowRng, Kokkos::ALL ());
2764 if (isConstantStride ()) {
2765 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2768 for (
size_t k = 0; k < numVecs; ++k) {
2769 const size_t Y_col = this->isConstantStride () ? k :
2770 this->whichVectors_[k];
2771 auto Y_k = Kokkos::subview (Y_lcl, Kokkos::ALL (), Y_col);
2772 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2778 Kokkos::subview (this->
template getLocalView<device_type> (),
2779 rowRng, Kokkos::ALL ());
2780 if (isConstantStride ()) {
2781 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2784 for (
size_t k = 0; k < numVecs; ++k) {
2785 const size_t Y_col = this->isConstantStride () ? k :
2786 this->whichVectors_[k];
2787 auto Y_k = Kokkos::subview (Y_lcl, Kokkos::ALL (), Y_col);
2788 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2795 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2798 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2800 const size_t numVecs = this->getNumVectors ();
2801 const size_t numAlphas = static_cast<size_t> (alphas.size ());
2802 TEUCHOS_TEST_FOR_EXCEPTION(
2803 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2804 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2808 typedef Kokkos::DualView<impl_scalar_type*, device_type> k_alphas_type ;
2809 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2810 k_alphas.template modify<typename k_alphas_type::host_mirror_space> ();
2811 for (
size_t i = 0; i < numAlphas; ++i) {
2812 k_alphas.h_view(i) = static_cast<impl_scalar_type> (alphas[i]);
2814 k_alphas.template sync<typename k_alphas_type::memory_space> ();
2816 this->scale (k_alphas.d_view);
2819 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2822 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2825 using Kokkos::subview;
2827 const size_t lclNumRows = this->getLocalLength ();
2828 const size_t numVecs = this->getNumVectors ();
2829 TEUCHOS_TEST_FOR_EXCEPTION(
2830 static_cast<size_t> (alphas.extent (0)) != numVecs,
2831 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2832 "alphas.extent(0) = " << alphas.extent (0)
2833 <<
" != this->getNumVectors () = " << numVecs <<
".");
2834 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2835 const std::pair<size_t, size_t> colRng (0, numVecs);
2845 const bool useHostVersion = this->
template need_sync<device_type> ();
2846 if (useHostVersion) {
2849 auto alphas_h = Kokkos::create_mirror_view (alphas);
2850 typedef typename decltype (alphas_h)::memory_space cur_memory_space;
2854 Kokkos::subview (this->
template getLocalView<cur_memory_space> (),
2855 rowRng, Kokkos::ALL ());
2856 if (isConstantStride ()) {
2857 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2860 for (
size_t k = 0; k < numVecs; ++k) {
2861 const size_t Y_col = this->isConstantStride () ? k :
2862 this->whichVectors_[k];
2863 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2866 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2872 Kokkos::subview (this->
template getLocalView<device_type> (),
2873 rowRng, Kokkos::ALL());
2874 if (isConstantStride ()) {
2875 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2878 for (
size_t k = 0; k < numVecs; ++k) {
2879 const size_t Y_col = this->isConstantStride () ? k :
2880 this->whichVectors_[k];
2881 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2887 KokkosBlas::scal (Y_k, alphas(k), Y_k);
2893 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2900 using Kokkos::subview;
2902 typedef typename dual_view_type::t_dev::memory_space dev_memory_space;
2904 const char tfecfFuncName[] =
"scale: ";
2906 const size_t lclNumRows = getLocalLength ();
2907 const size_t numVecs = getNumVectors ();
2909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2911 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2913 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2915 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2919 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2920 const std::pair<size_t, size_t> colRng (0, numVecs);
2923 if (this->
template need_sync<dev_memory_space> ()) this->
template sync<dev_memory_space> ();
2924 if (A.template need_sync<dev_memory_space> ()) const_cast<MV&>(A).template sync<dev_memory_space> ();
2926 this->
template modify<dev_memory_space> ();
2927 auto Y_lcl_orig = this->
template getLocalView<dev_memory_space> ();
2928 auto X_lcl_orig = A.template getLocalView<dev_memory_space> ();
2929 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2930 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2933 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2937 for (
size_t k = 0; k < numVecs; ++k) {
2938 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2940 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2941 auto X_k = subview (X_lcl, ALL (), X_col);
2943 KokkosBlas::scal (Y_k, theAlpha, X_k);
2950 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2956 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
2958 const char tfecfFuncName[] =
"reciprocal: ";
2960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2962 "MultiVectors do not have the same local length. "
2963 "this->getLocalLength() = " << getLocalLength ()
2965 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2966 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2967 ": MultiVectors do not have the same number of columns (vectors). "
2968 "this->getNumVectors() = " << getNumVectors ()
2969 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2971 const size_t numVecs = getNumVectors ();
2974 if (this->
template need_sync<dev_memory_space> ()) this->
template sync<dev_memory_space> ();
2975 if (A.template need_sync<dev_memory_space> ()) const_cast<MV&>(A).template sync<dev_memory_space> ();
2976 this->
template modify<dev_memory_space> ();
2978 auto this_view_dev = this->
template getLocalView<dev_memory_space> ();
2979 auto A_view_dev = A.template getLocalView<dev_memory_space> ();
2982 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2986 using Kokkos::subview;
2987 for (
size_t k = 0; k < numVecs; ++k) {
2988 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2989 auto vector_k = subview (this_view_dev, ALL (), this_col);
2990 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2991 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2992 KokkosBlas::reciprocal (vector_k, vector_Ak);
2997 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3003 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
3005 const char tfecfFuncName[] =
"abs";
3006 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3008 ": MultiVectors do not have the same local length. "
3009 "this->getLocalLength() = " << getLocalLength ()
3011 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3012 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3013 ": MultiVectors do not have the same number of columns (vectors). "
3014 "this->getNumVectors() = " << getNumVectors ()
3015 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3016 const size_t numVecs = getNumVectors ();
3019 if (this->
template need_sync<dev_memory_space> ()) this->
template sync<dev_memory_space> ();
3020 if (A.template need_sync<dev_memory_space> ()) const_cast<MV&>(A).template sync<dev_memory_space> ();
3021 this->
template modify<dev_memory_space> ();
3023 auto this_view_dev = this->
template getLocalView<dev_memory_space> ();
3024 auto A_view_dev = A.template getLocalView<dev_memory_space> ();
3027 KokkosBlas::abs (this_view_dev, A_view_dev);
3031 using Kokkos::subview;
3033 for (
size_t k=0; k < numVecs; ++k) {
3034 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3035 auto vector_k = subview (this_view_dev, ALL (), this_col);
3036 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3037 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3038 KokkosBlas::abs (vector_k, vector_Ak);
3043 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3050 const char tfecfFuncName[] =
"update: ";
3051 using Kokkos::subview;
3054 typedef typename dual_view_type::t_dev::memory_space dev_memory_space;
3058 const size_t lclNumRows = getLocalLength ();
3059 const size_t numVecs = getNumVectors ();
3061 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3063 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3065 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3067 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3071 if (this->
template need_sync<dev_memory_space> ()) this->
template sync<dev_memory_space> ();
3072 if (A.template need_sync<dev_memory_space> ()) const_cast<MV&>(A).template sync<dev_memory_space> ();
3076 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3077 const std::pair<size_t, size_t> colRng (0, numVecs);
3079 auto Y_lcl_orig = this->
template getLocalView<dev_memory_space> ();
3080 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3081 auto X_lcl_orig = A.template getLocalView<dev_memory_space> ();
3082 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3085 this->
template modify<dev_memory_space> ();
3087 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3091 for (
size_t k = 0; k < numVecs; ++k) {
3092 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3094 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3095 auto X_k = subview (X_lcl, ALL (), X_col);
3097 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3102 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3109 const Scalar& gamma)
3112 using Kokkos::subview;
3114 typedef typename dual_view_type::t_dev::memory_space dev_memory_space;
3116 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3120 const size_t lclNumRows = this->getLocalLength ();
3121 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3123 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3124 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3126 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3128 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3129 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3131 const size_t numVecs = getNumVectors ();
3132 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3134 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3135 "but this MultiVector has " << numVecs <<
" column(s).");
3136 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3138 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3139 "but this MultiVector has " << numVecs <<
" column(s).");
3146 if (this->
template need_sync<dev_memory_space> ()) this->
template sync<dev_memory_space> ();
3147 if (A.template need_sync<dev_memory_space> ()) const_cast<MV&>(A).template sync<dev_memory_space> ();
3148 if (B.template need_sync<dev_memory_space> ()) const_cast<MV&>(B).template sync<dev_memory_space> ();
3151 this->
template modify<dev_memory_space> ();
3153 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3154 const std::pair<size_t, size_t> colRng (0, numVecs);
3159 auto C_lcl = subview (this->
template getLocalView<dev_memory_space> (), rowRng, ALL ());
3160 auto A_lcl = subview (A.template getLocalView<dev_memory_space> (), rowRng, ALL ());
3161 auto B_lcl = subview (B.template getLocalView<dev_memory_space> (), rowRng, ALL ());
3164 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3169 for (
size_t k = 0; k < numVecs; ++k) {
3170 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3173 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3174 theBeta, subview (B_lcl, rowRng, B_col),
3175 theGamma, subview (C_lcl, rowRng, this_col));
3180 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3181 Teuchos::ArrayRCP<const Scalar>
3186 using Kokkos::subview;
3188 const char tfecfFuncName[] =
"getData: ";
3194 const_cast<MV*> (
this)->template sync<Kokkos::HostSpace> ();
3197 auto hostView = this->
template getLocalView<Kokkos::HostSpace> ();
3200 const size_t col = isConstantStride () ? j : whichVectors_[j];
3201 auto hostView_j = subview (hostView, ALL (), col);
3204 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3205 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3207 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3208 (static_cast<size_t> (hostView_j.extent (0)) <
3209 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3210 "hostView_j.extent(0) = " << hostView_j.extent (0)
3211 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3212 "Please report this bug to the Tpetra developers.");
3214 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3217 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3218 Teuchos::ArrayRCP<Scalar>
3223 using Kokkos::subview;
3225 const char tfecfFuncName[] =
"getDataNonConst: ";
3231 const_cast<MV*> (
this)->template sync<Kokkos::HostSpace> ();
3235 this->
template modify<Kokkos::HostSpace> ();
3238 auto hostView = this->
template getLocalView<Kokkos::HostSpace> ();
3241 const size_t col = isConstantStride () ? j : whichVectors_[j];
3242 auto hostView_j = subview (hostView, ALL (), col);
3245 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3246 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3248 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3249 (static_cast<size_t> (hostView_j.extent (0)) <
3250 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3251 "hostView_j.extent(0) = " << hostView_j.extent (0)
3252 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3253 "Please report this bug to the Tpetra developers.");
3255 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3258 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3263 if (
this != &source) {
3264 base_type::operator= (source);
3270 view_ = source.
view_;
3286 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3287 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3289 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3296 bool contiguous =
true;
3297 const size_t numCopyVecs = static_cast<size_t> (cols.size ());
3298 for (
size_t j = 1; j < numCopyVecs; ++j) {
3299 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3304 if (contiguous && numCopyVecs > 0) {
3305 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3308 RCP<const MV> X_sub = this->subView (cols);
3309 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3315 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3316 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3323 RCP<const MV> X_sub = this->subView (colRng);
3324 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3329 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3333 return origView_.extent (0);
3336 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3340 return origView_.extent (1);
3343 template <
class Scalar,
class LO,
class GO,
class Node>
3347 const size_t offset) :
3351 using Kokkos::subview;
3355 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3360 const int myRank = X.
getMap ()->getComm ()->getRank ();
3361 TEUCHOS_TEST_FOR_EXCEPTION(
3363 prefix <<
"Invalid input Map. The input Map owns " << newNumRows <<
3364 " entries on process " << myRank <<
". offset = " << offset <<
". "
3366 " rows on this process.");
3372 #ifdef HAVE_TPETRA_DEBUG
3373 const size_t strideBefore =
3378 X.template getLocalView<Kokkos::HostSpace> ().data ();
3379 #endif // HAVE_TPETRA_DEBUG
3381 const std::pair<size_t, size_t> origRowRng (offset, X.
origView_.extent (0));
3382 const std::pair<size_t, size_t> rowRng (offset, offset + newNumRows);
3402 if (newOrigView.extent (0) == 0 &&
3403 newOrigView.extent (1) != X.
origView_.extent (1)) {
3404 newOrigView = allocDualView<Scalar, LO, GO, Node> (
size_t (0),
3407 if (newView.extent (0) == 0 &&
3408 newView.extent (1) != X.
view_.extent (1)) {
3409 newView = allocDualView<Scalar, LO, GO, Node> (
size_t (0),
3414 MV (Teuchos::rcp (
new map_type (subMap)), newView, newOrigView) :
3417 #ifdef HAVE_TPETRA_DEBUG
3420 static_cast<size_t> (0);
3424 X.template getLocalView<Kokkos::HostSpace> ().data ();
3427 subViewMV.getStride () :
3428 static_cast<size_t> (0);
3429 const size_t lclNumRowsRet = subViewMV.getLocalLength ();
3430 const size_t numColsRet = subViewMV.getNumVectors ();
3432 const char suffix[] =
". This should never happen. Please report this "
3433 "bug to the Tpetra developers.";
3435 TEUCHOS_TEST_FOR_EXCEPTION(
3437 std::logic_error, prefix <<
"Returned MultiVector has a number of rows "
3438 "different than the number of local indices in the input Map. "
3439 "lclNumRowsRet: " << lclNumRowsRet <<
", subMap.getNodeNumElements(): "
3441 TEUCHOS_TEST_FOR_EXCEPTION(
3442 strideBefore != strideAfter || lclNumRowsBefore != lclNumRowsAfter ||
3443 numColsBefore != numColsAfter || hostPtrBefore != hostPtrAfter,
3444 std::logic_error, prefix <<
"Original MultiVector changed dimensions, "
3445 "stride, or host pointer after taking offset view. strideBefore: " <<
3446 strideBefore <<
", strideAfter: " << strideAfter <<
", lclNumRowsBefore: "
3447 << lclNumRowsBefore <<
", lclNumRowsAfter: " << lclNumRowsAfter <<
3448 ", numColsBefore: " << numColsBefore <<
", numColsAfter: " <<
3449 numColsAfter <<
", hostPtrBefore: " << hostPtrBefore <<
", hostPtrAfter: "
3450 << hostPtrAfter << suffix);
3451 TEUCHOS_TEST_FOR_EXCEPTION(
3452 strideBefore != strideRet, std::logic_error, prefix <<
"Returned "
3453 "MultiVector has different stride than original MultiVector. "
3454 "strideBefore: " << strideBefore <<
", strideRet: " << strideRet <<
3455 ", numColsBefore: " << numColsBefore <<
", numColsRet: " << numColsRet
3457 TEUCHOS_TEST_FOR_EXCEPTION(
3458 numColsBefore != numColsRet, std::logic_error,
3459 prefix <<
"Returned MultiVector has a different number of columns than "
3460 "original MultiVector. numColsBefore: " << numColsBefore <<
", "
3461 "numColsRet: " << numColsRet << suffix);
3462 #endif // HAVE_TPETRA_DEBUG
3467 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3468 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3471 const size_t offset)
const
3474 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3477 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3478 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3481 const size_t offset)
3484 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3487 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3488 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3490 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3492 using Teuchos::Array;
3496 const size_t numViewCols = static_cast<size_t> (cols.size ());
3497 TEUCHOS_TEST_FOR_EXCEPTION(
3498 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3499 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3500 "contain at least one entry, but cols.size() = " << cols.size ()
3505 bool contiguous =
true;
3506 for (
size_t j = 1; j < numViewCols; ++j) {
3507 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3513 if (numViewCols == 0) {
3515 return rcp (
new MV (this->getMap (), numViewCols));
3518 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3522 if (isConstantStride ()) {
3523 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3526 Array<size_t> newcols (cols.size ());
3527 for (
size_t j = 0; j < numViewCols; ++j) {
3528 newcols[j] = whichVectors_[cols[j]];
3530 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3535 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3536 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3540 using ::Tpetra::Details::Behavior;
3542 using Kokkos::subview;
3543 using Teuchos::Array;
3547 const char tfecfFuncName[] =
"subView(Range1D): ";
3549 const size_t lclNumRows = this->getLocalLength ();
3550 const size_t numVecs = this->getNumVectors ();
3554 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3555 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3556 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3558 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3559 numVecs != 0 && colRng.size () != 0 &&
3560 (colRng.lbound () < static_cast<Teuchos::Ordinal> (0) ||
3561 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3562 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3563 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3564 "[0, " << numVecs <<
"].");
3566 RCP<const MV> X_ret;
3576 const std::pair<size_t, size_t> rows (0, lclNumRows);
3577 if (colRng.size () == 0) {
3578 const std::pair<size_t, size_t> cols (0, 0);
3580 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3584 if (isConstantStride ()) {
3585 const std::pair<size_t, size_t> cols (colRng.lbound (),
3586 colRng.ubound () + 1);
3588 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3591 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3594 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3595 whichVectors_[0] + colRng.ubound () + 1);
3597 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3600 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3601 whichVectors_.begin () + colRng.ubound () + 1);
3602 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3607 const bool debug = Behavior::debug ();
3609 using Teuchos::Comm;
3610 using Teuchos::outArg;
3611 using Teuchos::REDUCE_MIN;
3612 using Teuchos::reduceAll;
3614 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3615 Teuchos::null : this->getMap ()->getComm ();
3616 if (! comm.is_null ()) {
3620 if (X_ret.is_null ()) {
3623 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3624 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3625 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3626 "MultiVector; the return value of this method) is null on some MPI "
3627 "process in this MultiVector's communicator. This should never "
3628 "happen. Please report this bug to the Tpetra developers.");
3629 if (! X_ret.is_null () &&
3630 X_ret->getNumVectors () != static_cast<size_t> (colRng.size ())) {
3633 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3634 outArg (gblSuccess));
3635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3636 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3637 "colRng.size(), on at least one MPI process in this MultiVector's "
3638 "communicator. This should never happen. "
3639 "Please report this bug to the Tpetra developers.");
3646 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3647 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3652 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3656 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3657 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3662 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3666 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3672 using Kokkos::subview;
3673 typedef std::pair<size_t, size_t> range_type;
3674 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3677 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3678 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3679 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3681 static_cast<size_t> (j) :
3683 this->view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3695 const size_t newSize = X.
imports_.extent (0) / numCols;
3697 newImports.d_view = subview (X.
imports_.d_view, range_type (0, newSize));
3698 newImports.h_view = subview (X.
imports_.h_view, range_type (0, newSize));
3701 const size_t newSize = X.
exports_.extent (0) / numCols;
3703 newExports.d_view = subview (X.
exports_.d_view, range_type (0, newSize));
3704 newExports.h_view = subview (X.
exports_.h_view, range_type (0, newSize));
3714 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3715 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3720 return Teuchos::rcp (
new V (*
this, j));
3724 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3725 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3730 return Teuchos::rcp (
new V (*
this, j));
3734 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3737 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3739 typedef decltype (this->
template getLocalView<device_type> ())
3741 typedef decltype (this->
template getLocalView<Kokkos::HostSpace> ())
3744 typedef Kokkos::View<IST*,
typename host_view_type::array_layout,
3745 Kokkos::HostSpace, Kokkos::MemoryUnmanaged> input_col_type;
3746 const char tfecfFuncName[] =
"get1dCopy: ";
3748 const size_t numRows = this->getLocalLength ();
3749 const size_t numCols = this->getNumVectors ();
3750 const std::pair<size_t, size_t> rowRange (0, numRows);
3752 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3753 LDA < numRows, std::runtime_error,
3754 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3755 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3756 numRows > static_cast<size_t> (0) &&
3757 numCols > static_cast<size_t> (0) &&
3758 static_cast<size_t> (A.size ()) < LDA * (numCols - 1) + numRows,
3760 "A.size() = " << A.size () <<
", but its size must be at least "
3761 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3776 const bool useHostVersion = this->
template need_sync<device_type> ();
3778 dev_view_type srcView_dev;
3779 host_view_type srcView_host;
3780 if (useHostVersion) {
3781 srcView_host = this->
template getLocalView<Kokkos::HostSpace> ();
3784 srcView_dev = this->
template getLocalView<device_type> ();
3787 for (
size_t j = 0; j < numCols; ++j) {
3788 const size_t srcCol =
3789 this->isConstantStride () ? j : this->whichVectors_[j];
3790 const size_t dstCol = j;
3791 IST*
const dstColRaw =
3792 reinterpret_cast<IST*> (A.getRawPtr () + LDA * dstCol);
3793 input_col_type dstColView (dstColRaw, numRows);
3795 if (useHostVersion) {
3796 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3797 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3798 dstColView.extent (0) != srcColView_host.extent (0),
3799 std::logic_error,
": srcColView and dstColView_host have different "
3800 "dimensions. Please report this bug to the Tpetra developers.");
3804 auto srcColView_dev = Kokkos::subview (srcView_dev, rowRange, srcCol);
3805 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3806 dstColView.extent (0) != srcColView_dev.extent (0),
3807 std::logic_error,
": srcColView and dstColView_dev have different "
3808 "dimensions. Please report this bug to the Tpetra developers.");
3815 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3818 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3821 const char tfecfFuncName[] =
"get2dCopy: ";
3822 const size_t numRows = this->getLocalLength ();
3823 const size_t numCols = this->getNumVectors ();
3825 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3826 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3827 std::runtime_error,
"Input array of pointers must contain as many "
3828 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3829 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3831 if (numRows != 0 && numCols != 0) {
3833 for (
size_t j = 0; j < numCols; ++j) {
3834 const size_t dstLen = static_cast<size_t> (ArrayOfPtrs[j].size ());
3835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3836 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3837 "the input array of arrays is not long enough to fit all entries in "
3838 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3839 <<
" < getLocalLength() = " << numRows <<
".");
3843 for (
size_t j = 0; j < numCols; ++j) {
3844 Teuchos::RCP<const V> X_j = this->getVector (j);
3845 const size_t LDA = static_cast<size_t> (ArrayOfPtrs[j].size ());
3846 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3852 template <
class SC,
class LO,
class GO,
class NT>
3855 const bool markModified)
3861 using host_memory_space = Kokkos::HostSpace;
3862 if (X.template need_sync<host_memory_space> ()) {
3863 X.template sync<host_memory_space> ();
3866 X.template modify<host_memory_space> ();
3870 return X.template getLocalView<host_memory_space> ();
3875 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3876 Teuchos::ArrayRCP<const Scalar>
3877 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3880 if (getLocalLength () == 0 || getNumVectors () == 0) {
3881 return Teuchos::null;
3883 TEUCHOS_TEST_FOR_EXCEPTION(
3884 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3885 "get1dView: This MultiVector does not have constant stride, so it is "
3886 "not possible to view its data as a single array. You may check "
3887 "whether a MultiVector has constant stride by calling "
3888 "isConstantStride().");
3892 constexpr
bool markModified =
false;
3894 auto X_lcl = syncMVToHostIfNeededAndGetHostView (const_cast<MV&> (*
this),
3896 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3897 Kokkos::Compat::persistingView (X_lcl);
3898 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3902 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3903 Teuchos::ArrayRCP<Scalar>
3907 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3908 return Teuchos::null;
3911 TEUCHOS_TEST_FOR_EXCEPTION
3912 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3913 "get1dViewNonConst: This MultiVector does not have constant stride, "
3914 "so it is not possible to view its data as a single array. You may "
3915 "check whether a MultiVector has constant stride by calling "
3916 "isConstantStride().");
3917 constexpr
bool markModified =
true;
3918 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3919 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3920 Kokkos::Compat::persistingView (X_lcl);
3921 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3925 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3926 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3930 constexpr
bool markModified =
true;
3931 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3937 const size_t myNumRows = this->getLocalLength ();
3938 const size_t numCols = this->getNumVectors ();
3939 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3941 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3942 for (
size_t j = 0; j < numCols; ++j) {
3943 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3944 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3945 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3946 Kokkos::Compat::persistingView (X_lcl_j);
3947 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3952 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3953 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3960 constexpr
bool markModified =
false;
3962 auto X_lcl = syncMVToHostIfNeededAndGetHostView (const_cast<MV&> (*
this),
3968 const size_t myNumRows = this->getLocalLength ();
3969 const size_t numCols = this->getNumVectors ();
3970 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3972 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3973 for (
size_t j = 0; j < numCols; ++j) {
3974 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3975 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3976 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3977 Kokkos::Compat::persistingView (X_lcl_j);
3978 views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_lcl_j_arcp);
3983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3987 Teuchos::ETransp transB,
3988 const Scalar& alpha,
3993 using Teuchos::CONJ_TRANS;
3994 using Teuchos::NO_TRANS;
3995 using Teuchos::TRANS;
3998 using Teuchos::rcpFromRef;
3999 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
4000 typedef Teuchos::ScalarTraits<Scalar> STS;
4002 const char tfecfFuncName[] =
"multiply: ";
4041 if (! this->getMap ().is_null () && ! this->getMap ()->getComm ().is_null ()) {
4042 using Teuchos::Comm;
4044 using Teuchos::REDUCE_MIN;
4045 using Teuchos::reduceAll;
4046 using Teuchos::outArg;
4048 RCP<const Comm<int> > comm = this->getMap ()->getComm ();
4049 const size_t A_nrows =
4051 const size_t A_ncols =
4053 const size_t B_nrows =
4055 const size_t B_ncols =
4058 const bool lclBad = this->getLocalLength () != A_nrows ||
4059 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4060 const int lclGood = lclBad ? 0 : 1;
4062 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4064 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4065 (gblGood != 1, std::runtime_error,
"Local dimensions of '*this', "
4066 "op(A), and op(B) are not consistent on at least one process "
4067 "in this object's communicator.");
4073 const bool C_is_local = ! this->isDistributed ();
4075 const bool Case1 = C_is_local && A_is_local && B_is_local;
4077 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4078 transA != NO_TRANS &&
4081 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4085 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4086 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4087 "Multiplication of op(A) and op(B) into *this is not a "
4088 "supported use case.");
4090 if (beta != STS::zero () && Case2) {
4096 const int myRank = this->getMap ()->getComm ()->getRank ();
4098 beta_local = ATS::zero ();
4107 if (! isConstantStride ()) {
4108 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4110 C_tmp = rcp (
this,
false);
4113 RCP<const MV> A_tmp;
4115 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4117 A_tmp = rcpFromRef (A);
4120 RCP<const MV> B_tmp;
4122 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4124 B_tmp = rcpFromRef (B);
4127 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4128 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4129 ! A_tmp->isConstantStride (), std::logic_error,
4130 "Failed to make temporary constant-stride copies of MultiVectors.");
4133 typedef LocalOrdinal LO;
4135 const LO A_lclNumRows = A_tmp->getLocalLength ();
4136 const LO A_numVecs = A_tmp->getNumVectors ();
4137 auto A_lcl = A_tmp->template getLocalView<device_type> ();
4138 auto A_sub = Kokkos::subview (A_lcl,
4139 std::make_pair (LO (0), A_lclNumRows),
4140 std::make_pair (LO (0), A_numVecs));
4141 const LO B_lclNumRows = B_tmp->getLocalLength ();
4142 const LO B_numVecs = B_tmp->getNumVectors ();
4143 auto B_lcl = B_tmp->template getLocalView<device_type> ();
4144 auto B_sub = Kokkos::subview (B_lcl,
4145 std::make_pair (LO (0), B_lclNumRows),
4146 std::make_pair (LO (0), B_numVecs));
4147 const LO C_lclNumRows = C_tmp->getLocalLength ();
4148 const LO C_numVecs = C_tmp->getNumVectors ();
4149 auto C_lcl = C_tmp->template getLocalView<device_type> ();
4150 auto C_sub = Kokkos::subview (C_lcl,
4151 std::make_pair (LO (0), C_lclNumRows),
4152 std::make_pair (LO (0), C_numVecs));
4153 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4154 (transA == Teuchos::TRANS ?
'T' :
'C'));
4155 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4156 (transB == Teuchos::TRANS ?
'T' :
'C'));
4160 KokkosBlas::gemm(&ctransA, &ctransB, alpha_IST, A_sub, B_sub, beta_local, C_sub);
4163 if (! isConstantStride ()) {
4168 A_tmp = Teuchos::null;
4169 B_tmp = Teuchos::null;
4177 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4186 using Kokkos::subview;
4189 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
4191 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4193 const size_t lclNumRows = this->getLocalLength ();
4194 const size_t numVecs = this->getNumVectors ();
4196 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4198 std::runtime_error,
"MultiVectors do not have the same local length.");
4199 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4200 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4201 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4205 if (this->
template need_sync<dev_memory_space> ()) this->
template sync<dev_memory_space> ();
4206 if (A.template need_sync<dev_memory_space> ()) const_cast<V&>(A).template sync<dev_memory_space> ();
4207 if (B.template need_sync<dev_memory_space> ()) const_cast<MV&>(B).template sync<dev_memory_space> ();
4209 this->
template modify<dev_memory_space> ();
4211 auto this_view = this->
template getLocalView<dev_memory_space> ();
4212 auto A_view = A.template getLocalView<dev_memory_space> ();
4213 auto B_view = B.template getLocalView<dev_memory_space> ();
4221 KokkosBlas::mult (scalarThis,
4224 subview (A_view, ALL (), 0),
4228 for (
size_t j = 0; j < numVecs; ++j) {
4229 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4231 KokkosBlas::mult (scalarThis,
4232 subview (this_view, ALL (), C_col),
4234 subview (A_view, ALL (), 0),
4235 subview (B_view, ALL (), B_col));
4240 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4245 using Teuchos::reduceAll;
4246 using Teuchos::REDUCE_SUM;
4247 typedef decltype (this->
template getLocalView<device_type> ())
4249 typedef decltype (this->
template getLocalView<Kokkos::HostSpace> ())
4254 TEUCHOS_TEST_FOR_EXCEPTION
4255 (this->isDistributed (), std::runtime_error,
4256 "Tpetra::MultiVector::reduce should only be called with locally "
4257 "replicated or otherwise not distributed MultiVector objects.");
4258 const Teuchos::Comm<int>& comm = * (this->getMap ()->getComm ());
4259 if (comm.getSize () == 1) {
4263 const size_t lclNumRows = getLocalLength ();
4264 const size_t numCols = getNumVectors ();
4265 const size_t totalAllocSize = lclNumRows * numCols;
4272 TEUCHOS_TEST_FOR_EXCEPTION(
4273 lclNumRows > static_cast<size_t> (std::numeric_limits<int>::max ()),
4274 std::runtime_error,
"Tpetra::MultiVector::reduce: On Process " <<
4275 comm.getRank () <<
", the number of local rows " << lclNumRows <<
4276 " does not fit in int.");
4285 const bool useHostVersion = this->
template need_sync<device_type> ();
4287 dev_view_type srcView_dev;
4288 host_view_type srcView_host;
4289 if (useHostVersion) {
4290 srcView_host = this->
template getLocalView<Kokkos::HostSpace> ();
4291 if (lclNumRows != static_cast<size_t> (srcView_host.extent (0))) {
4293 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
4294 srcView_host = Kokkos::subview (srcView_host, rowRng, Kokkos::ALL ());
4298 srcView_dev = this->
template getLocalView<device_type> ();
4299 if (lclNumRows != static_cast<size_t> (srcView_dev.extent (0))) {
4301 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
4302 srcView_dev = Kokkos::subview (srcView_dev, rowRng, Kokkos::ALL ());
4310 const bool contig = isConstantStride () && getStride () == lclNumRows;
4311 dev_view_type srcBuf_dev;
4312 host_view_type srcBuf_host;
4313 if (useHostVersion) {
4315 srcBuf_host = srcView_host;
4318 srcBuf_host = decltype (srcBuf_host) (
"srcBuf", lclNumRows, numCols);
4324 srcBuf_dev = srcView_dev;
4327 srcBuf_dev = decltype (srcBuf_dev) (
"srcBuf", lclNumRows, numCols);
4338 const bool correct =
4339 (useHostVersion && static_cast<size_t> (srcBuf_host.size ()) >= totalAllocSize) ||
4340 (! useHostVersion && static_cast<size_t> (srcBuf_dev.size ()) >= totalAllocSize);
4341 TEUCHOS_TEST_FOR_EXCEPTION
4342 (! correct, std::logic_error,
"Tpetra::MultiVector::reduce: Violated "
4343 "invariant of temporary source buffer construction. Please report "
4344 "this bug to the Tpetra developers.");
4352 dev_view_type tgtBuf_dev;
4353 host_view_type tgtBuf_host;
4354 if (useHostVersion) {
4355 tgtBuf_host = host_view_type (
"tgtBuf", lclNumRows, numCols);
4358 tgtBuf_dev = dev_view_type (
"tgtBuf", lclNumRows, numCols);
4361 const int reduceCount = static_cast<int> (totalAllocSize);
4362 if (useHostVersion) {
4363 TEUCHOS_TEST_FOR_EXCEPTION
4364 (static_cast<size_t> (tgtBuf_host.size ()) < totalAllocSize,
4365 std::logic_error,
"Tpetra::MultiVector::reduce: tgtBuf_host.size() = "
4366 << tgtBuf_host.size () <<
" < lclNumRows*numCols = " << totalAllocSize
4367 <<
". Please report this bug to the Tpetra developers.");
4368 reduceAll<int, impl_scalar_type> (comm, REDUCE_SUM, reduceCount,
4369 srcBuf_host.data (),
4370 tgtBuf_host.data ());
4373 TEUCHOS_TEST_FOR_EXCEPTION
4374 (static_cast<size_t> (tgtBuf_dev.size ()) < totalAllocSize,
4375 std::logic_error,
"Tpetra::MultiVector::reduce: tgtBuf_dev.size() = "
4376 << tgtBuf_dev.size () <<
" < lclNumRows*numCols = " << totalAllocSize
4377 <<
". Please report this bug to the Tpetra developers.");
4378 reduceAll<int, impl_scalar_type> (comm, REDUCE_SUM, reduceCount,
4380 tgtBuf_dev.data ());
4384 if (useHostVersion) {
4385 this->
template modify<Kokkos::HostSpace> ();
4386 if (contig || isConstantStride ()) {
4390 for (
size_t j = 0; j < numCols; ++j) {
4391 auto X_j_out = Kokkos::subview (srcView_host, Kokkos::ALL (), j);
4392 auto X_j_in = Kokkos::subview (tgtBuf_host, Kokkos::ALL (), j);
4398 this->
template modify<device_type> ();
4399 if (contig || isConstantStride ()) {
4403 for (
size_t j = 0; j < numCols; ++j) {
4404 auto X_j_out = Kokkos::subview (srcView_dev, Kokkos::ALL (), j);
4405 auto X_j_in = Kokkos::subview (tgtBuf_dev, Kokkos::ALL (), j);
4415 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4422 #ifdef HAVE_TPETRA_DEBUG
4423 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4424 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4425 TEUCHOS_TEST_FOR_EXCEPTION(
4426 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4428 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4429 <<
" is invalid. The range of valid row indices on this process "
4430 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4431 <<
", " << maxLocalIndex <<
"].");
4432 TEUCHOS_TEST_FOR_EXCEPTION(
4433 vectorIndexOutOfRange(col),
4435 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4436 <<
" of the multivector is invalid.");
4438 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4439 view_.h_view (lclRow, colInd) = ScalarValue;
4443 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4449 const bool atomic)
const
4451 #ifdef HAVE_TPETRA_DEBUG
4452 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4453 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4454 TEUCHOS_TEST_FOR_EXCEPTION(
4455 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4457 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4458 <<
" is invalid. The range of valid row indices on this process "
4459 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4460 <<
", " << maxLocalIndex <<
"].");
4461 TEUCHOS_TEST_FOR_EXCEPTION(
4462 vectorIndexOutOfRange(col),
4464 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4465 <<
" of the multivector is invalid.");
4467 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4469 Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value);
4472 view_.h_view (lclRow, colInd) += value;
4477 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4486 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4487 #ifdef HAVE_TPETRA_DEBUG
4488 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4489 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4490 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4492 "Global row index " << gblRow <<
" is not present on this process "
4493 << this->getMap ()->getComm ()->getRank () <<
".");
4494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4495 (this->vectorIndexOutOfRange (col), std::runtime_error,
4496 "Vector index " << col <<
" of the MultiVector is invalid.");
4497 #endif // HAVE_TPETRA_DEBUG
4498 this->replaceLocalValue (lclRow, col, ScalarValue);
4501 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4507 const bool atomic)
const
4511 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4512 #ifdef HAVE_TEUCHOS_DEBUG
4513 TEUCHOS_TEST_FOR_EXCEPTION(
4514 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4516 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4517 <<
" is not present on this process "
4518 << this->getMap ()->getComm ()->getRank () <<
".");
4519 TEUCHOS_TEST_FOR_EXCEPTION(
4520 vectorIndexOutOfRange(col),
4522 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4523 <<
" of the multivector is invalid.");
4525 this->sumIntoLocalValue (lclRow, col, value, atomic);
4528 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4530 Teuchos::ArrayRCP<T>
4536 typename dual_view_type::array_layout,
4538 const size_t col = isConstantStride () ? j : whichVectors_[j];
4539 col_dual_view_type X_col =
4540 Kokkos::subview (view_, Kokkos::ALL (), col);
4541 return Kokkos::Compat::persistingView (X_col.d_view);
4544 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4551 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4556 using Teuchos::TypeNameTraits;
4558 std::ostringstream out;
4559 out <<
"\"" << className <<
"\": {";
4560 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4561 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4562 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4563 <<
", Node" << Node::name ()
4565 if (this->getObjectLabel () !=
"") {
4566 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4568 out <<
", numRows: " << this->getGlobalLength ();
4569 if (className !=
"Tpetra::Vector") {
4570 out <<
", numCols: " << this->getNumVectors ()
4571 <<
", isConstantStride: " << this->isConstantStride ();
4573 if (this->isConstantStride ()) {
4574 out <<
", columnStride: " << this->getStride ();
4581 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4586 return this->descriptionImpl (
"Tpetra::MultiVector");
4589 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4594 typedef LocalOrdinal LO;
4597 if (vl <= Teuchos::VERB_LOW) {
4598 return std::string ();
4600 auto map = this->getMap ();
4601 if (map.is_null ()) {
4602 return std::string ();
4604 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4605 auto outp = Teuchos::getFancyOStream (outStringP);
4606 Teuchos::FancyOStream& out = *outp;
4607 auto comm = map->getComm ();
4608 const int myRank = comm->getRank ();
4609 const int numProcs = comm->getSize ();
4611 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4612 Teuchos::OSTab tab1 (out);
4615 const LO lclNumRows = static_cast<LO> (this->getLocalLength ());
4616 out <<
"Local number of rows: " << lclNumRows << endl;
4618 if (vl > Teuchos::VERB_MEDIUM) {
4621 if (this->getNumVectors () != static_cast<size_t> (1)) {
4622 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4624 if (this->isConstantStride ()) {
4625 out <<
"Column stride: " << this->getStride () << endl;
4628 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4632 auto X_dual = this->getDualView ();
4633 typename dual_view_type::t_host X_host;
4634 if (X_dual.template need_sync<Kokkos::HostSpace> ()) {
4640 auto X_dev = this->
template getLocalView<device_type> ();
4641 auto X_host_copy = Kokkos::create_mirror_view (X_dev);
4643 X_host = X_host_copy;
4649 X_host = this->
template getLocalView<Kokkos::HostSpace> ();
4653 out <<
"Values: " << endl
4655 const LO numCols = static_cast<LO> (this->getNumVectors ());
4657 for (LO i = 0; i < lclNumRows; ++i) {
4659 if (i + 1 < lclNumRows) {
4665 for (LO i = 0; i < lclNumRows; ++i) {
4666 for (LO j = 0; j < numCols; ++j) {
4668 if (j + 1 < numCols) {
4672 if (i + 1 < lclNumRows) {
4682 return outStringP->str ();
4685 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4689 const std::string& className,
4690 const Teuchos::EVerbosityLevel verbLevel)
const
4692 using Teuchos::TypeNameTraits;
4693 using Teuchos::VERB_DEFAULT;
4694 using Teuchos::VERB_NONE;
4695 using Teuchos::VERB_LOW;
4697 const Teuchos::EVerbosityLevel vl =
4698 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4700 if (vl == VERB_NONE) {
4707 auto map = this->getMap ();
4708 if (map.is_null ()) {
4711 auto comm = map->getComm ();
4712 if (comm.is_null ()) {
4716 const int myRank = comm->getRank ();
4725 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4729 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4730 out <<
"\"" << className <<
"\":" << endl;
4731 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4733 out <<
"Template parameters:" << endl;
4734 Teuchos::OSTab tab2 (out);
4735 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4736 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4737 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4738 <<
"Node: " << Node::name () << endl;
4740 if (this->getObjectLabel () !=
"") {
4741 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4743 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4744 if (className !=
"Tpetra::Vector") {
4745 out <<
"Number of columns: " << this->getNumVectors () << endl;
4752 if (vl > VERB_LOW) {
4753 const std::string lclStr = this->localDescribeToString (vl);
4754 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4758 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4762 const Teuchos::EVerbosityLevel verbLevel)
const
4764 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4767 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4772 replaceMap (newMap);
4775 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4780 using LO = LocalOrdinal;
4782 using HMDT =
typename dual_view_type::host_mirror_space::device_type;
4783 using ::Tpetra::Details::localDeepCopy;
4784 using ::Tpetra::Details::localDeepCopyConstStride;
4785 const char prefix[] =
"Tpetra::deep_copy (MultiVector): ";
4786 const bool debug =
false;
4788 TEUCHOS_TEST_FOR_EXCEPTION
4790 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4791 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4792 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4793 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4794 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4796 TEUCHOS_TEST_FOR_EXCEPTION
4797 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4798 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4799 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4800 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4806 this->view_.modified_device () = 0;
4807 this->view_.modified_host () = 0;
4809 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4810 std::cout <<
"*** MultiVector::assign: ";
4814 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4815 std::cout <<
"Both *this and src have constant stride" << std::endl;
4819 const bool useHostVersion = src.template need_sync<device_type> ();
4821 if (useHostVersion) {
4823 this->
template modify<HMDT> ();
4824 localDeepCopyConstStride (this->
template getLocalView<HMDT> (),
4825 src.template getLocalView<HMDT> ());
4829 this->
template modify<DT> ();
4830 localDeepCopyConstStride (this->
template getLocalView<DT> (),
4831 src.template getLocalView<DT> ());
4835 if (this->isConstantStride ()) {
4836 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4837 std::cout <<
"Only *this has constant stride";
4840 const LO numWhichVecs = static_cast<LO> (src.
whichVectors_.size ());
4841 const std::string whichVecsLabel (
"MV::deep_copy::whichVecs");
4848 const bool useHostVersion = src.template need_sync<device_type> ();
4849 if (useHostVersion) {
4850 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4851 std::cout <<
"; Copy from host version of src" << std::endl;
4857 typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4858 whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
4859 for (LO i = 0; i < numWhichVecs; ++i) {
4865 this->
template modify<HMDT> ();
4866 localDeepCopy (this->
template getLocalView<HMDT> (),
4867 src.template getLocalView<HMDT> (),
4868 true,
false, srcWhichVecs, srcWhichVecs);
4871 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4872 std::cout <<
"; Copy from device version of src" << std::endl;
4878 typedef Kokkos::DualView<LO*, DT> whichvecs_type;
4879 whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
4880 srcWhichVecs.template modify<HMDT> ();
4881 for (LO i = 0; i < numWhichVecs; ++i) {
4882 srcWhichVecs.h_view(i) = static_cast<LO> (src.
whichVectors_[i]);
4885 srcWhichVecs.template sync<DT> ();
4890 this->
template modify<DT> ();
4891 localDeepCopy (this->
template getLocalView<DT> (),
4892 src.template getLocalView<DT> (),
4893 true,
false, srcWhichVecs.d_view,
4894 srcWhichVecs.d_view);
4900 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4901 std::cout <<
"Only src has constant stride" << std::endl;
4905 const bool useHostVersion = src.template need_sync<device_type> ();
4906 if (useHostVersion) {
4911 typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4912 const LO numWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4913 whichvecs_type whichVecs (
"MV::deep_copy::whichVecs", numWhichVecs);
4914 for (LO i = 0; i < numWhichVecs; ++i) {
4915 whichVecs(i) = static_cast<LO> (this->whichVectors_[i]);
4919 this->
template modify<HMDT> ();
4920 localDeepCopy (this->
template getLocalView<HMDT> (),
4921 src.template getLocalView<HMDT> (),
4924 whichVecs, whichVecs);
4929 typedef Kokkos::DualView<LO*, DT> whichvecs_type;
4930 const std::string whichVecsLabel (
"MV::deep_copy::whichVecs");
4931 const LO numWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4932 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs);
4933 whichVecs.template modify<HMDT> ();
4934 for (LO i = 0; i < numWhichVecs; ++i) {
4935 whichVecs.h_view(i) = this->whichVectors_[i];
4938 whichVecs.template sync<DT> ();
4941 this->
template modify<DT> ();
4942 localDeepCopy (this->
template getLocalView<DT> (),
4943 src.template getLocalView<DT> (),
4951 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4952 std::cout <<
"Neither *this nor src has constant stride" << std::endl;
4956 const bool useHostVersion = src.template need_sync<device_type> ();
4957 if (useHostVersion) {
4958 const LO dstNumWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4959 Kokkos::View<LO*, HMDT> whichVectorsDst (
"dstWhichVecs", dstNumWhichVecs);
4960 for (LO i = 0; i < dstNumWhichVecs; ++i) {
4961 whichVectorsDst(i) = this->whichVectors_[i];
4965 const LO srcNumWhichVecs = static_cast<LO> (src.
whichVectors_.size ());
4966 Kokkos::View<LO*, HMDT> whichVectorsSrc (
"srcWhichVecs", srcNumWhichVecs);
4967 for (LO i = 0; i < srcNumWhichVecs; ++i) {
4973 this->
template modify<HMDT> ();
4974 localDeepCopy (this->
template getLocalView<HMDT> (),
4975 src.template getLocalView<HMDT> (),
4984 const LO dstNumWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4985 Kokkos::DualView<LO*, DT> whichVecsDst (
"MV::deep_copy::whichVecsDst",
4987 whichVecsDst.template modify<HMDT> ();
4988 for (LO i = 0; i < dstNumWhichVecs; ++i) {
4989 whichVecsDst.h_view(i) = static_cast<LO> (this->whichVectors_[i]);
4992 whichVecsDst.template sync<DT> ();
4998 const LO srcNumWhichVecs = static_cast<LO> (src.
whichVectors_.size ());
4999 Kokkos::DualView<LO*, DT> whichVecsSrc (
"MV::deep_copy::whichVecsSrc",
5001 whichVecsSrc.template modify<HMDT> ();
5002 for (LO i = 0; i < srcNumWhichVecs; ++i) {
5003 whichVecsSrc.h_view(i) = static_cast<LO> (src.
whichVectors_[i]);
5006 whichVecsSrc.template sync<DT> ();
5010 this->
template modify<DT> ();
5011 localDeepCopy (this->
template getLocalView<DT> (),
5012 src.template getLocalView<DT> (),
5015 whichVecsDst.d_view,
5016 whichVecsSrc.d_view);
5023 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5027 using ::Tpetra::Details::PackTraits;
5029 using HES =
typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
5031 const size_t l1 = this->getLocalLength();
5033 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
5040 auto v1 = this->
template getLocalView<HES>();
5041 auto v2 = vec.template getLocalView<HES>();
5042 if (PackTraits<ST, HES>::packValueCount (v1(0,0)) !=
5043 PackTraits<ST, HES>::packValueCount (v2(0,0))) {
5050 template <
class ST,
class LO,
class GO,
class NT>
5054 Teuchos::RCP<const map_type> map = mv.
map_;
5056 Teuchos::Array<size_t> whichVectors;
5062 mv.
map_ = this->map_;
5063 mv.
view_ = this->view_;
5070 this->origView_ = origView;
5071 this->whichVectors_ = whichVectors;
5075 template <
class Scalar,
class LO,
class GO,
class NT>
5076 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
5081 return Teuchos::rcp (
new MV (map, numVectors));
5084 template <
class ST,
class LO,
class GO,
class NT>
5085 MultiVector<ST, LO, GO, NT>
5102 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
5103 namespace Classes { template class MultiVector< SCALAR , LO , GO , NODE >; } \
5104 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
5105 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors);
5107 #endif // TPETRA_MULTIVECTOR_DEF_HPP