42 #ifndef TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
45 #include "Tpetra_Experimental_BlockMultiVector_decl.hpp"
// Helper functor: obtain a raw pointer to the host-side data of a
// Tpetra::MultiVector.  NOTE(review): this listing embeds original line
// numbers in the text and elides intermediate lines (e.g. 63, 65-69);
// verify against the canonical Tpetra source before editing.
60 template<
class MultiVectorType>
61 struct RawHostPtrFromMultiVector {
62 typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
64 static impl_scalar_type* getRawPtr (MultiVectorType& X) {
// Get the MultiVector's local view in HostSpace, then expose its raw
// data pointer.  (The `return X_raw;` / closing braces appear elided.)
70 auto X_view_host = X.template getLocalView<Kokkos::HostSpace> ();
71 impl_scalar_type* X_raw = X_view_host.data ();
// Free-function wrapper (signature lines 89-91 appear elided):
// presumably `getRawHostPtrFromMultiVector (MultiVector<S,LO,GO,N>& X)`
// delegating to the functor above — TODO confirm.
88 template<
class S,
class LO,
class GO,
class N>
92 return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
98 namespace Experimental {
// Orphan template header (body at original lines 102-108 appears elided).
101 template<
class Scalar,
class LO,
class GO,
class Node>
// Attempt to view a SrcDistObject as a BlockMultiVector.  Throws
// std::invalid_argument if the dynamic_cast fails; otherwise returns a
// non-owning RCP (ownership flag false) to the downcast object.
109 template<
class Scalar,
class LO,
class GO,
class Node>
110 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
115 const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
116 TEUCHOS_TEST_FOR_EXCEPTION(
117 src_bmv == NULL, std::invalid_argument,
"Tpetra::Experimental::"
118 "BlockMultiVector: The source object of an Import or Export to a "
119 "BlockMultiVector, must also be a BlockMultiVector.");
// Non-owning RCP: the caller's src object retains ownership.
120 return Teuchos::rcp (src_bmv,
false);
// Constructor taking a mesh Map, block size, and number of vectors.
// Builds the point Map from the mesh Map, then the underlying
// MultiVector over that point Map.  (Signature lines appear elided.)
123 template<
class Scalar,
class LO,
class GO,
class Node>
130 pointMap_ (makePointMap (meshMap, blockSize)),
131 mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
// Cache the raw host pointer for fast block access.
132 mvData_ (getRawHostPtrFromMultiVector (mv_)),
133 blockSize_ (blockSize)
// Constructor taking both a mesh Map and a precomputed point Map.
// Avoids rebuilding the point Map; otherwise mirrors the ctor above.
139 template<
class Scalar,
class LO,
class GO,
class Node>
147 pointMap_ (pointMap),
148 mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
149 mvData_ (getRawHostPtrFromMultiVector (mv_)),
150 blockSize_ (blockSize)
// Constructor that views an existing Tpetra::MultiVector (X_mv) as a
// BlockMultiVector.  Takes a subview of all of X_mv's columns, forces
// view (shallow-copy) semantics, and caches the raw host pointer.
// NOTE(review): several original lines are elided here (e.g. 157-159,
// 161-163, 165-179, 181-182, 185, 187, 192-195); verify against the
// canonical source.
156 template<
class Scalar,
class LO,
class GO,
class Node>
160 const LO blockSize) :
164 blockSize_ (blockSize)
180 RCP<const mv_type> X_view_const;
// Zero-column case: subview with an empty column list.
183 Teuchos::Array<size_t> cols (0);
184 X_view_const = X_mv.
subView (cols ());
// Nonzero-column case: subview of the full column range [0, numCols-1].
186 X_view_const = X_mv.
subView (Teuchos::Range1D (0, numCols-1));
188 TEUCHOS_TEST_FOR_EXCEPTION(
189 X_view_const.is_null (), std::logic_error,
"Tpetra::Experimental::"
190 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
191 "should never happen. Please report this bug to the Tpetra developers.");
// Cast away const so we can force view semantics on the subview.
196 RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
197 X_view->setCopyOrView (Teuchos::View);
// Sanity check that view semantics actually took effect.
198 TEUCHOS_TEST_FOR_EXCEPTION(
199 X_view->getCopyOrView () != Teuchos::View, std::logic_error,
"Tpetra::"
200 "Experimental::BlockMultiVector constructor: We just set a MultiVector "
201 "to have view semantics, but it claims that it doesn't have view "
202 "semantics. This should never happen. "
203 "Please report this bug to the Tpetra developers.");
// Adopt the MultiVector's Map as this object's point Map, if present.
208 Teuchos::RCP<const map_type> pointMap =
mv_.
getMap ();
209 if (! pointMap.is_null ()) {
210 pointMap_ = *pointMap;
212 mvData_ = getRawHostPtrFromMultiVector (
mv_);
// Offset-view constructor: views a contiguous row range of an existing
// BlockMultiVector X, given both the new mesh Map and new point Map.
// The point-space offset is the mesh offset scaled by the block size.
215 template<
class Scalar,
class LO,
class GO,
class Node>
220 const size_t offset) :
222 meshMap_ (newMeshMap),
223 pointMap_ (newPointMap),
224 mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()),
225 mvData_ (getRawHostPtrFromMultiVector (mv_)),
226 blockSize_ (X.getBlockSize ())
// Offset-view constructor variant: same as above, but derives the new
// point Map from the new mesh Map via makePointMap.
232 template<
class Scalar,
class LO,
class GO,
class Node>
236 const size_t offset) :
238 meshMap_ (newMeshMap),
239 pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
240 mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()),
241 mvData_ (getRawHostPtrFromMultiVector (mv_)),
242 blockSize_ (X.getBlockSize ())
// Orphan template header (default ctor body, original lines ~249-258,
// appears elided).
248 template<
class Scalar,
class LO,
class GO,
class Node>
// makePointMap: build the "point" Map from a "mesh" Map and a block
// size.  Each mesh GID g expands into blockSize consecutive point GIDs
// starting at indexBase + (g - indexBase)*blockSize.  Contiguous mesh
// Maps take the fast path (no explicit GID list).
259 template<
class Scalar,
class LO,
class GO,
class Node>
265 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
267 const GST gblNumMeshMapInds =
269 const size_t lclNumMeshMapIndices =
// Point-map counts are the mesh-map counts scaled by the block size.
271 const GST gblNumPointMapInds =
272 gblNumMeshMapInds * static_cast<GST> (blockSize);
273 const size_t lclNumPointMapInds =
274 lclNumMeshMapIndices * static_cast<size_t> (blockSize);
// Contiguous case: construct the Map from counts alone.
278 return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
// Noncontiguous case: expand each mesh GID into blockSize point GIDs.
286 const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
287 Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
288 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
289 const GO meshGid = lclMeshGblInds[g];
290 const GO pointGidStart = indexBase +
291 (meshGid - indexBase) * static_cast<GO> (blockSize);
292 const size_type offset = g * static_cast<size_type> (blockSize);
293 for (LO k = 0; k < blockSize; ++k) {
294 const GO pointGid = pointGidStart + static_cast<GO> (k);
295 lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
298 return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
// replaceLocalValuesImpl: overwrite the block at (localRowIndex,
// colIndex) with the given values.  No bounds checking here; callers
// (replaceLocalValues / replaceGlobalValues) validate the row index.
// The deep_copy from X_src to X_dst appears elided from this listing.
304 template<
class Scalar,
class LO,
class GO,
class Node>
309 const Scalar vals[])
const
311 auto X_dst = getLocalBlock (localRowIndex, colIndex);
// Wrap the caller's raw array as an unmanaged const view of the block.
312 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
// replaceLocalValues: bounds-checked public wrapper.  Returns early
// (presumably false — the return appears elided) if localRowIndex is
// not owned by the mesh Map; otherwise delegates to the Impl method.
318 template<
class Scalar,
class LO,
class GO,
class Node>
323 const Scalar vals[])
const
325 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
328 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
// replaceGlobalValues: translate the global row index to local; fail
// (early return appears elided) if not owned, else delegate to Impl.
333 template<
class Scalar,
class LO,
class GO,
class Node>
338 const Scalar vals[])
const
340 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
341 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
344 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
// sumIntoLocalValuesImpl: add the given values into the block at
// (localRowIndex, colIndex) via AXPY with coefficient one.  No bounds
// checking; public wrappers validate the row index.
349 template<
class Scalar,
class LO,
class GO,
class Node>
354 const Scalar vals[])
const
356 auto X_dst = getLocalBlock (localRowIndex, colIndex);
// Unmanaged const view over the caller's raw array.
357 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
// X_dst += 1 * X_src
359 AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
// sumIntoLocalValues: bounds-checked public wrapper around
// sumIntoLocalValuesImpl (early-return branch body appears elided).
362 template<
class Scalar,
class LO,
class GO,
class Node>
367 const Scalar vals[])
const
369 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
372 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
// sumIntoGlobalValues: global-index variant; translate to local index,
// fail if not owned, else delegate to sumIntoLocalValuesImpl.
377 template<
class Scalar,
class LO,
class GO,
class Node>
382 const Scalar vals[])
const
384 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
385 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
388 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
// getLocalRowView (local index): expose a raw Scalar* view of the block
// at (localRowIndex, colIndex) through the out-parameter `vals`.
// Fails (branch body elided) if the row is not locally owned.
393 template<
class Scalar,
class LO,
class GO,
class Node>
398 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
401 auto X_ij = getLocalBlock (localRowIndex, colIndex);
402 vals = reinterpret_cast<Scalar*> (X_ij.data ());
// getLocalRowView (global index): translate to a local index, then
// expose a raw Scalar* view of the block, as in the local variant.
407 template<
class Scalar,
class LO,
class GO,
class Node>
412 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
413 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
416 auto X_ij = getLocalBlock (localRowIndex, colIndex);
417 vals = reinterpret_cast<Scalar*> (X_ij.data ());
// getLocalBlock: return a little-vector (HostMirror) view of the block
// at (localRowIndex, colIndex).  Invalid indices yield an empty view.
// The offset arithmetic assumes column-stride layout via getStrideY();
// the raw-pointer setup for `blockRaw` (line ~446) appears elided.
422 template<
class Scalar,
class LO,
class GO,
class Node>
426 const LO colIndex)
const
440 if (! isValidLocalMeshIndex (localRowIndex)) {
// Empty (zero-length) view signals an invalid index to the caller.
441 return typename little_vec_type::HostMirror ();
443 const size_t blockSize = getBlockSize ();
444 const size_t offset = colIndex * this->getStrideY () +
445 localRowIndex * blockSize;
447 return typename little_vec_type::HostMirror (blockRaw, blockSize);
// getMultiVectorFromSrcDistObject: view a SrcDistObject as a
// Tpetra::MultiVector.  Tries BlockMultiVector first (returns a view of
// its internal mv_), then plain MultiVector (non-owning RCP), and
// otherwise returns an empty MultiVector to signal failure.
451 template<
class Scalar,
class LO,
class GO,
class Node>
452 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
457 using Teuchos::rcpFromRef;
464 const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
465 if (srcBlkVec == NULL) {
466 const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
467 if (srcMultiVec == NULL) {
// Neither type matched: empty MultiVector indicates failure.
471 return rcp (
new mv_type ());
// Plain MultiVector: wrap without taking ownership.
473 return rcp (srcMultiVec,
false);
// BlockMultiVector: view its underlying MultiVector.
476 return rcpFromRef (srcBlkVec->mv_);
// checkSizes (DistObject hook): the source is compatible iff it can be
// viewed as a MultiVector (see getMultiVectorFromSrcDistObject above).
480 template<
class Scalar,
class LO,
class GO,
class Node>
484 return ! getMultiVectorFromSrcDistObject (src).is_null ();
// copyAndPermute (DistObject hook): deep-copy the first numSameIDs
// blocks straight across, then copy the permuted blocks according to
// permuteTo/FromLIDs.  Requires the source to be a BlockMultiVector.
// NOTE(review): loop-closing braces and some signature lines appear
// elided from this listing.
487 template<
class Scalar,
class LO,
class GO,
class Node>
491 const Teuchos::ArrayView<const LO>& permuteToLIDs,
492 const Teuchos::ArrayView<const LO>& permuteFromLIDs)
494 const char tfecfFuncName[] =
"copyAndPermute: ";
495 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
496 permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
497 "permuteToLIDs and permuteFromLIDs must have the same size."
498 << std::endl <<
"permuteToLIDs.size() = " << permuteToLIDs.size ()
499 <<
" != permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
".");
502 Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
503 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
504 srcAsBmvPtr.is_null (), std::invalid_argument,
505 "The source of an Import or Export to a BlockMultiVector "
506 "must also be a BlockMultiVector.");
507 const BMV& srcAsBmv = *srcAsBmvPtr;
// Phase 1: rows with the same local index in source and target.
512 const LO numVecs = getNumVectors ();
513 const LO numSame = static_cast<LO> (numSameIDs);
514 for (LO j = 0; j < numVecs; ++j) {
515 for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
516 deep_copy (getLocalBlock (lclRow, j), srcAsBmv.getLocalBlock (lclRow, j));
// Phase 2: permuted rows.  NOTE(review): `k` starts at numSame here,
// yet indexes permuteToLIDs/permuteFromLIDs, which are indexed from 0
// in standard DistObject implementations — verify against upstream.
523 const LO numPermuteLIDs = static_cast<LO> (permuteToLIDs.size ());
524 for (LO j = 0; j < numVecs; ++j) {
525 for (LO k = numSame; k < numPermuteLIDs; ++k) {
526 deep_copy (getLocalBlock (permuteToLIDs[k], j), srcAsBmv.getLocalBlock (permuteFromLIDs[k], j));
// packAndPrepare (DistObject hook): serialize the blocks of the source
// BlockMultiVector for the rows in exportLIDs into the flat `exports`
// buffer.  Packing order: for each mesh LID, for each vector j, one
// contiguous block of blockSize scalars.  constantNumPackets is
// blockSize*numVecs (assignment line appears elided).
531 template<
class Scalar,
class LO,
class GO,
class Node>
532 void BlockMultiVector<Scalar, LO, GO, Node>::
534 const Teuchos::ArrayView<const LO>& exportLIDs,
535 Teuchos::Array<impl_scalar_type>& exports,
536 const Teuchos::ArrayView<size_t>& ,
537 size_t& constantNumPackets,
540 typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
541 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
542 const char tfecfFuncName[] =
"packAndPrepare: ";
544 Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
545 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
546 srcAsBmvPtr.is_null (), std::invalid_argument,
547 "The source of an Import or Export to a BlockMultiVector "
548 "must also be a BlockMultiVector.");
549 const BMV& srcAsBmv = *srcAsBmvPtr;
551 const LO numVecs = getNumVectors ();
552 const LO blockSize = getBlockSize ();
558 static_cast<size_t> (blockSize) * static_cast<size_t> (numVecs);
// Size the export buffer: one block per (mesh LID, vector) pair.
559 const size_type numMeshLIDs = exportLIDs.size ();
561 const size_type requiredExportsSize = numMeshLIDs *
562 static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
563 exports.resize (requiredExportsSize);
566 size_type curExportPos = 0;
567 for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
568 for (LO j = 0; j < numVecs; ++j, curExportPos += blockSize) {
569 const LO meshLid = exportLIDs[meshLidIndex];
570 impl_scalar_type*
const curExportPtr = &exports[curExportPos];
// Copy the source block into the export buffer (the deep_copy call
// appears elided from this listing).
571 typename little_vec_type::HostMirror X_dst (curExportPtr, blockSize);
572 auto X_src = srcAsBmv.getLocalBlock (meshLid, j);
577 }
catch (std::exception& e) {
// Any exception during packing is rethrown as a logic_error tagged
// with the MPI rank, to aid debugging of parallel runs.
578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
579 true, std::logic_error,
"Oh no! packAndPrepare on Process "
580 << meshMap_.getComm ()->getRank () <<
" raised the following exception: "
// unpackAndCombine (DistObject hook): deserialize received blocks from
// the flat `imports` buffer into this BlockMultiVector, combining with
// existing values per CombineMode: REPLACE/INSERT deep-copy (branch
// appears elided), ADD does AXPY(1, src, dst), ABSMAX takes the
// element-wise absolute maximum.
585 template<
class Scalar,
class LO,
class GO,
class Node>
586 void BlockMultiVector<Scalar, LO, GO, Node>::
587 unpackAndCombine (
const Teuchos::ArrayView<const LO>& importLIDs,
588 const Teuchos::ArrayView<const impl_scalar_type>& imports,
589 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
590 size_t constantNumPackets,
594 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
595 const char tfecfFuncName[] =
"unpackAndCombine: ";
// Reject CombineModes this class does not implement.
597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
599 std::invalid_argument,
"Invalid CombineMode: " << CM <<
". Valid "
600 "CombineMode values are ADD, REPLACE, INSERT, ABSMAX, and ZERO.");
608 const size_type numMeshLIDs = importLIDs.size ();
609 const LO blockSize = getBlockSize ();
610 const LO numVecs = getNumVectors ();
// The import buffer must hold one block per (mesh LID, vector) pair.
612 const size_type requiredImportsSize = numMeshLIDs *
613 static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
614 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
615 imports.size () < requiredImportsSize, std::logic_error,
616 ": imports.size () = " << imports.size ()
617 <<
" < requiredImportsSize = " << requiredImportsSize <<
".");
// Unpack in the same order packAndPrepare packed: per LID, per vector.
619 size_type curImportPos = 0;
620 for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
621 for (LO j = 0; j < numVecs; ++j, curImportPos += blockSize) {
622 const LO meshLid = importLIDs[meshLidIndex];
623 const impl_scalar_type*
const curImportPtr = &imports[curImportPos];
625 typename const_little_vec_type::HostMirror::const_type X_src (curImportPtr, blockSize);
626 auto X_dst = getLocalBlock (meshLid, j);
630 }
else if (CM ==
ADD) {
631 AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
632 }
else if (CM ==
ABSMAX) {
633 Impl::absMax (X_dst, X_src);
// Orphan template headers: the bodies for the methods at original lines
// ~640-658 (presumably isValidLocalMeshIndex and putScalar — TODO
// confirm) appear elided from this listing.
639 template<
class Scalar,
class LO,
class GO,
class Node>
646 template<
class Scalar,
class LO,
class GO,
class Node>
// update: this = beta*this + alpha*X, delegating directly to the
// underlying Tpetra::MultiVector's update.
653 template<
class Scalar,
class LO,
class GO,
class Node>
659 mv_.update (alpha, X.
mv_, beta);
// Impl::BlockWiseMultiply: Kokkos functor computing, per mesh row k,
// Y(block k) = alpha * D(k) * X(block k) for each vector column.
// D is a rank-3 view of per-row small dense blocks; X and Y are
// point-indexed multivectors.  The actual GEMV call and member
// declarations (alpha_, Y_, D_, X_) appear partially elided.
664 template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
665 struct BlockWiseMultiply {
666 typedef typename ViewD::size_type Size;
669 typedef typename ViewD::device_type Device;
670 typedef typename ViewD::non_const_value_type ImplScalar;
671 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
// Unmanaged aliases avoid reference counting inside the parallel kernel.
673 template <
typename View>
674 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
675 typename View::device_type, Unmanaged>;
676 typedef UnmanagedView<ViewY> UnMViewY;
677 typedef UnmanagedView<ViewD> UnMViewD;
678 typedef UnmanagedView<ViewX> UnMViewX;
680 const Size block_size_;
687 BlockWiseMultiply (
const Size block_size,
const Scalar& alpha,
688 const ViewY& Y,
const ViewD& D,
const ViewX& X)
689 : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
// operator(): one parallel iteration per mesh row k.
692 KOKKOS_INLINE_FUNCTION
693 void operator() (
const Size k)
const {
694 const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
695 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
696 const auto num_vecs = X_.extent(1);
697 for (Size i = 0; i < num_vecs; ++i) {
// kslice selects the point rows belonging to mesh row k.
698 Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
699 auto X_curBlk = Kokkos::subview(X_, kslice, i);
700 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
// Factory for BlockWiseMultiply, deducing the view template parameters
// from the arguments.
709 template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
710 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
711 createBlockWiseMultiply (
const int block_size,
const Scalar& alpha,
712 const ViewY& Y,
const ViewD& D,
const ViewX& X) {
713 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
// Impl::BlockJacobiUpdate: Kokkos functor for the block-Jacobi update
// Y(block k) := beta*Y(block k) + alpha * D(k) * Z(block k), with the
// beta==0 and beta==1 cases special-cased (their branch bodies appear
// elided).  Template parameters for Scalar/ViewD/ViewZ and several
// member declarations appear elided from this listing.
716 template <
typename ViewY,
720 typename LO =
typename ViewY::size_type>
721 class BlockJacobiUpdate {
723 typedef typename ViewD::device_type Device;
724 typedef typename ViewD::non_const_value_type ImplScalar;
725 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
727 template <
typename ViewType>
728 using UnmanagedView = Kokkos::View<
typename ViewType::data_type,
729 typename ViewType::array_layout,
730 typename ViewType::device_type,
732 typedef UnmanagedView<ViewY> UnMViewY;
733 typedef UnmanagedView<ViewD> UnMViewD;
734 typedef UnmanagedView<ViewZ> UnMViewZ;
744 BlockJacobiUpdate (
const ViewY& Y,
748 const Scalar& beta) :
// Block size is inferred from D's second extent (rows of each block).
749 blockSize_ (D.extent (1)),
757 static_assert (static_cast<int> (ViewY::rank) == 1,
758 "Y must have rank 1.");
759 static_assert (static_cast<int> (ViewD::rank) == 3,
"D must have rank 3.");
760 static_assert (static_cast<int> (ViewZ::rank) == 1,
761 "Z must have rank 1.");
// operator(): one parallel iteration per mesh row k.
767 KOKKOS_INLINE_FUNCTION
void
768 operator() (
const LO& k)
const
771 using Kokkos::subview;
772 typedef Kokkos::pair<LO, LO> range_type;
773 typedef Kokkos::Details::ArithTraits<Scalar> KAT;
777 auto D_curBlk = subview (D_, k, ALL (), ALL ());
778 const range_type kslice (k*blockSize_, (k+1)*blockSize_);
782 auto Z_curBlk = subview (Z_, kslice);
783 auto Y_curBlk = subview (Y_, kslice);
// beta == 0: overwrite Y; beta == 1: accumulate; otherwise scale first.
785 if (beta_ == KAT::zero ()) {
788 else if (beta_ != KAT::one ()) {
// Impl::blockJacobiUpdate (free function): validate the view arguments
// (rank/extent checks, extent checks only in debug builds), then run
// the BlockJacobiUpdate functor over all local mesh rows in parallel.
795 template<
class ViewY,
799 class LO =
typename ViewD::size_type>
801 blockJacobiUpdate (
const ViewY& Y,
807 static_assert (Kokkos::Impl::is_view<ViewY>::value,
"Y must be a Kokkos::View.");
808 static_assert (Kokkos::Impl::is_view<ViewD>::value,
"D must be a Kokkos::View.");
809 static_assert (Kokkos::Impl::is_view<ViewZ>::value,
"Z must be a Kokkos::View.");
810 static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
811 "Y and Z must have the same rank.");
812 static_assert (static_cast<int> (ViewD::rank) == 3,
"D must have rank 3.");
814 const auto lclNumMeshRows = D.extent (0);
816 #ifdef HAVE_TPETRA_DEBUG
// Debug-only runtime consistency checks between Y, Z, and D extents.
820 const auto blkSize = D.extent (1);
821 const auto lclNumPtRows = lclNumMeshRows * blkSize;
822 TEUCHOS_TEST_FOR_EXCEPTION
823 (Y.extent (0) != lclNumPtRows, std::invalid_argument,
824 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) <<
" != "
825 "D.extent(0)*D.extent(1) = " << lclNumMeshRows <<
" * " << blkSize
826 <<
" = " << lclNumPtRows <<
".");
827 TEUCHOS_TEST_FOR_EXCEPTION
828 (Y.extent (0) != Z.extent (0), std::invalid_argument,
829 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) <<
" != "
830 "Z.extent(0) = " << Z.extent (0) <<
".");
831 TEUCHOS_TEST_FOR_EXCEPTION
832 (Y.extent (1) != Z.extent (1), std::invalid_argument,
833 "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) <<
" != "
834 "Z.extent(1) = " << Z.extent (1) <<
".");
835 #endif // HAVE_TPETRA_DEBUG
// One parallel iteration per local mesh row.
837 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
838 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
840 range_type range (0, static_cast<LO> (lclNumMeshRows));
841 Kokkos::parallel_for (range, functor);
// blockWiseMultiply: this = alpha * D * X blockwise, where D holds one
// small dense block per local mesh row.  alpha == 0 short-circuits to
// putScalar(0); otherwise a BlockWiseMultiply functor runs over all
// local mesh rows.  (alphaImpl's declaration appears elided.)
846 template<
class Scalar,
class LO,
class GO,
class Node>
855 typedef typename device_type::memory_space memory_space;
856 const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
858 if (alpha == STS::zero ()) {
859 this->putScalar (STS::zero ());
862 const LO blockSize = this->getBlockSize ();
864 auto X_lcl = X.
mv_.template getLocalView<memory_space> ();
865 auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
866 auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
873 Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
874 Kokkos::parallel_for (range, bwm);
// blockJacobiUpdate: block-Jacobi smoother step.  Computes
// Z = X - Z (via Z.update(1, X, -1), with Z presumably holding A*Y on
// entry — TODO confirm against upstream docs), then per column j:
// Y_j = beta*Y_j + alpha * D * Z_j via Impl::blockJacobiUpdate.
// The alpha == 0 branch body appears elided.
878 template<
class Scalar,
class LO,
class GO,
class Node>
888 using Kokkos::subview;
889 typedef typename device_type::memory_space memory_space;
892 const IST alphaImpl = static_cast<IST> (alpha);
893 const IST betaImpl = static_cast<IST> (beta);
894 const LO numVecs = mv_.getNumVectors ();
896 auto X_lcl = X.
mv_.template getLocalView<memory_space> ();
897 auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
898 auto Z_lcl = Z.
mv_.template getLocalView<memory_space> ();
900 if (alpha == STS::zero ()) {
// Z = X - Z
904 Z.
update (STS::one (), X, -STS::one ());
905 for (LO j = 0; j < numVecs; ++j) {
906 auto X_lcl_j = subview (X_lcl, ALL (), j);
907 auto Y_lcl_j = subview (Y_lcl, ALL (), j);
908 auto Z_lcl_j = subview (Z_lcl, ALL (), j);
909 Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
// Explicit-instantiation macro: instantiates BlockMultiVector for one
// (Scalar, LO, GO, Node) combination.  Closing braces of the macro's
// namespaces appear elided from this listing.
923 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
924 namespace Experimental { \
925 namespace Classes { \
926 template class BlockMultiVector< S, LO, GO, NODE >; \
930 #endif // TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP