42 #ifndef TPETRA_DISTOBJECT_DEF_HPP
43 #define TPETRA_DISTOBJECT_DEF_HPP
53 #include "Tpetra_Distributor.hpp"
61 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
66 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
69 using Teuchos::TimeMonitor;
71 RCP<Time> doXferTimer =
72 TimeMonitor::lookupCounter (
"Tpetra::DistObject::doTransfer");
73 if (doXferTimer.is_null ()) {
75 TimeMonitor::getNewCounter (
"Tpetra::DistObject::doTransfer");
77 doXferTimer_ = doXferTimer;
79 RCP<Time> copyAndPermuteTimer =
80 TimeMonitor::lookupCounter (
"Tpetra::DistObject::copyAndPermute");
81 if (copyAndPermuteTimer.is_null ()) {
83 TimeMonitor::getNewCounter (
"Tpetra::DistObject::copyAndPermute");
85 copyAndPermuteTimer_ = copyAndPermuteTimer;
87 RCP<Time> packAndPrepareTimer =
88 TimeMonitor::lookupCounter (
"Tpetra::DistObject::packAndPrepare");
89 if (packAndPrepareTimer.is_null ()) {
91 TimeMonitor::getNewCounter (
"Tpetra::DistObject::packAndPrepare");
93 packAndPrepareTimer_ = packAndPrepareTimer;
95 RCP<Time> doPostsAndWaitsTimer =
96 TimeMonitor::lookupCounter (
"Tpetra::DistObject::doPostsAndWaits");
97 if (doPostsAndWaitsTimer.is_null ()) {
98 doPostsAndWaitsTimer =
99 TimeMonitor::getNewCounter (
"Tpetra::DistObject::doPostsAndWaits");
101 doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
103 RCP<Time> unpackAndCombineTimer =
104 TimeMonitor::lookupCounter (
"Tpetra::DistObject::unpackAndCombine");
105 if (unpackAndCombineTimer.is_null ()) {
106 unpackAndCombineTimer =
107 TimeMonitor::getNewCounter (
"Tpetra::DistObject::unpackAndCombine");
109 unpackAndCombineTimer_ = unpackAndCombineTimer;
110 #endif // HAVE_TPETRA_TRANSFER_TIMERS
113 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
119 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Builds a short textual summary of this object in a std::ostringstream:
// the class name, the type names of the four template parameters, and,
// when the object label is nonempty, the label itself.
// NOTE(review): the function signature and the closing part of the body
// are not visible in this excerpt; only the visible statements are
// documented here.
124 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
129 using Teuchos::TypeNameTraits;
131 std::ostringstream os;
132 os <<
"\"Tpetra::DistObject\": {"
133 <<
"Packet: " << TypeNameTraits<packet_type>::name ()
134 <<
", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
135 <<
", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
136 <<
", Node: " << TypeNameTraits<Node>::name ();
// Only mention the label when one has been set.  The leading ", " keeps
// the label separated from the preceding "Node: ..." field, consistent
// with the separators between the other fields.
137 if (this->getObjectLabel () !=
"") {
138 os <<
", Label: \"" << this->getObjectLabel () <<
"\"";
144 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
148 const Teuchos::EVerbosityLevel verbLevel)
const
150 using Teuchos::rcpFromRef;
151 using Teuchos::TypeNameTraits;
153 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
154 Teuchos::VERB_LOW : verbLevel;
155 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
156 const int myRank = comm.is_null () ? 0 : comm->getRank ();
157 const int numProcs = comm.is_null () ? 1 : comm->getSize ();
159 if (vl != Teuchos::VERB_NONE) {
160 Teuchos::OSTab tab0 (out);
162 out <<
"\"Tpetra::DistObject\":" << endl;
164 Teuchos::OSTab tab1 (out);
166 out <<
"Template parameters:" << endl;
168 Teuchos::OSTab tab2 (out);
169 out <<
"Packet: " << TypeNameTraits<packet_type>::name () << endl
170 <<
"LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
171 <<
"GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
172 <<
"Node: " << TypeNameTraits<node_type>::name () << endl;
174 if (this->getObjectLabel () !=
"") {
175 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl;
182 out <<
"Map:" << endl;
184 Teuchos::OSTab tab2 (out);
185 map_->describe (out, vl);
189 if (vl > Teuchos::VERB_LOW) {
190 for (
int p = 0; p < numProcs; ++p) {
192 out <<
"Process " << myRank <<
":" << endl;
193 Teuchos::OSTab tab2 (out);
194 out <<
"Export buffer size (in packets): "
195 << exports_.extent (0)
197 <<
"Import buffer size (in packets): "
198 << imports_.extent (0)
201 if (! comm.is_null ()) {
// Member-function stub: the visible body unconditionally throws
// std::logic_error with a "Not implemented" message.
// NOTE(review): the signature is not visible in this excerpt; the name is
// taken from the exception message below.
211 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
216 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
217 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
249 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
257 const char modeString[] =
"doImport (forward mode)";
263 std::unique_ptr<std::string> prefix;
266 auto map = this->getMap ();
267 if (! map.is_null ()) {
268 auto comm = map->getComm ();
269 if (! comm.is_null ()) {
270 myRank = comm->getRank ();
273 prefix = [myRank] () {
274 std::ostringstream os;
275 os <<
"(Proc " << myRank <<
") ";
276 return std::unique_ptr<std::string> (
new std::string (os.str ()));
278 std::ostringstream os;
279 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
280 std::cerr << os.str ();
282 this->doTransfer (source, importer, modeString, DoForward, CM);
284 std::ostringstream os;
285 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!"
287 std::cerr << os.str ();
291 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
299 const char modeString[] =
"doExport (forward mode)";
305 std::unique_ptr<std::string> prefix;
308 auto map = this->getMap ();
309 if (! map.is_null ()) {
310 auto comm = map->getComm ();
311 if (! comm.is_null ()) {
312 myRank = comm->getRank ();
315 prefix = [myRank] () {
316 std::ostringstream os;
317 os <<
"(Proc " << myRank <<
") ";
318 return std::unique_ptr<std::string> (
new std::string (os.str ()));
320 std::ostringstream os;
321 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
322 std::cerr << os.str ();
324 this->doTransfer (source, exporter, modeString, DoForward, CM);
326 std::ostringstream os;
327 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!"
329 std::cerr << os.str ();
333 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
341 const char modeString[] =
"doImport (reverse mode)";
347 std::unique_ptr<std::string> prefix;
350 auto map = this->getMap ();
351 if (! map.is_null ()) {
352 auto comm = map->getComm ();
353 if (! comm.is_null ()) {
354 myRank = comm->getRank ();
357 prefix = [myRank] () {
358 std::ostringstream os;
359 os <<
"(Proc " << myRank <<
") ";
360 return std::unique_ptr<std::string> (
new std::string (os.str ()));
362 std::ostringstream os;
363 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
364 std::cerr << os.str ();
366 this->doTransfer (source, exporter, modeString, DoReverse, CM);
368 std::ostringstream os;
369 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!"
371 std::cerr << os.str ();
375 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
383 const char modeString[] =
"doExport (reverse mode)";
389 std::unique_ptr<std::string> prefix;
392 auto map = this->getMap ();
393 if (! map.is_null ()) {
394 auto comm = map->getComm ();
395 if (! comm.is_null ()) {
396 myRank = comm->getRank ();
399 prefix = [myRank] () {
400 std::ostringstream os;
401 os <<
"(Proc " << myRank <<
") ";
402 return std::unique_ptr<std::string> (
new std::string (os.str ()));
404 std::ostringstream os;
405 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
406 std::cerr << os.str ();
408 this->doTransfer (source, importer, modeString, DoReverse, CM);
410 std::ostringstream os;
411 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!"
413 std::cerr << os.str ();
417 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
424 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
431 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
435 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
436 const char modeString[],
442 typedef LocalOrdinal LO;
449 if (revOp == DoForward) {
450 const bool myMapSameAsTransferTgtMap =
451 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
452 TEUCHOS_TEST_FOR_EXCEPTION
453 (! myMapSameAsTransferTgtMap, std::invalid_argument,
454 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
455 "communication, the target DistObject's Map must be the same "
456 "(in the sense of Tpetra::Map::isSameAs) as the input "
457 "Export/Import object's target Map.");
460 const bool myMapSameAsTransferSrcMap =
461 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
462 TEUCHOS_TEST_FOR_EXCEPTION
463 (! myMapSameAsTransferSrcMap, std::invalid_argument,
464 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
465 "communication, the target DistObject's Map must be the same "
466 "(in the sense of Tpetra::Map::isSameAs) as the input "
467 "Export/Import object's source Map.");
473 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
474 if (srcDistObj != NULL) {
475 if (revOp == DoForward) {
476 const bool srcMapSameAsImportSrcMap =
477 srcDistObj->
getMap ()->isSameAs (* (transfer.getSourceMap ()));
478 TEUCHOS_TEST_FOR_EXCEPTION
479 (! srcMapSameAsImportSrcMap, std::invalid_argument,
480 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
481 "communication, the source DistObject's Map must be the same "
482 "as the input Export/Import object's source Map.");
485 const bool srcMapSameAsImportTgtMap =
486 srcDistObj->
getMap ()->isSameAs (* (transfer.getTargetMap ()));
487 TEUCHOS_TEST_FOR_EXCEPTION
488 (! srcMapSameAsImportTgtMap, std::invalid_argument,
489 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
490 "communication, the source DistObject's Map must be the same "
491 "as the input Export/Import object's target Map.");
500 std::unique_ptr<std::string> prefix;
503 auto map = this->getMap ();
504 if (! map.is_null ()) {
505 auto comm = map->getComm ();
506 if (! comm.is_null ()) {
507 myRank = comm->getRank ();
510 prefix = [myRank] () {
511 std::ostringstream os;
512 os <<
"(Proc " << myRank <<
") ";
513 return std::unique_ptr<std::string> (
new std::string (os.str ()));
515 std::ostringstream os;
516 os << *prefix <<
"Tpetra::DistObject::doTransfer:" << endl;
517 std::cerr << os.str ();
520 const size_t numSameIDs = transfer.getNumSameIDs ();
521 typedef Teuchos::ArrayView<const LocalOrdinal> view_type;
522 const view_type permuteToLIDs_ = (revOp == DoForward) ?
523 transfer.getPermuteToLIDs () : transfer.getPermuteFromLIDs ();
524 const view_type permuteFromLIDs_ = (revOp == DoForward) ?
525 transfer.getPermuteFromLIDs () : transfer.getPermuteToLIDs ();
526 const view_type exportLIDs_ = (revOp == DoForward) ?
527 transfer.getExportLIDs () : transfer.getRemoteLIDs ();
528 const view_type remoteLIDs_ = (revOp == DoForward) ?
529 transfer.getRemoteLIDs () : transfer.getExportLIDs ();
532 if (this->useNewInterface ()) {
533 using ::Tpetra::Details::Behavior;
535 const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
537 std::ostringstream os;
538 os << *prefix <<
"doTransfer: Use new interface; "
539 "commOnHost=" << (commOnHost ?
"true" :
"false") << endl;
540 std::cerr << os.str ();
551 Kokkos::DualView<LO*, DT> permuteToLIDs =
552 getDualViewCopyFromArrayView<LO, DT> (permuteToLIDs_,
555 Kokkos::DualView<LO*, DT> permuteFromLIDs =
556 getDualViewCopyFromArrayView<LO, DT> (permuteFromLIDs_,
561 Kokkos::DualView<LO*, DT> remoteLIDs =
562 getDualViewCopyFromArrayView<LO, DT> (remoteLIDs_,
565 Kokkos::DualView<LO*, DT> exportLIDs =
566 getDualViewCopyFromArrayView<LO, DT> (exportLIDs_,
569 doTransferNew (src, CM, numSameIDs, permuteToLIDs, permuteFromLIDs,
570 remoteLIDs, exportLIDs, distor, revOp, commOnHost);
574 std::ostringstream os;
575 os << *prefix <<
"doTransfer: Use old interface" << endl;
576 std::cerr << os.str ();
578 doTransferOld (src, CM, numSameIDs, permuteToLIDs_, permuteFromLIDs_,
579 remoteLIDs_, exportLIDs_, distor, revOp);
583 std::ostringstream os;
584 os << *prefix <<
"Tpetra::DistObject::doTransfer: Done!" << endl;
585 std::cerr << os.str ();
// (Re)allocates the imports_ buffer to hold newSize entries, writing a
// rank-tagged debug message to std::cerr before and after the attempt.
// NOTE(review): the signature, the reallocation call itself, and whatever
// condition guards the debug output are not visible in this excerpt; the
// name is presumed from the call sites reallocImportsIfNeeded(len, flag).
589 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// "Before" message: current extent of imports_ and the requested size.
// The text is assembled in a local stream and written to std::cerr with a
// single insertion.
595 const int myRank = this->getMap ()->getComm ()->getRank ();
596 std::ostringstream os;
597 os <<
"(Proc " << myRank <<
") Reallocate (if needed) imports_ from "
598 << imports_.extent (0) <<
" to " << newSize << std::endl;
599 std::cerr << os.str ();
// Result of the (elided) reallocation call.
602 const bool reallocated =
// "After" message.
605 const int myRank = this->getMap ()->getComm ()->getRank ();
606 std::ostringstream os;
607 os <<
"(Proc " << myRank <<
") Finished reallocating imports_"
609 std::cerr << os.str ();
// (Re)allocates the two per-LID packet-count arrays (labelled
// "numExportPacketsPerLID" and "numImportPacketsPerLID") and returns true
// if either of them was reallocated.  Rank-tagged debug output is written
// to std::cerr before and after the reallocations.
// NOTE(review): the full signature, the reallocation calls, and the
// condition guarding the debug output are not visible in this excerpt.
614 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
618 const size_t numImportLIDs)
627 constexpr
size_t tooBigFactor = 10;
// Debug dump of the state before any reallocation.
631 const int myRank = this->getMap ()->getComm ()->getRank ();
632 std::ostringstream os;
633 os <<
"(Proc " << myRank <<
") reallocArraysForNumPacketsPerLid before:"
635 <<
"(Proc " << myRank <<
") "
638 <<
"(Proc " << myRank <<
") "
641 std::cerr << os.str ();
645 const bool firstReallocated =
648 "numExportPacketsPerLID",
// The second call is told to fence first only when the first call did
// not reallocate.
655 const bool needFenceBeforeNextAlloc = ! firstReallocated;
656 const bool secondReallocated =
659 "numImportPacketsPerLID",
661 needFenceBeforeNextAlloc);
// Debug dump of the state after both reallocations.  The header says
// "after:" so the two dumps can be told apart in the log.
664 const int myRank = this->getMap ()->getComm ()->getRank ();
665 std::ostringstream os;
666 os <<
"(Proc " << myRank <<
") reallocArraysForNumPacketsPerLid after:"
668 <<
"(Proc " << myRank <<
") "
671 <<
"(Proc " << myRank <<
") "
674 std::cerr << os.str ();
677 return firstReallocated || secondReallocated;
680 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
686 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
687 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
688 const Teuchos::ArrayView<const LocalOrdinal>& remoteLIDs,
689 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
699 constexpr
bool debug =
false;
701 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
702 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
703 #endif // HAVE_TPETRA_TRANSFER_TIMERS
705 TEUCHOS_TEST_FOR_EXCEPTION(
706 ! checkSizes (src), std::invalid_argument,
707 "Tpetra::DistObject::doTransfer(): checkSizes() indicates that the "
708 "destination object is not a legal target for redistribution from the "
709 "source object. This probably means that they do not have the same "
710 "dimensions. For example, MultiVectors must have the same number of "
711 "rows and columns.");
714 const size_t numIDsToWrite = numSameIDs +
715 static_cast<size_t> (permuteToLIDs.size ()) +
716 static_cast<size_t> (remoteLIDs.size ());
717 if (numIDsToWrite == this->getMap ()->getNodeNumElements ()) {
725 rwo = KokkosClassic::WriteOnly;
736 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
737 if (srcDistObj != NULL) {
738 srcDistObj->createViews ();
753 this->createViewsNonConst (rwo);
755 if (numSameIDs + permuteToLIDs.size()) {
756 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
757 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
758 #endif // HAVE_TPETRA_TRANSFER_TIMERS
760 copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs);
771 size_t constantNumPackets = this->constantNumberOfPackets ();
778 if (constantNumPackets == 0) {
779 this->reallocArraysForNumPacketsPerLid (exportLIDs.size (),
784 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
785 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
786 #endif // HAVE_TPETRA_TRANSFER_TIMERS
792 numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
793 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
801 Teuchos::Array<packet_type> exportsOld;
802 packAndPrepare (src, exportLIDs, exportsOld, numExportPacketsPerLID,
803 constantNumPackets, distor);
804 const size_t exportsLen = static_cast<size_t> (exportsOld.size ());
806 Kokkos::View<
const packet_type*, Kokkos::HostSpace,
807 Kokkos::MemoryUnmanaged> exportsOldK (exportsOld.getRawPtr (),
809 exports_.template modify<Kokkos::HostSpace> ();
819 if (srcDistObj != NULL) {
820 srcDistObj->releaseViews ();
825 if (constantNumPackets != 0) {
830 const size_t rbufLen = remoteLIDs.size() * constantNumPackets;
832 std::ostringstream os;
833 os <<
"*** doTransferOld: Const # packets: imports_.extent(0) = "
834 << imports_.extent (0) <<
", rbufLen = " << rbufLen
836 std::cerr << os.str ();
838 reallocImportsIfNeeded (rbufLen, debug);
842 bool needCommunication =
true;
843 if (revOp == DoReverse && ! isDistributed ()) {
844 needCommunication =
false;
853 else if (revOp == DoForward && srcDistObj != NULL &&
854 ! srcDistObj->isDistributed ()) {
855 needCommunication =
false;
858 if (needCommunication) {
859 if (revOp == DoReverse) {
860 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
861 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
862 #endif // HAVE_TPETRA_TRANSFER_TIMERS
863 if (constantNumPackets == 0) {
869 numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
870 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
877 numImportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
878 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
881 numImportPacketsPerLID);
882 size_t totalImportPackets = 0;
884 typedef typename Kokkos::DualView<
size_t*,
885 device_type>::t_host::execution_space host_exec_space;
886 typedef Kokkos::RangePolicy<host_exec_space, Array_size_type> range_type;
887 const size_t*
const arrayToSum = numImportPacketsPerLID.getRawPtr ();
888 Kokkos::parallel_reduce (
"Count import packets",
889 range_type (0, numImportPacketsPerLID.size ()),
890 [=] (
const Array_size_type& i,
size_t& lclSum) {
891 lclSum += arrayToSum[i];
892 }, totalImportPackets);
895 reallocImportsIfNeeded (totalImportPackets, debug);
901 imports_.template modify<Kokkos::HostSpace> ();
902 Teuchos::ArrayView<packet_type> hostImports =
904 exports_.template sync<Kokkos::HostSpace> ();
905 Teuchos::ArrayView<const packet_type> hostExports =
908 numExportPacketsPerLID,
910 numImportPacketsPerLID);
917 imports_.template modify<Kokkos::HostSpace> ();
918 Teuchos::ArrayView<packet_type> hostImports =
920 exports_.template sync<Kokkos::HostSpace> ();
921 Teuchos::ArrayView<const packet_type> hostExports =
929 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
930 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
931 #endif // HAVE_TPETRA_TRANSFER_TIMERS
932 if (constantNumPackets == 0) {
938 numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
939 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
946 numImportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
947 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
950 numImportPacketsPerLID);
951 size_t totalImportPackets = 0;
953 typedef typename Kokkos::DualView<
size_t*,
954 device_type>::t_host::execution_space host_exec_space;
955 typedef Kokkos::RangePolicy<host_exec_space, Array_size_type> range_type;
956 const size_t*
const arrayToSum = numImportPacketsPerLID.getRawPtr ();
957 Kokkos::parallel_reduce (
"Count import packets",
958 range_type (0, numImportPacketsPerLID.size ()),
959 [=] (
const Array_size_type& i,
size_t& lclSum) {
960 lclSum += arrayToSum[i];
961 }, totalImportPackets);
964 reallocImportsIfNeeded (totalImportPackets, debug);
970 imports_.template modify<Kokkos::HostSpace> ();
971 Teuchos::ArrayView<packet_type> hostImports =
973 exports_.template sync<Kokkos::HostSpace> ();
974 Teuchos::ArrayView<const packet_type> hostExports =
977 numExportPacketsPerLID,
979 numImportPacketsPerLID);
986 imports_.template modify<Kokkos::HostSpace> ();
987 Teuchos::ArrayView<packet_type> hostImports =
989 exports_.template sync<Kokkos::HostSpace> ();
990 Teuchos::ArrayView<const packet_type> hostExports =
998 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
999 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1000 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1005 imports_.template modify<Kokkos::HostSpace> ();
1006 Teuchos::ArrayView<packet_type> hostImports =
1011 numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
1015 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
1018 constantNumPackets, distor, CM);
1023 this->releaseViews ();
// Reduction functor over a 1-D Kokkos::View of const size_t: each call of
// operator() adds entry i of the stored view to the running local sum.
// DeviceType selects the View's device; IndexType (default size_t) is the
// index type passed to operator().
1027 template<
class DeviceType,
class IndexType =
size_t>
// Stores the View to be summed.
1029 SumFunctor (
const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
1030 viewToSum_ (viewToSum) {}
// Reduction body: accumulate entry i into lclSum.
1031 KOKKOS_FUNCTION
void operator() (
const IndexType& i,
size_t& lclSum)
const {
1032 lclSum += viewToSum_(i);
// The View whose entries are summed.
1034 Kokkos::View<const size_t*, DeviceType> viewToSum_;
// Returns the sum of all entries of numImportPacketsPerLID.  The sum is
// computed with Kokkos::parallel_reduce over the index range
// [0, extent(0)) using DeviceType's execution space and a SumFunctor.
1037 template<
class DeviceType,
class IndexType =
size_t>
1039 countTotalImportPackets (
const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
1041 using Kokkos::parallel_reduce;
1042 typedef DeviceType DT;
1043 typedef typename DT::execution_space DES;
1044 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
// Number of entries to sum.
1046 const IndexType numOut = numImportPacketsPerLID.extent (0);
// Reduction result; starts at zero.
1047 size_t totalImportPackets = 0;
1048 parallel_reduce (
"Count import packets",
1049 range_type (0, numOut),
1050 SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
1051 totalImportPackets);
1052 return totalImportPackets;
1056 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1058 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1059 doTransferNew (
const SrcDistObject& src,
1061 const size_t numSameIDs,
1062 const Kokkos::DualView<
const local_ordinal_type*,
1063 device_type>& permuteToLIDs,
1064 const Kokkos::DualView<
const local_ordinal_type*,
1065 device_type>& permuteFromLIDs,
1066 const Kokkos::DualView<
const local_ordinal_type*,
1067 device_type>& remoteLIDs,
1068 const Kokkos::DualView<
const local_ordinal_type*,
1069 device_type>& exportLIDs,
1070 Distributor& distor,
1071 const ReverseOption revOp,
1072 const bool commOnHost)
1076 using Kokkos::Compat::getArrayView;
1077 using Kokkos::Compat::getConstArrayView;
1078 using Kokkos::Compat::getKokkosViewDeepCopy;
1079 using Kokkos::Compat::create_const_view;
1081 typedef LocalOrdinal LO;
1082 typedef device_type DT;
1084 typedef typename Kokkos::DualView<LO*, DT>::t_dev::execution_space DES;
1093 typedef typename Kokkos::DualView<
size_t*,
1094 buffer_device_type>::t_dev::memory_space CDMS;
1095 typedef typename Kokkos::DualView<
size_t*,
1096 buffer_device_type>::t_host::memory_space CHMS;
1104 std::unique_ptr<std::string> prefix;
1106 auto map = this->getMap ();
1107 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1108 const int myRank = comm.is_null () ? 0 : comm->getRank ();
1109 std::ostringstream os;
1110 os <<
"(Proc " << myRank <<
") ";
1111 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
// Debug dump of doTransferNew's input arguments.  The message is built in
// a local stream and written to std::cerr with a single insertion.  The
// header names Tpetra::DistObject, the class this method belongs to.
1115 std::ostringstream os;
1116 os << *prefix <<
"Tpetra::DistObject::doTransferNew: Input arguments:" << endl
1118 << *prefix <<
" numSameIDs: " << numSameIDs << endl
1127 << *prefix <<
" revOp: Do" << (revOp == DoReverse ?
"Reverse" :
"Forward") << endl
1128 << *prefix <<
" commOnHost: " << (commOnHost ?
"true" :
"false") << endl;
1129 std::cerr << os.str ();
1132 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1133 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
1134 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1138 std::ostringstream os;
1139 os << *prefix <<
"1. checkSizes" << endl;
1140 std::cerr << os.str ();
1142 const bool checkSizesResult = this->checkSizes (src);
1143 TEUCHOS_TEST_FOR_EXCEPTION
1144 (! checkSizesResult, std::invalid_argument,
1145 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
1146 "destination object is not a legal target for redistribution from the "
1147 "source object. This probably means that they do not have the same "
1148 "dimensions. For example, MultiVectors must have the same number of "
1149 "rows and columns.");
1156 if (numSameIDs + permuteToLIDs.extent (0) != 0) {
1159 std::ostringstream os;
1160 os << *prefix <<
"2. copyAndPermuteNew" << endl;
1161 std::cerr << os.str ();
1164 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1165 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
1166 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1167 this->copyAndPermuteNew (src, numSameIDs, permuteToLIDs,
1171 std::ostringstream os;
1172 os << *prefix <<
"After copyAndPermuteNew:" << endl
1179 std::cerr << os.str ();
1191 size_t constantNumPackets = this->constantNumberOfPackets ();
1193 std::ostringstream os;
1194 os << *prefix <<
"constantNumPackets=" << constantNumPackets << endl;
1195 std::cerr << os.str ();
1203 if (constantNumPackets == 0) {
1205 std::ostringstream os;
1206 os << *prefix <<
"3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1208 std::cerr << os.str ();
1212 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1213 remoteLIDs.extent (0));
1217 std::ostringstream os;
1218 os << *prefix <<
"4. packAndPrepareNew: before, "
1221 std::cerr << os.str ();
1224 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1225 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1226 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1232 this->packAndPrepareNew (src, exportLIDs, this->exports_,
1233 this->numExportPacketsPerLID_,
1234 constantNumPackets, distor);
1239 typedef typename Kokkos::View<char*, buffer_device_type>::HostMirror::device_type
1240 buffer_host_device_type;
1241 typedef typename buffer_host_device_type::memory_space
1242 buffer_host_memory_space;
1243 this->exports_.template sync<buffer_host_memory_space> ();
1246 typedef typename buffer_device_type::memory_space buffer_dev_memory_space;
1247 this->exports_.template sync<buffer_dev_memory_space> ();
1251 std::ostringstream os;
1252 os << *prefix <<
"5.1. After packAndPrepareNew, "
1255 std::cerr << os.str ();
1261 if (constantNumPackets != 0) {
1263 std::ostringstream os;
1264 os << *prefix <<
"6. Realloc imports_" << std::endl;
1265 std::cerr << os.str ();
1271 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1272 reallocImportsIfNeeded (rbufLen, verbose);
1276 bool needCommunication =
true;
1279 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1281 if (revOp == DoReverse && ! this->isDistributed ()) {
1282 needCommunication =
false;
1291 else if (revOp == DoForward && srcDistObj != NULL &&
1292 ! srcDistObj->isDistributed ()) {
1293 needCommunication =
false;
1297 std::ostringstream os;
1298 os << *prefix <<
"needCommunication="
1299 << (needCommunication ?
"true" :
"false") << endl;
1300 std::cerr << os.str ();
1311 if (needCommunication) {
1312 if (revOp == DoReverse) {
1314 std::ostringstream os;
1315 os << *prefix <<
"7.0. Reverse mode" << endl;
1316 std::cerr << os.str ();
1318 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1319 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1320 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1321 if (constantNumPackets == 0) {
1323 std::ostringstream os;
1324 os << *prefix <<
"7.1. Variable # packets / LID: first comm "
1325 <<
"(commOnHost = " << (commOnHost ?
"true" :
"false") <<
")"
1327 std::cerr << os.str ();
1329 size_t totalImportPackets = 0;
1331 this->numExportPacketsPerLID_.template sync<CHMS> ();
1332 this->numImportPacketsPerLID_.template sync<CHMS> ();
1333 this->numImportPacketsPerLID_.template modify<CHMS> ();
1334 auto numExp_h = create_const_view (this->numExportPacketsPerLID_.template view<CHMS> ());
1335 auto numImp_h = this->numImportPacketsPerLID_.template view<CHMS> ();
1338 distor.doReversePostsAndWaits (numExp_h, 1, numImp_h);
1341 typedef typename decltype (numImp_h)::device_type the_dev_type;
1342 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1345 this->numExportPacketsPerLID_.template sync<CDMS> ();
1346 this->numImportPacketsPerLID_.template sync<CDMS> ();
1347 this->numImportPacketsPerLID_.template modify<CDMS> ();
1348 auto numExp_d = create_const_view (this->numExportPacketsPerLID_.template view<CDMS> ());
1349 auto numImp_d = this->numImportPacketsPerLID_.template view<CDMS> ();
1352 distor.doReversePostsAndWaits (numExp_d, 1, numImp_d);
1355 typedef typename decltype (numImp_d)::device_type the_dev_type;
1356 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1360 std::ostringstream os;
1361 os << *prefix <<
"totalImportPackets=" << totalImportPackets
1363 std::cerr << os.str ();
1365 this->reallocImportsIfNeeded (totalImportPackets, verbose);
1367 std::ostringstream os;
1368 os << *prefix <<
"7.3. Second comm" << std::endl;
1369 std::cerr << os.str ();
1376 this->numExportPacketsPerLID_.template sync<CHMS> ();
1377 this->numImportPacketsPerLID_.template sync<CHMS> ();
1383 auto numExportPacketsPerLID_av =
1385 auto numImportPacketsPerLID_av =
1393 this->imports_.modified_device() = 0;
1394 this->imports_.modified_host() = 0;
1397 this->imports_.template modify<CHMS> ();
1398 distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1399 numExportPacketsPerLID_av,
1400 this->imports_.template view<CHMS> (),
1401 numImportPacketsPerLID_av);
1404 this->imports_.template modify<CDMS> ();
1405 distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1406 numExportPacketsPerLID_av,
1407 this->imports_.template view<CDMS> (),
1408 numImportPacketsPerLID_av);
1413 std::ostringstream os;
1414 os << *prefix <<
"7.1. Const # packets per LID: " << endl
1421 std::cerr << os.str ();
1429 this->imports_.modified_device() = 0;
1430 this->imports_.modified_host() = 0;
1433 this->imports_.template modify<CHMS> ();
1434 distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1436 this->imports_.template view<CHMS> ());
1439 this->imports_.template modify<CDMS> ();
1440 distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1442 this->imports_.template view<CDMS> ());
1448 std::cerr <<
">>> 7.0. Forward mode" << std::endl;
1451 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1452 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1453 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1454 if (constantNumPackets == 0) {
1456 std::cerr <<
">>> 7.1. Variable # packets / LID: first comm" << std::endl;
1459 size_t totalImportPackets = 0;
1461 this->numExportPacketsPerLID_.template sync<CHMS> ();
1462 this->numImportPacketsPerLID_.template sync<CHMS> ();
1463 this->numImportPacketsPerLID_.template modify<CHMS> ();
1464 auto numExp_h = create_const_view (this->numExportPacketsPerLID_.template view<CHMS> ());
1465 auto numImp_h = this->numImportPacketsPerLID_.template view<CHMS> ();
1468 distor.doPostsAndWaits (numExp_h, 1, numImp_h);
1471 typedef typename decltype (numImp_h)::device_type the_dev_type;
1472 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1475 this->numExportPacketsPerLID_.template sync<CDMS> ();
1476 this->numImportPacketsPerLID_.template sync<CDMS> ();
1477 this->numImportPacketsPerLID_.template modify<CDMS> ();
1478 auto numExp_d = create_const_view (this->numExportPacketsPerLID_.template view<CDMS> ());
1479 auto numImp_d = this->numImportPacketsPerLID_.template view<CDMS> ();
1482 distor.doPostsAndWaits (numExp_d, 1, numImp_d);
1485 typedef typename decltype (numImp_d)::device_type the_dev_type;
1486 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1489 this->reallocImportsIfNeeded (totalImportPackets, verbose);
1492 std::cerr <<
">>> 7.3. Second comm" << std::endl;
1499 this->numExportPacketsPerLID_.template sync<CHMS> ();
1500 this->numImportPacketsPerLID_.template sync<CHMS> ();
1506 auto numExportPacketsPerLID_av =
1508 auto numImportPacketsPerLID_av =
1516 this->imports_.modified_device() = 0;
1517 this->imports_.modified_host() = 0;
1520 this->imports_.template modify<CHMS> ();
1521 distor.doPostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1522 numExportPacketsPerLID_av,
1523 this->imports_.template view<CHMS> (),
1524 numImportPacketsPerLID_av);
1527 this->imports_.template modify<CDMS> ();
1528 distor.doPostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1529 numExportPacketsPerLID_av,
1530 this->imports_.template view<CDMS> (),
1531 numImportPacketsPerLID_av);
1536 std::ostringstream os;
1537 os << *prefix <<
"7.1. Const # packets per LID: "
1538 <<
"exports_.extent(0)=" << exports_.extent (0)
1539 <<
", imports_.extent(0) = " << imports_.extent (0)
1541 std::cerr << os.str ();
1548 this->imports_.modified_device() = 0;
1549 this->imports_.modified_host() = 0;
1553 std::ostringstream os;
1554 os << *prefix <<
"7.2. Comm buffers on host" << endl;
1555 std::cerr << os.str ();
1557 this->imports_.template modify<CHMS> ();
1558 distor.doPostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1560 this->imports_.template view<CHMS> ());
1564 std::ostringstream os;
1565 os << *prefix <<
"7.2. Comm buffers on device" << endl;
1566 std::cerr << os.str ();
1568 this->imports_.template modify<CDMS> ();
1569 distor.doPostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1571 this->imports_.template view<CDMS> ());
1578 std::ostringstream os;
1579 os << *prefix <<
"8. unpackAndCombineNew" << endl;
1580 std::cerr << os.str ();
1582 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1583 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1584 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1592 this->unpackAndCombineNew (remoteLIDs, this->imports_,
1593 this->numImportPacketsPerLID_,
1594 constantNumPackets, distor, CM);
1600 std::ostringstream os;
1601 os << *prefix <<
"9. Done!" << endl;
1602 std::cerr << os.str ();
// Prints this object to the given std::ostream by wrapping the stream in
// a Teuchos::FancyOStream (via rcpFromRef / getFancyOStream) and calling
// describe() with Teuchos::VERB_DEFAULT.
1606 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1608 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1609 print (std::ostream &os)
const
1611 using Teuchos::FancyOStream;
1612 using Teuchos::getFancyOStream;
1614 using Teuchos::rcpFromRef;
// Adapt the plain ostream to the FancyOStream that describe() expects.
1617 RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1618 this->describe (*out, Teuchos::VERB_DEFAULT);
1621 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1627 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1633 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Free-function helper: calls input->removeEmptyProcessesInPlace(newMap)
// and then, if newMap is null, resets input to Teuchos::null.
// NOTE(review): the return type, function name, and the declaration of
// `input` are not visible in this excerpt; the name is presumed from the
// call removeEmptyProcessesInPlace<DistObjectType>(input, newMap) that
// appears later in the file.
1641 template<
class DistObjectType>
1644 const Teuchos::RCP<
const Map<
typename DistObjectType::local_ordinal_type,
1645 typename DistObjectType::global_ordinal_type,
1646 typename DistObjectType::node_type> >& newMap)
1648 input->removeEmptyProcessesInPlace (newMap);
// A null new Map leads to the handle being cleared.
1649 if (newMap.is_null ()) {
1650 input = Teuchos::null;
// Free-function helper that forwards to
// removeEmptyProcessesInPlace<DistObjectType>(input, newMap).
// NOTE(review): the signature and the statements that produce `newMap`
// are not visible in this excerpt.
1654 template<
class DistObjectType>
// Shorthand for the DistObjectType's ordinal and node types.
1659 typedef typename DistObjectType::local_ordinal_type LO;
1660 typedef typename DistObjectType::global_ordinal_type GO;
1661 typedef typename DistObjectType::node_type NT;
1665 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
// Explicit-instantiation macro: expands to an explicit instantiation of
// DistObject<SCALAR, LO, GO, NODE> inside namespace Classes.
1669 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1670 namespace Classes { template class DistObject< SCALAR , LO , GO , NODE >; }
// Explicit-instantiation macro for the char packet type: expands to an
// explicit instantiation of DistObject<char, LO, GO, NODE> inside
// namespace Classes.
1674 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1675 namespace Classes { template class DistObject< char , LO , GO , NODE >; }
1679 #endif // TPETRA_DISTOBJECT_DEF_HPP