42 #ifndef TPETRA_DISTRIBUTOR_HPP
43 #define TPETRA_DISTRIBUTOR_HPP
46 #include <Teuchos_as.hpp>
47 #include <Teuchos_Describable.hpp>
48 #include <Teuchos_ParameterListAcceptorDefaultBase.hpp>
49 #include <Teuchos_VerboseObject.hpp>
61 #ifdef TPETRA_DISTRIBUTOR_TIMERS
62 # undef TPETRA_DISTRIBUTOR_TIMERS
63 #endif // TPETRA_DISTRIBUTOR_TIMERS
65 #include "KokkosCompat_View.hpp"
66 #include "Kokkos_Core.hpp"
67 #include "Kokkos_TeuchosCommAdapters.hpp"
68 #include <type_traits>
96 DISTRIBUTOR_NOT_INITIALIZED,
97 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS,
98 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS,
99 DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS_N_RECVS,
100 DISTRIBUTOR_INITIALIZED_BY_REVERSE,
101 DISTRIBUTOR_INITIALIZED_BY_COPY,
189 public Teuchos::Describable,
190 public Teuchos::ParameterListAcceptorDefaultBase {
// Constructor overloads.  All four take the communicator; they differ only
// in whether an output stream ('out') and/or a parameter list ('plist') is
// also supplied.  NOTE(review): the private init() declared further down
// takes the same (comm, out, plist) triple, so these presumably forward to
// it -- confirm in the .cpp file.

// Communicator only.  'explicit' prevents implicit conversion from an RCP.
203 explicit Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm);
// Communicator plus output stream.
216 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
217 const Teuchos::RCP<Teuchos::FancyOStream>& out);
// Communicator plus parameter list.
232 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
233 const Teuchos::RCP<Teuchos::ParameterList>& plist);
// Communicator, output stream, and parameter list.
251 Distributor (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
252 const Teuchos::RCP<Teuchos::FancyOStream>& out,
253 const Teuchos::RCP<Teuchos::ParameterList>& plist);
307 size_t createFromSends (
const Teuchos::ArrayView<const int>& exportProcIDs);
342 template <
class Ordinal>
345 const Teuchos::ArrayView<const int>& remoteProcIDs,
346 Teuchos::Array<Ordinal>& exportIDs,
347 Teuchos::Array<int>& exportProcIDs);
358 const Teuchos::ArrayView<const int>& remoteProcIDs);
393 Teuchos::ArrayView<const int>
getProcsTo()
const;
418 return howInitialized_;
461 template <
class Packet>
465 const Teuchos::ArrayView<Packet> &imports);
488 template <
class Packet>
491 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
492 const Teuchos::ArrayView<Packet> &imports,
493 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
519 template <
class Packet>
521 doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
523 const Teuchos::ArrayRCP<Packet> &imports);
543 template <
class Packet>
545 doPosts (
const Teuchos::ArrayRCP<const Packet> &exports,
546 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
547 const Teuchos::ArrayRCP<Packet> &imports,
548 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
562 template <
class Packet>
566 const Teuchos::ArrayView<Packet> &imports);
572 template <
class Packet>
575 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
576 const Teuchos::ArrayView<Packet> &imports,
577 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
583 template <
class Packet>
587 const Teuchos::ArrayRCP<Packet> &imports);
593 template <
class Packet>
596 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
597 const Teuchos::ArrayRCP<Packet> &imports,
598 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
628 template <
class ExpView,
class ImpView>
629 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
631 const ExpView &exports,
633 const ImpView &imports);
656 template <
class ExpView,
class ImpView>
657 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
659 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
660 const ImpView &imports,
661 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
687 template <
class ExpView,
class ImpView>
688 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
689 doPosts (
const ExpView &exports,
691 const ImpView &imports);
711 template <
class ExpView,
class ImpView>
712 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
713 doPosts (
const ExpView &exports,
714 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
715 const ImpView &imports,
716 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
722 template <
class ExpView,
class ImpView>
723 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
726 const ImpView &imports);
732 template <
class ExpView,
class ImpView>
733 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
735 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
736 const ImpView &imports,
737 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
743 template <
class ExpView,
class ImpView>
744 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
747 const ImpView &imports);
753 template <
class ExpView,
class ImpView>
754 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
756 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
757 const ImpView &imports,
758 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
764 bytes_sent = lastRoundBytesSend_;
765 bytes_recvd = lastRoundBytesRecv_;
797 describe (Teuchos::FancyOStream& out,
798 const Teuchos::EVerbosityLevel verbLevel =
799 Teuchos::Describable::verbLevel_default)
const;
// Communication state read by the doPosts / do*PostsAndWaits methods.
// The notes below describe only what the method fragments in this file
// visibly do with each member.

// Communicator.  doPosts calls comm_->getRank() and hands *comm_ to the
// Teuchos send/readySend/ssend routines.
804 Teuchos::RCP<const Teuchos::Comm<int> > comm_;
// Output stream; the doPosts methods wrap it in Teuchos::OSTab objects.
807 Teuchos::RCP<Teuchos::FancyOStream> out_;
// Copied into the local 'doBarrier' by doPosts.  When it is false and the
// send type is DISTRIBUTOR_RSEND, doPosts throws std::logic_error.
819 bool barrierBetween_;
// Target rank of each send block.  Entries different from the calling
// rank are sent through the communicator; doPosts treats the entry equal
// to the calling rank separately (the "Self-send" copy).
845 Teuchos::Array<int> procsTo_;
// Start position of each send block; paired with lengthsTo_ to select the
// block's slice (scaled by numPackets in the 3-argument doPosts).
855 Teuchos::Array<size_t> startsTo_;
// Length of each send block; see startsTo_.
862 Teuchos::Array<size_t> lengthsTo_;
// Multiplied by numPackets to size the temporary send buffer on the
// "slow" path of the 3-argument doPosts.
867 size_t maxSendLength_;
// Empty => doPosts takes its "fast" path and sends slices of 'exports'
// directly.  Non-empty => "slow" path: entries index into 'exports' and
// the selected packets are copied into a send buffer first.
884 Teuchos::Array<size_t> indicesTo_;
// Multiplied by numPackets to obtain the minimum size that 'imports' must
// have in the 3-argument doPosts.
903 size_t totalReceiveLength_;
// Length of each receive; determines the size of each slice of 'imports'
// handed to ireceive.
910 Teuchos::Array<size_t> lengthsFrom_;
// Source rank of each receive.  A receive is only posted when the entry
// differs from the calling rank.
917 Teuchos::Array<int> procsFrom_;
// NOTE(review): startsFrom_ and indicesFrom_ are not referenced by any
// code visible in this file section; presumably the receive-side
// counterparts of startsTo_ / indicesTo_ -- confirm.
924 Teuchos::Array<size_t> startsFrom_;
932 Teuchos::Array<size_t> indicesFrom_;
// Outstanding nonblocking requests.  doPosts appends the results of
// ireceive/isend; the do*PostsAndWaits methods throw std::runtime_error
// if this is non-empty on entry.
940 Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int> > > requests_;
// Created on first use: doReversePosts calls createReverseDistributor()
// when this is null.  Declared 'mutable' because that creator is const.
946 mutable Teuchos::RCP<Distributor> reverseDistributor_;
// Byte counts of the most recent round, stored by the do*PostsAndWaits
// methods as exports.size() * sizeof(Packet) ...
949 size_t lastRoundBytesSend_;
// ... and imports.size() * sizeof(Packet), respectively.
952 size_t lastRoundBytesRecv_;
954 #ifdef TPETRA_DISTRIBUTOR_TIMERS
955 Teuchos::RCP<Teuchos::Time> timer_doPosts3_;
956 Teuchos::RCP<Teuchos::Time> timer_doPosts4_;
957 Teuchos::RCP<Teuchos::Time> timer_doWaits_;
958 Teuchos::RCP<Teuchos::Time> timer_doPosts3_recvs_;
959 Teuchos::RCP<Teuchos::Time> timer_doPosts4_recvs_;
960 Teuchos::RCP<Teuchos::Time> timer_doPosts3_barrier_;
961 Teuchos::RCP<Teuchos::Time> timer_doPosts4_barrier_;
962 Teuchos::RCP<Teuchos::Time> timer_doPosts3_sends_;
963 Teuchos::RCP<Teuchos::Time> timer_doPosts4_sends_;
967 #endif // TPETRA_DISTRIBUTOR_TIMERS
980 bool useDistinctTags_;
986 int getTag (
const int pathTag)
const;
1006 init (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1007 const Teuchos::RCP<Teuchos::FancyOStream>& out,
1008 const Teuchos::RCP<Teuchos::ParameterList>& plist);
1020 void computeReceives ();
1034 template <
class Ordinal>
1035 void computeSends (
const Teuchos::ArrayView<const Ordinal> &remoteGIDs,
1036 const Teuchos::ArrayView<const int> &remoteProcIDs,
1037 Teuchos::Array<Ordinal> &exportGIDs,
1038 Teuchos::Array<int> &exportProcIDs);
1041 void createReverseDistributor()
const;
1049 localDescribeToString (
const Teuchos::EVerbosityLevel vl)
const;
// doPostsAndWaits, 3-argument Teuchos::ArrayView overload (identified by
// its own error message).  Only part of the definition is shown in this
// listing; the signature head, braces, and the calls between the numbered
// lines are elided.
//
// Throws std::runtime_error if nonblocking requests are still pending.
1053 template <
class Packet>
1057 const Teuchos::ArrayView<Packet>& imports)
1059 using Teuchos::arcp;
1060 using Teuchos::ArrayRCP;
1061 typedef typename ArrayRCP<const Packet>::size_type size_type;
// Refuse to start a new round while a previous one is still in flight.
1063 TEUCHOS_TEST_FOR_EXCEPTION(
1064 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1065 "doPostsAndWaits(3 args): There are " << requests_.size () <<
1066 " outstanding nonblocking messages pending. It is incorrect to call "
1067 "this method with posts outstanding.");
// Wrap the caller's export buffer in an ArrayRCP without copying it.
// NOTE(review): the trailing 'false' is presumably ArrayRCP's
// has_ownership flag (non-owning wrapper) -- confirm against Teuchos.
1079 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1080 static_cast<size_type> (0),
1081 exports.size(),
false);
// Same wrapping for 'imports'; this is the tail of a call whose start is
// not shown (presumably the ArrayRCP doPosts overload -- confirm).
1096 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false));
// Record the size of this round in bytes.
1099 lastRoundBytesSend_ = exports.size () *
sizeof (Packet);
1100 lastRoundBytesRecv_ = imports.size () *
sizeof (Packet);
// doPostsAndWaits, Teuchos::ArrayView overload taking per-LID packet
// counts (numExportPacketsPerLID / numImportPacketsPerLID).  Only part of
// the definition is shown in this listing.
//
// Throws std::runtime_error if nonblocking requests are still pending.
1103 template <
class Packet>
1106 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1107 const Teuchos::ArrayView<Packet> &imports,
1108 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1110 using Teuchos::arcp;
1111 using Teuchos::ArrayRCP;
// Refuse to start a new round while a previous one is still in flight.
1113 TEUCHOS_TEST_FOR_EXCEPTION(
1114 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1115 "doPostsAndWaits: There are " << requests_.size () <<
" outstanding "
1116 "nonblocking messages pending. It is incorrect to call doPostsAndWaits "
1117 "with posts outstanding.");
// Wrap the caller's export buffer in an ArrayRCP without copying it.
// NOTE(review): the trailing 'false' is presumably ArrayRCP's
// has_ownership flag (non-owning wrapper) -- confirm against Teuchos.
1130 typedef typename ArrayRCP<const Packet>::size_type size_type;
1131 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1132 static_cast<size_type> (0),
1133 exports.size (),
false);
// Remaining arguments of a call whose first line is not shown (presumably
// the 4-argument ArrayRCP doPosts overload -- confirm).  'imports' is
// wrapped the same way as 'exports'.
1139 numExportPacketsPerLID,
1140 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false),
1141 numImportPacketsPerLID);
// Record the size of this round in bytes.
1144 lastRoundBytesSend_ = exports.size () *
sizeof (Packet);
1145 lastRoundBytesRecv_ = imports.size () *
sizeof (Packet);
1149 template <
class Packet>
1151 doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
1153 const Teuchos::ArrayRCP<Packet>& imports)
1155 using Teuchos::Array;
1156 using Teuchos::ArrayRCP;
1157 using Teuchos::ArrayView;
1159 using Teuchos::FancyOStream;
1160 using Teuchos::includesVerbLevel;
1161 using Teuchos::ireceive;
1162 using Teuchos::isend;
1163 using Teuchos::OSTab;
1164 using Teuchos::readySend;
1165 using Teuchos::send;
1166 using Teuchos::ssend;
1167 using Teuchos::TypeNameTraits;
1168 using Teuchos::typeName;
1170 typedef Array<size_t>::size_type size_type;
1174 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1175 Teuchos::TimeMonitor timeMon (*timer_doPosts3_);
1176 #endif // TPETRA_DISTRIBUTOR_TIMERS
1178 const int myRank = comm_->getRank ();
1182 const bool doBarrier = barrierBetween_;
1184 Teuchos::OSTab tab0 (out_);
1186 std::ostringstream os;
1187 os <<
"Proc " << myRank
1188 <<
": Distributor::doPosts(3 args, Teuchos::ArrayRCP)" << endl;
1191 Teuchos::OSTab tab1 (out_);
1193 TEUCHOS_TEST_FOR_EXCEPTION(
1194 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier, std::logic_error,
1195 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Ready-send "
1196 "version requires a barrier between posting receives and posting ready "
1197 "sends. This should have been checked before. "
1198 "Please report this bug to the Tpetra developers.");
1200 size_t selfReceiveOffset = 0;
1205 if (howInitialized_ != Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE) {
1212 const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
1213 TEUCHOS_TEST_FOR_EXCEPTION
1214 (static_cast<size_t> (imports.size ()) < totalNumImportPackets,
1215 std::invalid_argument,
1216 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1217 "The 'imports' array must have enough entries to hold the expected number "
1218 "of import packets. imports.size() = " << imports.size () <<
" < "
1219 "totalNumImportPackets = " << totalNumImportPackets <<
".");
1227 const int pathTag = 0;
1228 const int tag = this->getTag (pathTag);
1230 #ifdef HAVE_TPETRA_DEBUG
1231 TEUCHOS_TEST_FOR_EXCEPTION
1232 (requests_.size () != 0,
1234 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Process "
1235 << myRank <<
": requests_.size() = " << requests_.size () <<
" != 0.");
1236 #endif // HAVE_TPETRA_DEBUG
1251 const size_type actualNumReceives = as<size_type> (numReceives_) +
1252 as<size_type> (selfMessage_ ? 1 : 0);
1253 requests_.resize (0);
1256 std::ostringstream os;
1257 os <<
"Proc " << myRank <<
": doPosts(3 args, Teuchos::ArrayRCP, "
1258 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): Post receives"
1269 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1270 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3_recvs_);
1271 #endif // TPETRA_DISTRIBUTOR_TIMERS
1273 size_t curBufOffset = 0;
1274 for (size_type i = 0; i < actualNumReceives; ++i) {
1275 const size_t curBufLen = lengthsFrom_[i] * numPackets;
1276 if (procsFrom_[i] != myRank) {
1278 std::ostringstream os;
1279 os <<
"Proc " << myRank <<
": doPosts(3 args, Teuchos::ArrayRCP, "
1280 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): "
1281 <<
"Post irecv: {source: " << procsFrom_[i]
1282 <<
", tag: " << tag <<
"}" << endl;
1292 TEUCHOS_TEST_FOR_EXCEPTION(
1293 curBufOffset + curBufLen > static_cast<size_t> (imports.size ()),
1295 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1296 "Exceeded size of 'imports' array in packing loop on Process " <<
1297 myRank <<
". imports.size() = " << imports.size () <<
" < "
1298 "curBufOffset(" << curBufOffset <<
") + curBufLen(" << curBufLen
1300 ArrayRCP<Packet> recvBuf =
1301 imports.persistingView (curBufOffset, curBufLen);
1302 requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
1306 selfReceiveOffset = curBufOffset;
1308 curBufOffset += curBufLen;
1313 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1314 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3_barrier_);
1315 #endif // TPETRA_DISTRIBUTOR_TIMERS
1318 std::ostringstream os;
1319 os <<
"Proc " << myRank <<
": doPosts(3 args, Teuchos::ArrayRCP, "
1320 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): Barrier" << endl;
1331 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1332 Teuchos::TimeMonitor timeMonSends (*timer_doPosts3_sends_);
1333 #endif // TPETRA_DISTRIBUTOR_TIMERS
1340 size_t numBlocks = numSends_ + selfMessage_;
1341 size_t procIndex = 0;
1342 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
1345 if (procIndex == numBlocks) {
1350 size_t selfIndex = 0;
1353 std::ostringstream os;
1354 os <<
"Proc " << myRank
1355 <<
": doPosts(3 args, Teuchos::ArrayRCP, "
1356 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): Post sends" << endl;
1360 if (indicesTo_.empty()) {
1362 std::ostringstream os;
1363 os <<
"Proc " << myRank
1364 <<
": doPosts(3 args, Teuchos::ArrayRCP, fast): posting sends" << endl;
1370 for (
size_t i = 0; i < numBlocks; ++i) {
1371 size_t p = i + procIndex;
1372 if (p > (numBlocks - 1)) {
1376 if (procsTo_[p] != myRank) {
1378 std::ostringstream os;
1379 os <<
"Proc " << myRank
1380 <<
": doPosts(3 args, Teuchos::ArrayRCP, fast): Post send: "
1381 "{target: " << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
1385 ArrayView<const Packet> tmpSend =
1386 exports.view (startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
1388 if (sendType == Details::DISTRIBUTOR_SEND) {
1389 send<int, Packet> (tmpSend.getRawPtr (),
1390 as<int> (tmpSend.size ()),
1391 procsTo_[p], tag, *comm_);
1393 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1394 ArrayRCP<const Packet> tmpSendBuf =
1395 exports.persistingView (startsTo_[p] * numPackets,
1396 lengthsTo_[p] * numPackets);
1397 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1400 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1401 readySend<int, Packet> (tmpSend.getRawPtr (),
1402 as<int> (tmpSend.size ()),
1403 procsTo_[p], tag, *comm_);
1405 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1406 ssend<int, Packet> (tmpSend.getRawPtr (),
1407 as<int> (tmpSend.size ()),
1408 procsTo_[p], tag, *comm_);
1410 TEUCHOS_TEST_FOR_EXCEPTION(
1411 true, std::logic_error,
1412 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1413 "Invalid send type. We should never get here. "
1414 "Please report this bug to the Tpetra developers.");
1424 std::ostringstream os;
1425 os <<
"Proc " << myRank
1426 <<
": doPosts(3 args, Teuchos::ArrayRCP, fast): Self-send" << endl;
1436 std::copy (exports.begin()+startsTo_[selfNum]*numPackets,
1437 exports.begin()+startsTo_[selfNum]*numPackets+lengthsTo_[selfNum]*numPackets,
1438 imports.begin()+selfReceiveOffset);
1441 std::ostringstream os;
1442 os <<
"Proc " << myRank
1443 <<
": doPosts(3 args, Teuchos::ArrayRCP, fast) done" << endl;
1449 std::ostringstream os;
1450 os <<
"Proc " << myRank
1451 <<
": doPosts(3 args, Teuchos::ArrayRCP, slow): posting sends" << endl;
1457 ArrayRCP<Packet> sendArray (maxSendLength_ * numPackets);
1459 TEUCHOS_TEST_FOR_EXCEPTION(
1460 sendType == Details::DISTRIBUTOR_ISEND, std::logic_error,
1461 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1462 "The \"send buffer\" code path doesn't currently work with nonblocking sends.");
1464 for (
size_t i = 0; i < numBlocks; ++i) {
1465 size_t p = i + procIndex;
1466 if (p > (numBlocks - 1)) {
1470 if (procsTo_[p] != myRank) {
1472 std::ostringstream os;
1473 os <<
"Proc " << myRank
1474 <<
": doPosts(3 args, Teuchos::ArrayRCP, slow): Post send: "
1475 "{target: " << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
1479 typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
1480 size_t sendArrayOffset = 0;
1481 size_t j = startsTo_[p];
1482 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
1483 srcBegin = exports.begin() + indicesTo_[j]*numPackets;
1484 srcEnd = srcBegin + numPackets;
1485 std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
1486 sendArrayOffset += numPackets;
1488 ArrayView<const Packet> tmpSend =
1489 sendArray.view (0, lengthsTo_[p]*numPackets);
1491 if (sendType == Details::DISTRIBUTOR_SEND) {
1492 send<int, Packet> (tmpSend.getRawPtr (),
1493 as<int> (tmpSend.size ()),
1494 procsTo_[p], tag, *comm_);
1496 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1497 ArrayRCP<const Packet> tmpSendBuf =
1498 sendArray.persistingView (0, lengthsTo_[p] * numPackets);
1499 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1502 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1503 readySend<int, Packet> (tmpSend.getRawPtr (),
1504 as<int> (tmpSend.size ()),
1505 procsTo_[p], tag, *comm_);
1507 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1508 ssend<int, Packet> (tmpSend.getRawPtr (),
1509 as<int> (tmpSend.size ()),
1510 procsTo_[p], tag, *comm_);
1513 TEUCHOS_TEST_FOR_EXCEPTION(
1514 true, std::logic_error,
1515 "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
1516 "Invalid send type. We should never get here. "
1517 "Please report this bug to the Tpetra developers.");
1522 selfIndex = startsTo_[p];
1528 std::ostringstream os;
1529 os <<
"Proc " << myRank
1530 <<
": doPosts(3 args, Teuchos::ArrayRCP, slow): Self-send" << endl;
1533 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
1534 std::copy (exports.begin()+indicesTo_[selfIndex]*numPackets,
1535 exports.begin()+indicesTo_[selfIndex]*numPackets + numPackets,
1536 imports.begin() + selfReceiveOffset);
1538 selfReceiveOffset += numPackets;
1542 std::ostringstream os;
1543 os <<
"Proc " << myRank
1544 <<
": doPosts(3 args, Teuchos::ArrayRCP, slow) done" << endl;
1550 std::ostringstream os;
1551 os <<
"Proc " << myRank <<
": doPosts done" << endl;
1556 template <
class Packet>
1558 doPosts (
const Teuchos::ArrayRCP<const Packet>& exports,
1559 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1560 const Teuchos::ArrayRCP<Packet>& imports,
1561 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1563 using Teuchos::Array;
1564 using Teuchos::ArrayRCP;
1565 using Teuchos::ArrayView;
1567 using Teuchos::ireceive;
1568 using Teuchos::isend;
1569 using Teuchos::readySend;
1570 using Teuchos::send;
1571 using Teuchos::ssend;
1572 using Teuchos::TypeNameTraits;
1573 #ifdef HAVE_TEUCHOS_DEBUG
1574 using Teuchos::OSTab;
1575 #endif // HAVE_TEUCHOS_DEBUG
1577 typedef Array<size_t>::size_type size_type;
1579 Teuchos::OSTab tab (out_);
1582 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1583 Teuchos::TimeMonitor timeMon (*timer_doPosts4_);
1584 #endif // TPETRA_DISTRIBUTOR_TIMERS
1589 const bool doBarrier = barrierBetween_;
// Precondition of the ready-send path: a ready send requires a barrier
// between posting the receives and posting the sends, so the combination
// "DISTRIBUTOR_RSEND without doBarrier" is reported as an internal error.
// The message now reads "ready sends" (it previously said "ready ends"),
// matching the equivalent check in the 3-argument doPosts.
1615 TEUCHOS_TEST_FOR_EXCEPTION(
1616 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
1618 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Ready-send "
1619 "version requires a barrier between posting receives and posting ready "
1620 "sends. This should have been checked before. "
1621 "Please report this bug to the Tpetra developers.");
1623 const int myProcID = comm_->getRank ();
1624 size_t selfReceiveOffset = 0;
1626 #ifdef HAVE_TEUCHOS_DEBUG
1628 size_t totalNumImportPackets = 0;
1629 for (
size_t ii = 0; ii < static_cast<size_t> (numImportPacketsPerLID.size ()); ++ii) {
1630 totalNumImportPackets += numImportPacketsPerLID[ii];
1632 TEUCHOS_TEST_FOR_EXCEPTION(
1633 static_cast<size_t> (imports.size ()) < totalNumImportPackets,
1635 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): The 'imports' "
1636 "array must have enough entries to hold the expected number of import "
1637 "packets. imports.size() = " << imports.size() <<
" < "
1638 "totalNumImportPackets = " << totalNumImportPackets <<
".");
1639 #endif // HAVE_TEUCHOS_DEBUG
1646 const int pathTag = 1;
1647 const int tag = this->getTag (pathTag);
1650 TEUCHOS_TEST_FOR_EXCEPTION(
1651 requests_.size () != 0,
1653 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Process "
1654 << myProcID <<
": requests_.size() = " << requests_.size () <<
" != 0.");
1657 std::ostringstream os;
1658 os <<
"Proc " << myProcID <<
": doPosts(4 args, Teuchos::ArrayRCP, "
1659 << (indicesTo_.empty () ?
"fast" :
"slow") <<
")" << endl;
1676 const size_type actualNumReceives = as<size_type> (numReceives_) +
1677 as<size_type> (selfMessage_ ? 1 : 0);
1678 requests_.resize (0);
1686 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1687 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4_recvs_);
1688 #endif // TPETRA_DISTRIBUTOR_TIMERS
1690 size_t curBufferOffset = 0;
1691 size_t curLIDoffset = 0;
1692 for (size_type i = 0; i < actualNumReceives; ++i) {
1693 size_t totalPacketsFrom_i = 0;
1694 for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
1695 totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
1697 curLIDoffset += lengthsFrom_[i];
1698 if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
1707 ArrayRCP<Packet> recvBuf =
1708 imports.persistingView (curBufferOffset, totalPacketsFrom_i);
1709 requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
1713 selfReceiveOffset = curBufferOffset;
1715 curBufferOffset += totalPacketsFrom_i;
1720 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1721 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4_barrier_);
1722 #endif // TPETRA_DISTRIBUTOR_TIMERS
1731 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1732 Teuchos::TimeMonitor timeMonSends (*timer_doPosts4_sends_);
1733 #endif // TPETRA_DISTRIBUTOR_TIMERS
1737 Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
1738 size_t maxNumPackets = 0;
1739 size_t curPKToffset = 0;
1740 for (
size_t pp=0; pp<numSends_; ++pp) {
1741 sendPacketOffsets[pp] = curPKToffset;
1742 size_t numPackets = 0;
1743 for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
1744 numPackets += numExportPacketsPerLID[j];
1746 if (numPackets > maxNumPackets) maxNumPackets = numPackets;
1747 packetsPerSend[pp] = numPackets;
1748 curPKToffset += numPackets;
1753 size_t numBlocks = numSends_+ selfMessage_;
1754 size_t procIndex = 0;
1755 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
1758 if (procIndex == numBlocks) {
1763 size_t selfIndex = 0;
1765 if (indicesTo_.empty()) {
1767 std::ostringstream os;
1768 os <<
"Proc " << myProcID
1769 <<
": doPosts(4 args, Teuchos::ArrayRCP, fast): posting sends" << endl;
1775 for (
size_t i = 0; i < numBlocks; ++i) {
1776 size_t p = i + procIndex;
1777 if (p > (numBlocks - 1)) {
1781 if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
1782 ArrayView<const Packet> tmpSend =
1783 exports.view (sendPacketOffsets[p], packetsPerSend[p]);
1785 if (sendType == Details::DISTRIBUTOR_SEND) {
1786 send<int, Packet> (tmpSend.getRawPtr (),
1787 as<int> (tmpSend.size ()),
1788 procsTo_[p], tag, *comm_);
1790 else if (sendType == Details::DISTRIBUTOR_RSEND) {
1791 readySend<int, Packet> (tmpSend.getRawPtr (),
1792 as<int> (tmpSend.size ()),
1793 procsTo_[p], tag, *comm_);
1795 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1796 ArrayRCP<const Packet> tmpSendBuf =
1797 exports.persistingView (sendPacketOffsets[p], packetsPerSend[p]);
1798 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1801 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1802 ssend<int, Packet> (tmpSend.getRawPtr (),
1803 as<int> (tmpSend.size ()),
1804 procsTo_[p], tag, *comm_);
1807 TEUCHOS_TEST_FOR_EXCEPTION(
1808 true, std::logic_error,
1809 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1810 "Invalid send type. We should never get here. Please report "
1811 "this bug to the Tpetra developers.");
1820 std::copy (exports.begin()+sendPacketOffsets[selfNum],
1821 exports.begin()+sendPacketOffsets[selfNum]+packetsPerSend[selfNum],
1822 imports.begin()+selfReceiveOffset);
1825 std::ostringstream os;
1826 os <<
"Proc " << myProcID
1827 <<
": doPosts(4 args, Teuchos::ArrayRCP, fast) done" << endl;
1833 std::ostringstream os;
1834 os <<
"Proc " << myProcID
1835 <<
": doPosts(4 args, Teuchos::ArrayRCP, slow): posting sends" << endl;
1840 ArrayRCP<Packet> sendArray (maxNumPackets);
1842 TEUCHOS_TEST_FOR_EXCEPTION(
1843 sendType == Details::DISTRIBUTOR_ISEND,
1845 "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1846 "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
1848 Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
1850 for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
1851 indicesOffsets[j] = ioffset;
1852 ioffset += numExportPacketsPerLID[j];
1855 for (
size_t i = 0; i < numBlocks; ++i) {
1856 size_t p = i + procIndex;
1857 if (p > (numBlocks - 1)) {
1861 if (procsTo_[p] != myProcID) {
1862 typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
1863 size_t sendArrayOffset = 0;
1864 size_t j = startsTo_[p];
1865 size_t numPacketsTo_p = 0;
1866 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
1867 srcBegin = exports.begin() + indicesOffsets[j];
1868 srcEnd = srcBegin + numExportPacketsPerLID[j];
1869 numPacketsTo_p += numExportPacketsPerLID[j];
1870 std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
1871 sendArrayOffset += numExportPacketsPerLID[j];
1873 if (numPacketsTo_p > 0) {
1874 ArrayView<const Packet> tmpSend =
1875 sendArray.view (0, numPacketsTo_p);
1877 if (sendType == Details::DISTRIBUTOR_RSEND) {
1878 readySend<int, Packet> (tmpSend.getRawPtr (),
1879 as<int> (tmpSend.size ()),
1880 procsTo_[p], tag, *comm_);
1882 else if (sendType == Details::DISTRIBUTOR_ISEND) {
1883 ArrayRCP<const Packet> tmpSendBuf =
1884 sendArray.persistingView (0, numPacketsTo_p);
1885 requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1888 else if (sendType == Details::DISTRIBUTOR_SSEND) {
1889 ssend<int, Packet> (tmpSend.getRawPtr (),
1890 as<int> (tmpSend.size ()),
1891 procsTo_[p], tag, *comm_);
1894 send<int, Packet> (tmpSend.getRawPtr (),
1895 as<int> (tmpSend.size ()),
1896 procsTo_[p], tag, *comm_);
1902 selfIndex = startsTo_[p];
1907 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
1908 std::copy (exports.begin()+indicesOffsets[selfIndex],
1909 exports.begin()+indicesOffsets[selfIndex]+numExportPacketsPerLID[selfIndex],
1910 imports.begin() + selfReceiveOffset);
1911 selfReceiveOffset += numExportPacketsPerLID[selfIndex];
1916 std::ostringstream os;
1917 os <<
"Proc " << myProcID
1918 <<
": doPosts(4 args, Teuchos::ArrayRCP, slow) done" << endl;
// NOTE(review): the line carrying this function's name is not shown in
// this listing.  From its position (directly before the 4-argument
// doReversePostsAndWaits) and its single 'imports' ArrayView parameter it
// is presumably the 3-argument doReversePostsAndWaits -- confirm.
1924 template <
class Packet>
1928 const Teuchos::ArrayView<Packet>& imports)
1930 using Teuchos::arcp;
1931 using Teuchos::ArrayRCP;
// Wrap the caller's export buffer in an ArrayRCP without copying it.
// NOTE(review): the trailing 'false' is presumably ArrayRCP's
// has_ownership flag (non-owning wrapper) -- confirm against Teuchos.
1944 typedef typename ArrayRCP<const Packet>::size_type size_type;
1945 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr(), as<size_type> (0),
1946 exports.size(),
false);
// Tail of a call whose start is not shown; 'imports' is wrapped the same
// way as 'exports'.
1953 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false));
// Record the size of this round in bytes.
1956 lastRoundBytesSend_ = exports.size() *
sizeof(Packet);
1957 lastRoundBytesRecv_ = imports.size() *
sizeof(Packet);
// doReversePostsAndWaits, 4-argument Teuchos::ArrayView overload
// (identified by its own error message), taking per-LID packet counts.
// Only part of the definition is shown in this listing.
//
// Throws std::runtime_error if nonblocking requests are still pending.
1960 template <
class Packet>
1963 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1964 const Teuchos::ArrayView<Packet> &imports,
1965 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1968 using Teuchos::arcp;
1969 using Teuchos::ArrayRCP;
// Refuse to start a new round while a previous one is still in flight.
1971 TEUCHOS_TEST_FOR_EXCEPTION(
1972 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
1973 "doReversePostsAndWaits(4 args): There are " << requests_.size ()
1974 <<
" outstanding nonblocking messages pending. It is incorrect to call "
1975 "this method with posts outstanding.");
// Wrap the caller's export buffer in an ArrayRCP without copying it.
// NOTE(review): the trailing 'false' is presumably ArrayRCP's
// has_ownership flag (non-owning wrapper) -- confirm against Teuchos.
1988 typedef typename ArrayRCP<const Packet>::size_type size_type;
1989 ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (), as<size_type> (0),
1990 exports.size (),
false);
// Remaining arguments of a call whose first line is not shown; 'imports'
// is wrapped the same way as 'exports'.
1992 numExportPacketsPerLID,
1993 arcp<Packet> (imports.getRawPtr (), 0, imports.size (),
false),
1994 numImportPacketsPerLID);
// Record the size of this round in bytes.
1997 lastRoundBytesSend_ = exports.size() *
sizeof(Packet);
1998 lastRoundBytesRecv_ = imports.size() *
sizeof(Packet);
// doReversePosts, 3-argument Teuchos::ArrayRCP overload (identified by its
// own error message).  Only part of the definition is shown in this
// listing.
//
// Throws std::runtime_error when indicesTo_ is non-empty.  Otherwise it
// forwards (exports, numPackets, imports) to doPosts() of the reverse
// Distributor.
2001 template <
class Packet>
2005 const Teuchos::ArrayRCP<Packet>& imports)
// Reverse mode is only supported when indicesTo_ is empty.
2008 TEUCHOS_TEST_FOR_EXCEPTION(
2009 ! indicesTo_.empty (), std::runtime_error,
2010 "Tpetra::Distributor::doReversePosts(3 args): Can only do reverse "
2011 "communication when original data are blocked by process.");
// Build the reverse Distributor on first use.
2012 if (reverseDistributor_.is_null ()) {
2013 createReverseDistributor ();
// Delegate the actual posting to the reverse Distributor.
2015 reverseDistributor_->doPosts (exports, numPackets, imports);
// doReversePosts, 4-argument Teuchos::ArrayRCP overload: the number of
// packets per LID is given by numExportPacketsPerLID and
// numImportPacketsPerLID.  Only part of the definition is shown in this
// listing.
//
// Throws std::runtime_error when indicesTo_ is non-empty.  Otherwise it
// forwards all four arguments to doPosts() of the reverse Distributor.
//
// The exception text now says "(4 args)"; it previously repeated the
// "(3 args)" wording of the 3-argument overload, which misidentified the
// failing method.
2018 template <
class Packet>
2021 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2022 const Teuchos::ArrayRCP<Packet>& imports,
2023 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Reverse mode is only supported when indicesTo_ is empty.
2026 TEUCHOS_TEST_FOR_EXCEPTION(
2027 ! indicesTo_.empty (), std::runtime_error,
2028 "Tpetra::Distributor::doReversePosts(4 args): Can only do reverse "
2029 "communication when original data are blocked by process.");
// Build the reverse Distributor on first use.
2030 if (reverseDistributor_.is_null ()) {
2031 createReverseDistributor ();
// Delegate the actual posting to the reverse Distributor.
2033 reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
2034 imports, numImportPacketsPerLID);
// doPostsAndWaits, 3-argument Kokkos::View overload.  The enable_if return
// type restricts it to argument types for which Kokkos::Impl::is_view is
// true.  Only part of the definition is shown in this listing; in
// particular, the lines that open and close the scopes around the three
// diagnostic-output sections below are elided (each section declares its
// own 'myRank').
//
// Throws std::runtime_error if nonblocking requests are still pending.
2037 template <
class ExpView,
class ImpView>
2038 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2040 doPostsAndWaits (
const ExpView& exports,
2042 const ImpView& imports)
// Tab objects are held through RCPs so that they can be created later.
2049 RCP<Teuchos::OSTab> tab0, tab1;
// Diagnostic output: report the send type and barrier setting.
2051 tab0 = rcp (
new Teuchos::OSTab (out_));
2052 const int myRank = comm_->getRank ();
2053 std::ostringstream os;
2054 os <<
"Proc " << myRank
2055 <<
": Distributor::doPostsAndWaits(3 args, Kokkos): "
2056 <<
"{sendType: " << DistributorSendTypeEnumToString (sendType_)
2057 <<
", barrierBetween: " << barrierBetween_ <<
"}" << endl;
2059 tab1 = rcp (
new Teuchos::OSTab (out_));
// Refuse to start a new round while a previous one is still in flight.
2062 TEUCHOS_TEST_FOR_EXCEPTION(
2063 requests_.size () != 0, std::runtime_error,
"Tpetra::Distributor::"
2064 "doPostsAndWaits(3 args): There are " << requests_.size () <<
2065 " outstanding nonblocking messages pending. It is incorrect to call "
2066 "this method with posts outstanding.");
// Diagnostic output before posting.
2069 const int myRank = comm_->getRank ();
2070 std::ostringstream os;
2071 os <<
"Proc " << myRank
2072 <<
": Distributor::doPostsAndWaits: Call doPosts" << endl;
// Post the receives and sends.
2075 doPosts (exports, numPackets, imports);
// Diagnostic output before waiting (the wait call itself is not shown).
2077 const int myRank = comm_->getRank ();
2078 std::ostringstream os;
2079 os <<
"Proc " << myRank
2080 <<
": Distributor::doPostsAndWaits: Call doWaits" << endl;
// doPostsAndWaits, 4-argument Kokkos::View overload taking per-LID packet
// counts.  The enable_if return type restricts it to argument types for
// which Kokkos::Impl::is_view is true.  Only part of the definition is
// shown in this listing.
//
// Throws std::runtime_error if nonblocking requests are still pending.
2086 template <
class ExpView,
class ImpView>
2087 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2089 doPostsAndWaits (
const ExpView& exports,
2090 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2091 const ImpView& imports,
2092 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Refuse to start a new round while a previous one is still in flight.
2094 TEUCHOS_TEST_FOR_EXCEPTION(
2095 requests_.size () != 0, std::runtime_error,
2096 "Tpetra::Distributor::doPostsAndWaits(4 args): There are "
2097 << requests_.size () <<
" outstanding nonblocking messages pending. "
2098 "It is incorrect to call this method with posts outstanding.");
// Post the receives and sends.  NOTE(review): the wait that presumably
// follows is on a line not shown in this listing -- confirm.
2100 doPosts (exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
// Three-argument doPosts overload for Kokkos::View buffers (enabled only when
// both ExpView and ImpView satisfy Kokkos::Impl::is_view).
//
// Posts one ireceive per remote source process into a subview of 'imports',
// then sends packets from 'exports' using the primitive selected by sendType.
// When indicesTo_ is empty (the "fast" path) data are sent directly from
// subviews of 'exports'; otherwise (the "slow" path) each block is first
// packed into a temporary sendArray.  A message addressed to this process
// itself is handled with deep_copy_offset rather than a send.
//
// NOTE(review): numPackets, sendType, selfNum, gblBad, 'as' and 'endl' are
// used below but their declarations are not visible in this excerpt.
2105 template <
class ExpView,
class ImpView>
2106 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2108 doPosts (
const ExpView &exports,
2110 const ImpView &imports)
2112 using Teuchos::Array;
2114 using Teuchos::FancyOStream;
2115 using Teuchos::includesVerbLevel;
2116 using Teuchos::ireceive;
2117 using Teuchos::isend;
2118 using Teuchos::OSTab;
2119 using Teuchos::readySend;
2120 using Teuchos::send;
2121 using Teuchos::ssend;
2122 using Teuchos::TypeNameTraits;
2123 using Teuchos::typeName;
2125 using Kokkos::Compat::create_const_view;
2126 using Kokkos::Compat::create_view;
2127 using Kokkos::Compat::subview_offset;
2128 using Kokkos::Compat::deep_copy_offset;
2129 typedef Array<size_t>::size_type size_type;
2130 typedef ExpView exports_view_type;
2131 typedef ImpView imports_view_type;
// With CUDA enabled, reject views that live in CudaUVMSpace at compile time.
2134 #ifdef KOKKOS_ENABLE_CUDA
2135 static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
2136 ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
2137 "Please do not use Tpetra::Distributor with UVM "
2138 "allocations. See GitHub issue #1088.");
2139 #endif // KOKKOS_ENABLE_CUDA
2141 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2142 Teuchos::TimeMonitor timeMon (*timer_doPosts3_);
2143 #endif // TPETRA_DISTRIBUTOR_TIMERS
2145 const int myRank = comm_->getRank ();
// Local copy of the barrierBetween_ setting.
2149 const bool doBarrier = barrierBetween_;
2151 Teuchos::OSTab tab0 (out_);
2153 std::ostringstream os;
2154 os <<
"Proc " << myRank
2155 <<
": Distributor::doPosts(3 args, Kokkos)" << endl;
2158 Teuchos::OSTab tab1 (out_);
// A ready send combined with no barrier between receives and sends is
// treated as an internal error.
2160 TEUCHOS_TEST_FOR_EXCEPTION(
2161 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
2163 "Tpetra::Distributor::doPosts(3 args, Kokkos): Ready-send version "
2164 "requires a barrier between posting receives and posting ready sends. "
2165 "This should have been checked before. "
2166 "Please report this bug to the Tpetra developers.");
// Offset in 'imports' at which the self message (if any) will be written.
2168 size_t selfReceiveOffset = 0;
// Number of packets 'imports' must hold: totalReceiveLength_ * numPackets.
2175 const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
2178 std::ostringstream os;
2179 os <<
"Proc " << myRank <<
": doPosts: totalNumImportPackets = " <<
2180 totalNumImportPackets <<
" = " << totalReceiveLength_ <<
" * " <<
2181 numPackets <<
"; imports.extent(0) = " << imports.extent (0)
// Debug builds: lclBad flags an undersized 'imports' on this process and is
// max-reduced over comm_ into gblBad.
2186 #ifdef HAVE_TPETRA_DEBUG
2189 const size_t importBufSize = static_cast<size_t> (imports.extent (0));
2190 const int lclBad = (importBufSize < totalNumImportPackets) ? 1 : 0;
2192 using Teuchos::reduceAll;
2193 using Teuchos::REDUCE_MAX;
2194 using Teuchos::outArg;
2195 reduceAll (*comm_, REDUCE_MAX, lclBad, outArg (gblBad));
2196 TEUCHOS_TEST_FOR_EXCEPTION
2199 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2200 "On one or more MPI processes, the 'imports' array "
2201 "does not have enough entries to hold the expected number of "
2202 "import packets. ");
// Local length check on 'imports'; the message reports the actual sizes.
2205 TEUCHOS_TEST_FOR_EXCEPTION
2206 (static_cast<size_t> (imports.extent (0)) < totalNumImportPackets,
2208 "Tpetra::Distributor::doPosts(3 args, Kokkos): The 'imports' "
2209 "array must have enough entries to hold the expected number of import "
2210 "packets. imports.extent(0) = " << imports.extent (0) <<
" < "
2211 "totalNumImportPackets = " << totalNumImportPackets <<
" = "
2212 "totalReceiveLength_ (" << totalReceiveLength_ <<
") * numPackets ("
2213 << numPackets <<
").");
2214 #endif // HAVE_TPETRA_DEBUG
// Message tag for this code path; this overload passes pathTag 0 to getTag.
2222 const int pathTag = 0;
2223 const int tag = this->getTag (pathTag);
2225 #ifdef HAVE_TPETRA_DEBUG
2226 TEUCHOS_TEST_FOR_EXCEPTION
2227 (requests_.size () != 0,
2229 "Tpetra::Distributor::doPosts(3 args, Kokkos): Process "
2230 << myRank <<
": requests_.size() = " << requests_.size () <<
" != 0.");
2231 #endif // HAVE_TPETRA_DEBUG
// Number of receive blocks to walk: numReceives_, plus one when there is a
// self message.
2246 const size_type actualNumReceives = as<size_type> (numReceives_) +
2247 as<size_type> (selfMessage_ ? 1 : 0);
2248 requests_.resize (0);
2251 std::ostringstream os;
2252 os <<
"Proc " << myRank <<
": doPosts(3 args, Kokkos, "
2253 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): Post receives"
2264 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2265 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3_recvs_);
2266 #endif // TPETRA_DISTRIBUTOR_TIMERS
// curBufferOffset is the position in 'imports' where block i's data begin;
// it advances by curBufLen after every block.
2268 size_t curBufferOffset = 0;
2269 for (size_type i = 0; i < actualNumReceives; ++i) {
2270 const size_t curBufLen = lengthsFrom_[i] * numPackets;
2271 if (procsFrom_[i] != myRank) {
2273 std::ostringstream os;
2274 os <<
"Proc " << myRank <<
": doPosts(3 args, Kokkos, "
2275 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): "
2276 <<
"Post irecv: {source: " << procsFrom_[i]
2277 <<
", tag: " << tag <<
"}" << endl;
// Guard against the receive window running past the end of 'imports'.
2287 TEUCHOS_TEST_FOR_EXCEPTION(
2288 curBufferOffset + curBufLen > static_cast<size_t> (imports.size ()),
2289 std::logic_error,
"Tpetra::Distributor::doPosts(3 args, Kokkos): "
2290 "Exceeded size of 'imports' array in packing loop on Process " <<
2291 myRank <<
". imports.size() = " << imports.size () <<
" < "
2292 "curBufferOffset(" << curBufferOffset <<
") + curBufLen(" <<
// Receive directly into the window of 'imports' reserved for this source and
// keep the returned request in requests_.
2294 imports_view_type recvBuf =
2295 subview_offset (imports, curBufferOffset, curBufLen);
2296 requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
// Remember where the self message belongs in 'imports'; it is filled by the
// self-copy further down.
2300 selfReceiveOffset = curBufferOffset;
2302 curBufferOffset += curBufLen;
2307 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2308 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3_barrier_);
2309 #endif // TPETRA_DISTRIBUTOR_TIMERS
2312 std::ostringstream os;
2313 os <<
"Proc " << myRank <<
": doPosts(3 args, Kokkos, "
2314 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): Barrier" << endl;
2325 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2326 Teuchos::TimeMonitor timeMonSends (*timer_doPosts3_sends_);
2327 #endif // TPETRA_DISTRIBUTOR_TIMERS
// numBlocks counts the send blocks, including the self message if present.
2334 size_t numBlocks = numSends_ + selfMessage_;
// Scan procsTo_ for the first block whose target rank is not below myRank.
// NOTE(review): the loop body and the body of the following 'if' are not
// visible in this excerpt.
2335 size_t procIndex = 0;
2336 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
2339 if (procIndex == numBlocks) {
2344 size_t selfIndex = 0;
2347 std::ostringstream os;
2348 os <<
"Proc " << myRank <<
": doPosts(3 args, Kokkos, "
2349 << (indicesTo_.empty () ?
"fast" :
"slow") <<
"): Post sends" << endl;
// Fast path: indicesTo_ is empty, so block p is sent straight from the range
// of 'exports' given by startsTo_[p] and lengthsTo_[p].
2353 if (indicesTo_.empty()) {
2355 std::ostringstream os;
2356 os <<
"Proc " << myRank
2357 <<
": doPosts(3 args, Kokkos, fast): posting sends" << endl;
// Visit the blocks starting from procIndex.
// NOTE(review): the body of the 'p > numBlocks - 1' test is not visible;
// presumably it wraps p back into range.
2363 for (
size_t i = 0; i < numBlocks; ++i) {
2364 size_t p = i + procIndex;
2365 if (p > (numBlocks - 1)) {
2369 if (procsTo_[p] != myRank) {
2371 std::ostringstream os;
2372 os <<
"Proc " << myRank <<
": doPosts(3 args, Kokkos, fast): Post send: "
2373 "{target: " << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
// Window of 'exports' holding block p, measured in packets.
2386 exports_view_type tmpSend = subview_offset(
2387 exports, startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
// Pick the send primitive that matches sendType.
2389 if (sendType == Details::DISTRIBUTOR_SEND) {
2391 as<int> (tmpSend.size ()),
2392 procsTo_[p], tag, *comm_);
2394 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2395 exports_view_type tmpSendBuf =
2396 subview_offset (exports, startsTo_[p] * numPackets,
2397 lengthsTo_[p] * numPackets);
2398 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2401 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2402 readySend<int> (tmpSend,
2403 as<int> (tmpSend.size ()),
2404 procsTo_[p], tag, *comm_);
2406 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2407 ssend<int> (tmpSend,
2408 as<int> (tmpSend.size ()),
2409 procsTo_[p], tag, *comm_);
2411 TEUCHOS_TEST_FOR_EXCEPTION(
2414 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2415 "Invalid send type. We should never get here. "
2416 "Please report this bug to the Tpetra developers.");
2426 std::ostringstream os;
2427 os <<
"Proc " << myRank
2428 <<
": doPosts(3 args, Kokkos, fast): Self-send" << endl;
// Self message: copy the block from 'exports' into 'imports' at
// selfReceiveOffset instead of sending it.
2438 deep_copy_offset(imports, exports, selfReceiveOffset,
2439 startsTo_[selfNum]*numPackets,
2440 lengthsTo_[selfNum]*numPackets);
2443 std::ostringstream os;
2444 os <<
"Proc " << myRank <<
": doPosts(3 args, Kokkos, fast) done" << endl;
2450 std::ostringstream os;
2451 os <<
"Proc " << myRank
2452 <<
": doPosts(3 args, Kokkos, slow): posting sends" << endl;
// Slow path: each block is packed into sendArray before it is sent.  The
// buffer has the value type, layout, device and memory traits of ExpView and
// holds maxSendLength_ * numPackets entries.
2456 typedef typename ExpView::non_const_value_type Packet;
2457 typedef typename ExpView::array_layout Layout;
2458 typedef typename ExpView::device_type Device;
2459 typedef typename ExpView::memory_traits Mem;
2460 Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray",
2461 maxSendLength_ * numPackets);
// Nonblocking sends are rejected on this path.
// NOTE(review): given this check, the DISTRIBUTOR_ISEND branch further down
// appears to be unreachable; confirm before relying on it.
2465 TEUCHOS_TEST_FOR_EXCEPTION(
2466 sendType == Details::DISTRIBUTOR_ISEND,
2468 "Tpetra::Distributor::doPosts(3 args, Kokkos): The \"send buffer\" code path "
2469 "doesn't currently work with nonblocking sends.");
2471 for (
size_t i = 0; i < numBlocks; ++i) {
2472 size_t p = i + procIndex;
2473 if (p > (numBlocks - 1)) {
2477 if (procsTo_[p] != myRank) {
2479 std::ostringstream os;
2480 os <<
"Proc " << myRank
2481 <<
": doPosts(3 args, Kokkos, slow): Post send: {target: "
2482 << procsTo_[p] <<
", tag: " << tag <<
"}" << endl;
// Pack block p: for each of its lengthsTo_[p] entries, copy numPackets
// packets from position indicesTo_[j]*numPackets of 'exports' into the next
// free slot of sendArray.
2486 size_t sendArrayOffset = 0;
2487 size_t j = startsTo_[p];
2488 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
2489 deep_copy_offset(sendArray, exports, sendArrayOffset,
2490 indicesTo_[j]*numPackets, numPackets);
2491 sendArrayOffset += numPackets;
2494 subview_offset(sendArray,
size_t(0), lengthsTo_[p]*numPackets);
// Pick the send primitive that matches sendType.
2496 if (sendType == Details::DISTRIBUTOR_SEND) {
2498 as<int> (tmpSend.size ()),
2499 procsTo_[p], tag, *comm_);
2501 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2502 exports_view_type tmpSendBuf =
2503 subview_offset (sendArray,
size_t(0), lengthsTo_[p] * numPackets);
2504 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2507 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2508 readySend<int> (tmpSend,
2509 as<int> (tmpSend.size ()),
2510 procsTo_[p], tag, *comm_);
2512 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2513 ssend<int> (tmpSend,
2514 as<int> (tmpSend.size ()),
2515 procsTo_[p], tag, *comm_);
2518 TEUCHOS_TEST_FOR_EXCEPTION(
2521 "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2522 "Invalid send type. We should never get here. "
2523 "Please report this bug to the Tpetra developers.");
// Remember where the self block starts within indicesTo_.
2528 selfIndex = startsTo_[p];
2534 std::ostringstream os;
2535 os <<
"Proc " << myRank
2536 <<
": doPosts(3 args, Kokkos, slow): Self-send" << endl;
// Self message: copy numPackets packets per entry from 'exports' into
// 'imports', advancing selfReceiveOffset after each copy.
2539 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
2540 deep_copy_offset(imports, exports, selfReceiveOffset,
2541 indicesTo_[selfIndex]*numPackets, numPackets);
2543 selfReceiveOffset += numPackets;
2547 std::ostringstream os;
2548 os <<
"Proc " << myRank
2549 <<
": doPosts(3 args, Kokkos, slow) done" << endl;
2555 std::ostringstream os;
2556 os <<
"Proc " << myRank <<
": doPosts done" << endl;
// Four-argument doPosts overload for Kokkos::View buffers (enabled only when
// both ExpView and ImpView satisfy Kokkos::Impl::is_view).
//
// Unlike the three-argument overload, the number of packets varies per LID:
// numExportPacketsPerLID and numImportPacketsPerLID give the counts for the
// export and import sides.  Receives are posted into subviews of 'imports',
// then data are sent either directly from 'exports' (indicesTo_ empty, the
// "fast" path) or through a temporary sendArray (the "slow" path).  A message
// addressed to this process itself is copied with deep_copy_offset.
//
// NOTE(review): sendType, selfNum, ioffset, 'as' and 'endl' are used below
// but their declarations are not visible in this excerpt.
2561 template <
class ExpView,
class ImpView>
2562 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2564 doPosts (
const ExpView &exports,
2565 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2566 const ImpView &imports,
2567 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
2569 using Teuchos::Array;
2571 using Teuchos::ireceive;
2572 using Teuchos::isend;
2573 using Teuchos::readySend;
2574 using Teuchos::send;
2575 using Teuchos::ssend;
2576 using Teuchos::TypeNameTraits;
2577 #ifdef HAVE_TEUCHOS_DEBUG
2578 using Teuchos::OSTab;
2579 #endif // HAVE_TEUCHOS_DEBUG
2581 using Kokkos::Compat::create_const_view;
2582 using Kokkos::Compat::create_view;
2583 using Kokkos::Compat::subview_offset;
2584 using Kokkos::Compat::deep_copy_offset;
2585 typedef Array<size_t>::size_type size_type;
2586 typedef ExpView exports_view_type;
2587 typedef ImpView imports_view_type;
// With CUDA enabled, reject views that live in CudaUVMSpace at compile time.
2590 #ifdef KOKKOS_ENABLE_CUDA
2591 static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
2592 ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
2593 "Please do not use Tpetra::Distributor with UVM "
2594 "allocations. See GitHub issue #1088.");
2595 #endif // KOKKOS_ENABLE_CUDA
2597 Teuchos::OSTab tab (out_);
2599 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2600 Teuchos::TimeMonitor timeMon (*timer_doPosts4_);
2601 #endif // TPETRA_DISTRIBUTOR_TIMERS
// Local copy of the barrierBetween_ setting.
2606 const bool doBarrier = barrierBetween_;
// A ready send combined with no barrier between receives and sends is
// reported as std::logic_error.
2632 TEUCHOS_TEST_FOR_EXCEPTION(
2633 sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
2634 std::logic_error,
"Tpetra::Distributor::doPosts(4 args, Kokkos): Ready-send "
2635 "version requires a barrier between posting receives and posting ready "
2636 "sends. This should have been checked before. "
2637 "Please report this bug to the Tpetra developers.");
2639 const int myProcID = comm_->getRank ();
// Offset in 'imports' at which the self message (if any) will be written.
2640 size_t selfReceiveOffset = 0;
// Debug check: 'imports' must be able to hold the sum of all entries of
// numImportPacketsPerLID.
2642 #ifdef HAVE_TEUCHOS_DEBUG
2644 size_t totalNumImportPackets = 0;
2645 for (size_type ii = 0; ii < numImportPacketsPerLID.size (); ++ii) {
2646 totalNumImportPackets += numImportPacketsPerLID[ii];
2648 TEUCHOS_TEST_FOR_EXCEPTION(
2649 imports.extent (0) < totalNumImportPackets, std::runtime_error,
2650 "Tpetra::Distributor::doPosts(4 args, Kokkos): The 'imports' array must have "
2651 "enough entries to hold the expected number of import packets. "
2652 "imports.extent(0) = " << imports.extent (0) <<
" < "
2653 "totalNumImportPackets = " << totalNumImportPackets <<
".");
2654 #endif // HAVE_TEUCHOS_DEBUG
// Message tag for this code path; this overload passes pathTag 1 to getTag.
2661 const int pathTag = 1;
2662 const int tag = this->getTag (pathTag);
// There must be no requests left over from an earlier post.
2665 TEUCHOS_TEST_FOR_EXCEPTION(
2666 requests_.size () != 0, std::logic_error,
"Tpetra::Distributor::"
2667 "doPosts(4 args, Kokkos): Process " << myProcID <<
": requests_.size () = "
2668 << requests_.size () <<
" != 0.");
2671 std::ostringstream os;
2672 os <<
"Proc " << myProcID <<
": doPosts(4 args, Kokkos, "
2673 << (indicesTo_.empty () ?
"fast" :
"slow") <<
")" << endl;
// Number of receive blocks to walk: numReceives_, plus one when there is a
// self message.
2690 const size_type actualNumReceives = as<size_type> (numReceives_) +
2691 as<size_type> (selfMessage_ ? 1 : 0);
2692 requests_.resize (0);
2700 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2701 Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4_recvs_);
2702 #endif // TPETRA_DISTRIBUTOR_TIMERS
// curBufferOffset is a packet offset into 'imports'; curLIDoffset is an
// entry offset into numImportPacketsPerLID.
2704 size_t curBufferOffset = 0;
2705 size_t curLIDoffset = 0;
2706 for (size_type i = 0; i < actualNumReceives; ++i) {
// Add up the packets expected from source i over its lengthsFrom_[i] LIDs.
2707 size_t totalPacketsFrom_i = 0;
2708 for (
size_t j = 0; j < lengthsFrom_[i]; ++j) {
2709 totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
2711 curLIDoffset += lengthsFrom_[i];
// Post a receive only for a remote source that contributes at least one
// packet.
2712 if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
2721 imports_view_type recvBuf =
2722 subview_offset (imports, curBufferOffset, totalPacketsFrom_i);
2723 requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
// Remember where the self message belongs in 'imports'; it is filled by the
// self-copy further down.
2727 selfReceiveOffset = curBufferOffset;
2729 curBufferOffset += totalPacketsFrom_i;
2734 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2735 Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4_barrier_);
2736 #endif // TPETRA_DISTRIBUTOR_TIMERS
2745 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2746 Teuchos::TimeMonitor timeMonSends (*timer_doPosts4_sends_);
2747 #endif // TPETRA_DISTRIBUTOR_TIMERS
// For each of the first numSends_ blocks, record its starting packet offset
// (sendPacketOffsets) and its packet count (packetsPerSend); maxNumPackets
// tracks the largest count seen.
// NOTE(review): both arrays have numSends_ entries, yet they are indexed
// below by p, which ranges up to numBlocks - 1 = numSends_ + selfMessage_ - 1,
// and by selfNum.  Confirm this cannot read past the end when selfMessage_
// is set.
2751 Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
2752 size_t maxNumPackets = 0;
2753 size_t curPKToffset = 0;
2754 for (
size_t pp=0; pp<numSends_; ++pp) {
2755 sendPacketOffsets[pp] = curPKToffset;
2756 size_t numPackets = 0;
2757 for (
size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
2758 numPackets += numExportPacketsPerLID[j];
2760 if (numPackets > maxNumPackets) maxNumPackets = numPackets;
2761 packetsPerSend[pp] = numPackets;
2762 curPKToffset += numPackets;
// numBlocks counts the send blocks, including the self message if present.
2767 size_t numBlocks = numSends_+ selfMessage_;
// Scan procsTo_ for the first block whose target rank is not below myProcID.
// NOTE(review): the loop body and the body of the following 'if' are not
// visible in this excerpt.
2768 size_t procIndex = 0;
2769 while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
2772 if (procIndex == numBlocks) {
2777 size_t selfIndex = 0;
// Fast path: indicesTo_ is empty, so block p is sent straight from 'exports'
// using the offsets and counts computed above.
2778 if (indicesTo_.empty()) {
2780 std::ostringstream os;
2781 os <<
"Proc " << myProcID
2782 <<
": doPosts(4 args, Kokkos, fast): posting sends" << endl;
// Visit the blocks starting from procIndex.
// NOTE(review): the body of the 'p > numBlocks - 1' test is not visible;
// presumably it wraps p back into range.
2788 for (
size_t i = 0; i < numBlocks; ++i) {
2789 size_t p = i + procIndex;
2790 if (p > (numBlocks - 1)) {
// Blocks with zero packets are not sent to remote processes.
2794 if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
2795 exports_view_type tmpSend =
2796 subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]);
// Pick the send primitive that matches sendType.
2798 if (sendType == Details::DISTRIBUTOR_SEND) {
2800 as<int> (tmpSend.size ()),
2801 procsTo_[p], tag, *comm_);
2803 else if (sendType == Details::DISTRIBUTOR_RSEND) {
2804 readySend<int> (tmpSend,
2805 as<int> (tmpSend.size ()),
2806 procsTo_[p], tag, *comm_);
2808 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2809 exports_view_type tmpSendBuf =
2810 subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]);
2811 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2814 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2815 ssend<int> (tmpSend,
2816 as<int> (tmpSend.size ()),
2817 procsTo_[p], tag, *comm_);
2820 TEUCHOS_TEST_FOR_EXCEPTION(
2821 true, std::logic_error,
2822 "Tpetra::Distributor::doPosts(4 args, Kokkos): "
2823 "Invalid send type. We should never get here. "
2824 "Please report this bug to the Tpetra developers.");
// Self message: copy the block from 'exports' into 'imports' at
// selfReceiveOffset instead of sending it.
2833 deep_copy_offset(imports, exports, selfReceiveOffset,
2834 sendPacketOffsets[selfNum], packetsPerSend[selfNum]);
2837 std::ostringstream os;
2838 os <<
"Proc " << myProcID <<
": doPosts(4 args, Kokkos, fast) done" << endl;
2844 std::ostringstream os;
2845 os <<
"Proc " << myProcID <<
": doPosts(4 args, Kokkos, slow): posting sends" << endl;
// Slow path: each block is packed into sendArray before it is sent.  The
// buffer has the value type, layout, device and memory traits of ExpView and
// holds maxNumPackets entries.
2850 typedef typename ExpView::non_const_value_type Packet;
2851 typedef typename ExpView::array_layout Layout;
2852 typedef typename ExpView::device_type Device;
2853 typedef typename ExpView::memory_traits Mem;
2854 Kokkos::View<Packet*,Layout,Device,Mem> sendArray (
"sendArray", maxNumPackets);
// Nonblocking sends are rejected on this path.
// NOTE(review): given this check, the DISTRIBUTOR_ISEND branch further down
// appears to be unreachable; confirm before relying on it.
2856 TEUCHOS_TEST_FOR_EXCEPTION(
2857 sendType == Details::DISTRIBUTOR_ISEND,
2859 "Tpetra::Distributor::doPosts(4 args, Kokkos): "
2860 "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
// indicesOffsets[j] receives the running value of ioffset before LID j's
// packet count is added to it, i.e. where LID j's packets start in 'exports'
// provided ioffset starts at zero (its declaration is not visible here).
2862 Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
2864 for (
int j=0; j<numExportPacketsPerLID.size(); ++j) {
2865 indicesOffsets[j] = ioffset;
2866 ioffset += numExportPacketsPerLID[j];
2869 for (
size_t i = 0; i < numBlocks; ++i) {
2870 size_t p = i + procIndex;
2871 if (p > (numBlocks - 1)) {
2875 if (procsTo_[p] != myProcID) {
// Pack block p: append each LID's packets to sendArray and total them in
// numPacketsTo_p.
2876 size_t sendArrayOffset = 0;
2877 size_t j = startsTo_[p];
2878 size_t numPacketsTo_p = 0;
2879 for (
size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
2880 numPacketsTo_p += numExportPacketsPerLID[j];
2881 deep_copy_offset(sendArray, exports, sendArrayOffset,
2882 indicesOffsets[j], numExportPacketsPerLID[j]);
2883 sendArrayOffset += numExportPacketsPerLID[j];
// Nothing is sent when the block holds no packets.
2885 if (numPacketsTo_p > 0) {
2887 subview_offset(sendArray,
size_t(0), numPacketsTo_p);
// Pick the send primitive that matches sendType.
// NOTE(review): the trailing argument lines after the SSEND branch presumably
// belong to a default blocking-send call whose first line is not visible.
2889 if (sendType == Details::DISTRIBUTOR_RSEND) {
2890 readySend<int> (tmpSend,
2891 as<int> (tmpSend.size ()),
2892 procsTo_[p], tag, *comm_);
2894 else if (sendType == Details::DISTRIBUTOR_ISEND) {
2895 exports_view_type tmpSendBuf =
2896 subview_offset (sendArray,
size_t(0), numPacketsTo_p);
2897 requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2900 else if (sendType == Details::DISTRIBUTOR_SSEND) {
2901 ssend<int> (tmpSend,
2902 as<int> (tmpSend.size ()),
2903 procsTo_[p], tag, *comm_);
2907 as<int> (tmpSend.size ()),
2908 procsTo_[p], tag, *comm_);
// Remember where the self block starts.
2914 selfIndex = startsTo_[p];
// Self message: copy each LID's packets from 'exports' into 'imports',
// advancing selfReceiveOffset by that LID's packet count.
2919 for (
size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
2920 deep_copy_offset(imports, exports, selfReceiveOffset,
2921 indicesOffsets[selfIndex],
2922 numExportPacketsPerLID[selfIndex]);
2923 selfReceiveOffset += numExportPacketsPerLID[selfIndex];
2928 std::ostringstream os;
2929 os <<
"Proc " << myProcID
2930 <<
": doPosts(4 args, Kokkos, slow) done" << endl;
// Three-argument doReversePostsAndWaits overload for Kokkos::View buffers
// (enabled only when both ExpView and ImpView satisfy Kokkos::Impl::is_view).
// NOTE(review): the numPackets parameter used in the call below is declared
// on a line that is not visible in this excerpt.
2936 template <
class ExpView,
class ImpView>
2937 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2939 doReversePostsAndWaits (
const ExpView& exports,
2941 const ImpView& imports)
// Delegate to doReversePosts with the same arguments.
2943 doReversePosts (exports, numPackets, imports);
// Four-argument doReversePostsAndWaits overload for Kokkos::View buffers
// (enabled only when both ExpView and ImpView satisfy Kokkos::Impl::is_view).
// The two ArrayView arguments give per-LID packet counts for the exports and
// the imports.
2947 template <
class ExpView,
class ImpView>
2948 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2950 doReversePostsAndWaits (
const ExpView& exports,
2951 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2952 const ImpView& imports,
2953 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Refuse to start while requests_ still holds entries from an earlier post;
// the failure is reported as std::runtime_error.
2955 TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
2956 "Tpetra::Distributor::doReversePostsAndWaits(4 args): There are "
2957 << requests_.size() <<
" outstanding nonblocking messages pending. It "
2958 "is incorrect to call this method with posts outstanding.");
// Forward all four arguments unchanged to the matching doReversePosts
// overload.
2960 doReversePosts (exports, numExportPacketsPerLID, imports,
2961 numImportPacketsPerLID);
// Three-argument doReversePosts overload for Kokkos::View buffers (enabled
// only when both ExpView and ImpView satisfy Kokkos::Impl::is_view).
// NOTE(review): the numPackets parameter used in the call below is declared
// on a line that is not visible in this excerpt.
2965 template <
class ExpView,
class ImpView>
2966 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2968 doReversePosts (
const ExpView &exports,
2970 const ImpView &imports)
// Reverse communication is refused (std::runtime_error) when indicesTo_ is
// nonempty.
2973 TEUCHOS_TEST_FOR_EXCEPTION(
2974 ! indicesTo_.empty (), std::runtime_error,
2975 "Tpetra::Distributor::doReversePosts(3 args): Can only do "
2976 "reverse communication when original data are blocked by process.");
// Create the reverse Distributor on first use, then post through it.
2977 if (reverseDistributor_.is_null ()) {
2978 createReverseDistributor ();
2980 reverseDistributor_->doPosts (exports, numPackets, imports);
// Four-argument doReversePosts overload for Kokkos::View buffers (enabled
// only when both ExpView and ImpView satisfy Kokkos::Impl::is_view).
// The two ArrayView arguments give per-LID packet counts for the exports and
// the imports.
2983 template <
class ExpView,
class ImpView>
2984 typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2986 doReversePosts (
const ExpView &exports,
2987 const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2988 const ImpView &imports,
2989 const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
// Reverse communication is refused (std::runtime_error) when indicesTo_ is
// nonempty.
// NOTE(review): the message below says "(3 args)" although this is the
// four-argument overload; it looks copied from the three-argument version.
2992 TEUCHOS_TEST_FOR_EXCEPTION(
2993 ! indicesTo_.empty (), std::runtime_error,
2994 "Tpetra::Distributor::doReversePosts(3 args): Can only do "
2995 "reverse communication when original data are blocked by process.");
// Create the reverse Distributor on first use, then post through it.
2996 if (reverseDistributor_.is_null ()) {
2997 createReverseDistributor ();
2999 reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
3000 imports, numImportPacketsPerLID);
// Fills exportGIDs and exportProcIDs from importGIDs and importProcIDs.
//
// Each (importGIDs[i], this process's rank) pair is packed as two size_t
// values and sent to importProcIDs[i] through a temporary plan built with
// createFromSends.  The pairs received in return are unpacked into
// exportGIDs (first value) and exportProcIDs (second value).
//
// Throws std::invalid_argument when importGIDs and importProcIDs differ in
// length, and std::logic_error for the internal consistency checks below.
//
// NOTE(review): tempPlan and 'endl' are used below but their declarations
// are not visible in this excerpt.
3003 template <
class OrdinalType>
3005 computeSends (
const Teuchos::ArrayView<const OrdinalType> & importGIDs,
3006 const Teuchos::ArrayView<const int> & importProcIDs,
3007 Teuchos::Array<OrdinalType> & exportGIDs,
3008 Teuchos::Array<int> & exportProcIDs)
3017 using Teuchos::Array;
3018 using Teuchos::ArrayView;
3020 typedef typename ArrayView<const OrdinalType>::size_type size_type;
3023 Teuchos::OSTab tab (out_);
3024 const int myRank = comm_->getRank ();
3026 std::ostringstream os;
3027 os <<
"Proc " << myRank <<
": computeSends" << endl;
// The two input lists must have the same length.
3031 TEUCHOS_TEST_FOR_EXCEPTION(
3032 importGIDs.size () != importProcIDs.size (), std::invalid_argument,
3033 "Tpetra::Distributor::computeSends: On Process " << myRank <<
": "
3034 "importProcIDs.size() = " << importProcIDs.size ()
3035 <<
" != importGIDs.size() = " << importGIDs.size () <<
".");
// Pack two size_t values per import: the GID, then this process's rank.
3037 const size_type numImports = importProcIDs.size ();
3038 Array<size_t> importObjs (2*numImports);
3040 for (size_type i = 0; i < numImports; ++i) {
3041 importObjs[2*i] = static_cast<size_t> (importGIDs[i]);
3042 importObjs[2*i+1] = static_cast<size_t> (myRank);
3050 std::ostringstream os;
3051 os <<
"Proc " << myRank <<
": computeSends: tempPlan.createFromSends" << endl;
// Build the temporary plan that sends to the processes in importProcIDs; its
// return value is taken as the number of exports.
3057 const size_t numExportsAsSizeT = tempPlan.createFromSends (importProcIDs);
3058 const size_type numExports = static_cast<size_type> (numExportsAsSizeT);
// A negative value after the cast means the size_t count did not fit in
// size_type.
3059 TEUCHOS_TEST_FOR_EXCEPTION(
3060 numExports < 0, std::logic_error,
"Tpetra::Distributor::computeSends: "
3061 "tempPlan.createFromSends() returned numExports = " << numExportsAsSizeT
3062 <<
" as a size_t, which overflows to " << numExports <<
" when cast to "
3063 << Teuchos::TypeNameTraits<size_type>::name () <<
". "
3064 "Please report this bug to the Tpetra developers.");
// The plan's total receive length must agree with the number of exports.
3065 TEUCHOS_TEST_FOR_EXCEPTION(
3066 static_cast<size_type> (tempPlan.getTotalReceiveLength ()) != numExports,
3067 std::logic_error,
"Tpetra::Distributor::computeSends: tempPlan.getTotal"
3068 "ReceiveLength() = " << tempPlan.getTotalReceiveLength () <<
" != num"
3069 "Exports = " << numExports <<
". Please report this bug to the "
3070 "Tpetra developers.");
// Size the output arrays only when there is something to export.
3072 if (numExports > 0) {
3073 exportGIDs.resize (numExports);
3074 exportProcIDs.resize (numExports);
// GIDs travel as size_t, so size_t must not be narrower than OrdinalType.
3085 TEUCHOS_TEST_FOR_EXCEPTION(
3086 sizeof (
size_t) <
sizeof (OrdinalType), std::logic_error,
3087 "Tpetra::Distributor::computeSends: sizeof(size_t) = " <<
sizeof(
size_t)
3088 <<
" < sizeof(" << Teuchos::TypeNameTraits<OrdinalType>::name () <<
") = "
3089 <<
sizeof (OrdinalType) <<
". This violates an assumption of the "
3090 "method. It's not hard to work around (just use Array<OrdinalType> as "
3091 "the export buffer, not Array<size_t>), but we haven't done that yet. "
3092 "Please report this bug to the Tpetra developers.");
3094 TEUCHOS_TEST_FOR_EXCEPTION(
3095 tempPlan.getTotalReceiveLength () < static_cast<size_t> (numExports),
3097 "Tpetra::Distributor::computeSends: tempPlan.getTotalReceiveLength() = "
3098 << tempPlan.getTotalReceiveLength() <<
" < numExports = " << numExports
3099 <<
". Please report this bug to the Tpetra developers.");
// Receive buffer: two size_t values per received entry.
3101 Array<size_t> exportObjs (tempPlan.getTotalReceiveLength () * 2);
3103 std::ostringstream os;
3104 os <<
"Proc " << myRank <<
": computeSends: tempPlan.doPostsAndWaits" << endl;
// Exchange the pairs, two packets per entry.
3107 tempPlan.doPostsAndWaits<
size_t> (importObjs (), 2, exportObjs ());
// Unpack each received pair: first value is the GID, second the rank.
3110 for (size_type i = 0; i < numExports; ++i) {
3111 exportGIDs[i] = static_cast<OrdinalType> (exportObjs[2*i]);
3112 exportProcIDs[i] = static_cast<int> (exportObjs[2*i+1]);
3116 std::ostringstream os;
3117 os <<
"Proc " << myRank <<
": computeSends done" << endl;
// Sets up this Distributor from receive-side information.
//
// remoteGIDs and remoteProcIDs must have the same length.  computeSends
// turns them into exportGIDs and exportProcIDs, and createFromSends is then
// called on exportProcIDs.  On success howInitialized_ is set to
// DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS.
//
// NOTE(review): errProc and 'endl' are used below but their declarations are
// not visible in this excerpt.
3122 template <
class OrdinalType>
3124 createFromRecvs (
const Teuchos::ArrayView<const OrdinalType> &remoteGIDs,
3125 const Teuchos::ArrayView<const int> &remoteProcIDs,
3126 Teuchos::Array<OrdinalType> &exportGIDs,
3127 Teuchos::Array<int> &exportProcIDs)
3132 Teuchos::OSTab tab (out_);
3133 const int myRank = comm_->getRank();
3136 *out_ <<
"Proc " << myRank <<
": createFromRecvs" << endl;
// Debug builds: the length check is made collective.  The expression below
// yields this process's rank when its two lists differ in length and -1
// otherwise; errProc is max-reduced over comm_ into maxErrProc, and
// std::runtime_error is raised whenever maxErrProc is not -1.
3139 #ifdef HAVE_TPETRA_DEBUG
3140 using Teuchos::outArg;
3141 using Teuchos::reduceAll;
3146 (remoteGIDs.size () != remoteProcIDs.size ()) ? myRank : -1;
3147 int maxErrProc = -1;
3148 reduceAll<int, int> (*comm_, Teuchos::REDUCE_MAX, errProc, outArg (maxErrProc));
3149 TEUCHOS_TEST_FOR_EXCEPTION(maxErrProc != -1, std::runtime_error,
3150 Teuchos::typeName (*
this) <<
"::createFromRecvs(): lists of remote IDs "
3151 "and remote process IDs must have the same size on all participating "
3152 "processes. Maximum process ID with error: " << maxErrProc <<
".");
// Non-debug builds: only this process's lists are checked, and a mismatch is
// reported as std::invalid_argument.
3153 #else // NOT HAVE_TPETRA_DEBUG
3156 TEUCHOS_TEST_FOR_EXCEPTION(
3157 remoteGIDs.size () != remoteProcIDs.size (), std::invalid_argument,
3158 Teuchos::typeName (*
this) <<
"::createFromRecvs<" <<
3159 Teuchos::TypeNameTraits<OrdinalType>::name () <<
">(): On Process " <<
3160 myRank <<
": remoteGIDs.size() = " << remoteGIDs.size () <<
" != "
3161 "remoteProcIDs.size() = " << remoteProcIDs.size () <<
".");
3162 #endif // HAVE_TPETRA_DEBUG
// Derive the export lists from the receive lists.
3164 computeSends (remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs);
// Build the plan from the export process IDs just computed; the return value
// is only used in the diagnostic output below.
3166 const size_t numProcsSendingToMe =
createFromSends (exportProcIDs ());
3173 std::ostringstream os;
3174 os <<
"Proc " << myRank <<
": {numProcsSendingToMe: "
3175 << numProcsSendingToMe <<
", remoteProcIDs.size(): "
3176 << remoteProcIDs.size () <<
", selfMessage_: "
3177 << (selfMessage_ ?
"true" :
"false") <<
"}" << std::endl;
3182 *out_ <<
"Proc " << myRank <<
": createFromRecvs done" << endl;
// Record which entry point initialized this object.
3185 howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS;
3191 #endif // TPETRA_DISTRIBUTOR_HPP