// Tpetra parallel linear algebra: "Version of the Day" development sources.
// File: Tpetra_Distributor.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DISTRIBUTOR_HPP
43 #define TPETRA_DISTRIBUTOR_HPP
44 
45 #include "Tpetra_Util.hpp"
46 #include <Teuchos_as.hpp>
47 #include <Teuchos_Describable.hpp>
48 #include <Teuchos_ParameterListAcceptorDefaultBase.hpp>
49 #include <Teuchos_VerboseObject.hpp>
51 
52 // If TPETRA_DISTRIBUTOR_TIMERS is defined, Distributor will time
53 // doPosts (both versions) and doWaits, and register those timers with
54 // Teuchos::TimeMonitor so that summarize() or report() will show
55 // results.
56 
57 // #ifndef TPETRA_DISTRIBUTOR_TIMERS
58 // # define TPETRA_DISTRIBUTOR_TIMERS 1
59 // #endif // TPETRA_DISTRIBUTOR_TIMERS
60 
61 #ifdef TPETRA_DISTRIBUTOR_TIMERS
62 # undef TPETRA_DISTRIBUTOR_TIMERS
63 #endif // TPETRA_DISTRIBUTOR_TIMERS
64 
65 #include "KokkosCompat_View.hpp"
66 #include "Kokkos_Core.hpp"
67 #include "Kokkos_TeuchosCommAdapters.hpp"
68 #include <type_traits>
69 
70 namespace Tpetra {
71 
72  namespace Details {
78  DISTRIBUTOR_ISEND, // Use MPI_Isend (Teuchos::isend)
79  DISTRIBUTOR_RSEND, // Use MPI_Rsend (Teuchos::readySend)
80  DISTRIBUTOR_SEND, // Use MPI_Send (Teuchos::send)
81  DISTRIBUTOR_SSEND // Use MPI_Ssend (Teuchos::ssend)
82  };
83 
88  std::string
90 
96  DISTRIBUTOR_NOT_INITIALIZED, // Not initialized yet
97  DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS, // By createFromSends
98  DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS, // By createFromRecvs
99  DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS_N_RECVS, // By createFromSendsAndRecvs
100  DISTRIBUTOR_INITIALIZED_BY_REVERSE, // By createReverseDistributor
101  DISTRIBUTOR_INITIALIZED_BY_COPY, // By copy constructor
102  };
103 
108  std::string
110 
111  } // namespace Details
112 
119  Teuchos::Array<std::string> distributorSendTypes ();
120 
188  class Distributor :
189  public Teuchos::Describable,
190  public Teuchos::ParameterListAcceptorDefaultBase {
191  public:
193 
194 
203  explicit Distributor (const Teuchos::RCP<const Teuchos::Comm<int> >& comm);
204 
216  Distributor (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
217  const Teuchos::RCP<Teuchos::FancyOStream>& out);
218 
232  Distributor (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
233  const Teuchos::RCP<Teuchos::ParameterList>& plist);
234 
251  Distributor (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
252  const Teuchos::RCP<Teuchos::FancyOStream>& out,
253  const Teuchos::RCP<Teuchos::ParameterList>& plist);
254 
256  Distributor (const Distributor& distributor);
257 
259  virtual ~Distributor ();
260 
266  void swap (Distributor& rhs);
267 
269 
271 
276  void setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& plist);
277 
282  Teuchos::RCP<const Teuchos::ParameterList> getValidParameters () const;
283 
285 
287 
307  size_t createFromSends (const Teuchos::ArrayView<const int>& exportProcIDs);
308 
342  template <class Ordinal>
343  void
344  createFromRecvs (const Teuchos::ArrayView<const Ordinal>& remoteIDs,
345  const Teuchos::ArrayView<const int>& remoteProcIDs,
346  Teuchos::Array<Ordinal>& exportIDs,
347  Teuchos::Array<int>& exportProcIDs);
348 
356  void
357  createFromSendsAndRecvs (const Teuchos::ArrayView<const int>& exportProcIDs,
358  const Teuchos::ArrayView<const int>& remoteProcIDs);
359 
361 
363 
367  size_t getNumReceives() const;
368 
372  size_t getNumSends() const;
373 
375  bool hasSelfMessage() const;
376 
378  size_t getMaxSendLength() const;
379 
381  size_t getTotalReceiveLength() const;
382 
387  Teuchos::ArrayView<const int> getProcsFrom() const;
388 
393  Teuchos::ArrayView<const int> getProcsTo() const;
394 
402  Teuchos::ArrayView<const size_t> getLengthsFrom() const;
403 
411  Teuchos::ArrayView<const size_t> getLengthsTo() const;
412 
418  return howInitialized_;
419  }
420 
422 
424 
435  Teuchos::RCP<Distributor> getReverse() const;
436 
438 
440 
461  template <class Packet>
462  void
463  doPostsAndWaits (const Teuchos::ArrayView<const Packet> &exports,
464  size_t numPackets,
465  const Teuchos::ArrayView<Packet> &imports);
466 
488  template <class Packet>
489  void
490  doPostsAndWaits (const Teuchos::ArrayView<const Packet> &exports,
491  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
492  const Teuchos::ArrayView<Packet> &imports,
493  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
494 
519  template <class Packet>
520  void
521  doPosts (const Teuchos::ArrayRCP<const Packet> &exports,
522  size_t numPackets,
523  const Teuchos::ArrayRCP<Packet> &imports);
524 
543  template <class Packet>
544  void
545  doPosts (const Teuchos::ArrayRCP<const Packet> &exports,
546  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
547  const Teuchos::ArrayRCP<Packet> &imports,
548  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
549 
556  void doWaits ();
557 
562  template <class Packet>
563  void
564  doReversePostsAndWaits (const Teuchos::ArrayView<const Packet> &exports,
565  size_t numPackets,
566  const Teuchos::ArrayView<Packet> &imports);
567 
572  template <class Packet>
573  void
574  doReversePostsAndWaits (const Teuchos::ArrayView<const Packet> &exports,
575  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
576  const Teuchos::ArrayView<Packet> &imports,
577  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
578 
583  template <class Packet>
584  void
585  doReversePosts (const Teuchos::ArrayRCP<const Packet> &exports,
586  size_t numPackets,
587  const Teuchos::ArrayRCP<Packet> &imports);
588 
593  template <class Packet>
594  void
595  doReversePosts (const Teuchos::ArrayRCP<const Packet> &exports,
596  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
597  const Teuchos::ArrayRCP<Packet> &imports,
598  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
599 
606  void doReverseWaits ();
607 
628  template <class ExpView, class ImpView>
629  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
631  const ExpView &exports,
632  size_t numPackets,
633  const ImpView &imports);
634 
656  template <class ExpView, class ImpView>
657  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
658  doPostsAndWaits (const ExpView &exports,
659  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
660  const ImpView &imports,
661  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
662 
687  template <class ExpView, class ImpView>
688  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
689  doPosts (const ExpView &exports,
690  size_t numPackets,
691  const ImpView &imports);
692 
711  template <class ExpView, class ImpView>
712  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
713  doPosts (const ExpView &exports,
714  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
715  const ImpView &imports,
716  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
717 
722  template <class ExpView, class ImpView>
723  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
724  doReversePostsAndWaits (const ExpView &exports,
725  size_t numPackets,
726  const ImpView &imports);
727 
732  template <class ExpView, class ImpView>
733  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
734  doReversePostsAndWaits (const ExpView &exports,
735  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
736  const ImpView &imports,
737  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
738 
743  template <class ExpView, class ImpView>
744  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
745  doReversePosts (const ExpView &exports,
746  size_t numPackets,
747  const ImpView &imports);
748 
753  template <class ExpView, class ImpView>
754  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
755  doReversePosts (const ExpView &exports,
756  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
757  const ImpView &imports,
758  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID);
759 
763  void getLastDoStatistics(size_t & bytes_sent, size_t & bytes_recvd) const{
764  bytes_sent = lastRoundBytesSend_;
765  bytes_recvd = lastRoundBytesRecv_;
766  }
767 
769 
771 
773  std::string description() const;
774 
796  void
797  describe (Teuchos::FancyOStream& out,
798  const Teuchos::EVerbosityLevel verbLevel =
799  Teuchos::Describable::verbLevel_default) const;
801 
802  private:
804  Teuchos::RCP<const Teuchos::Comm<int> > comm_;
805 
807  Teuchos::RCP<Teuchos::FancyOStream> out_;
808 
810  Details::EDistributorHowInitialized howInitialized_;
811 
813 
814 
817 
819  bool barrierBetween_;
820 
822  bool debug_;
824 
828  bool selfMessage_;
829 
839  size_t numSends_;
840 
845  Teuchos::Array<int> procsTo_;
846 
855  Teuchos::Array<size_t> startsTo_;
856 
862  Teuchos::Array<size_t> lengthsTo_;
863 
867  size_t maxSendLength_;
868 
884  Teuchos::Array<size_t> indicesTo_;
885 
895  size_t numReceives_;
896 
903  size_t totalReceiveLength_;
904 
910  Teuchos::Array<size_t> lengthsFrom_;
911 
917  Teuchos::Array<int> procsFrom_;
918 
924  Teuchos::Array<size_t> startsFrom_;
925 
932  Teuchos::Array<size_t> indicesFrom_;
933 
940  Teuchos::Array<Teuchos::RCP<Teuchos::CommRequest<int> > > requests_;
941 
946  mutable Teuchos::RCP<Distributor> reverseDistributor_;
947 
949  size_t lastRoundBytesSend_;
950 
952  size_t lastRoundBytesRecv_;
953 
954 #ifdef TPETRA_DISTRIBUTOR_TIMERS
955  Teuchos::RCP<Teuchos::Time> timer_doPosts3_;
956  Teuchos::RCP<Teuchos::Time> timer_doPosts4_;
957  Teuchos::RCP<Teuchos::Time> timer_doWaits_;
958  Teuchos::RCP<Teuchos::Time> timer_doPosts3_recvs_;
959  Teuchos::RCP<Teuchos::Time> timer_doPosts4_recvs_;
960  Teuchos::RCP<Teuchos::Time> timer_doPosts3_barrier_;
961  Teuchos::RCP<Teuchos::Time> timer_doPosts4_barrier_;
962  Teuchos::RCP<Teuchos::Time> timer_doPosts3_sends_;
963  Teuchos::RCP<Teuchos::Time> timer_doPosts4_sends_;
964 
966  void makeTimers ();
967 #endif // TPETRA_DISTRIBUTOR_TIMERS
968 
980  bool useDistinctTags_;
981 
986  int getTag (const int pathTag) const;
987 
1005  void
1006  init (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1007  const Teuchos::RCP<Teuchos::FancyOStream>& out,
1008  const Teuchos::RCP<Teuchos::ParameterList>& plist);
1009 
1020  void computeReceives ();
1021 
1034  template <class Ordinal>
1035  void computeSends (const Teuchos::ArrayView<const Ordinal> &remoteGIDs,
1036  const Teuchos::ArrayView<const int> &remoteProcIDs,
1037  Teuchos::Array<Ordinal> &exportGIDs,
1038  Teuchos::Array<int> &exportProcIDs);
1039 
1041  void createReverseDistributor() const;
1042 
1043 
1048  std::string
1049  localDescribeToString (const Teuchos::EVerbosityLevel vl) const;
1050  }; // class Distributor
1051 
1052 
1053  template <class Packet>
1054  void Distributor::
1055  doPostsAndWaits (const Teuchos::ArrayView<const Packet>& exports,
1056  size_t numPackets,
1057  const Teuchos::ArrayView<Packet>& imports)
1058  {
1059  using Teuchos::arcp;
1060  using Teuchos::ArrayRCP;
1061  typedef typename ArrayRCP<const Packet>::size_type size_type;
1062 
1063  TEUCHOS_TEST_FOR_EXCEPTION(
1064  requests_.size () != 0, std::runtime_error, "Tpetra::Distributor::"
1065  "doPostsAndWaits(3 args): There are " << requests_.size () <<
1066  " outstanding nonblocking messages pending. It is incorrect to call "
1067  "this method with posts outstanding.");
1068 
1069  // doPosts() accepts the exports and imports arrays as ArrayRCPs,
1070  // requiring that the memory location is persisting (as is
1071  // necessary for nonblocking receives). However, it need only
1072  // persist until doWaits() completes, so it is safe for us to use
1073  // a nonpersisting reference in this case. The use of a
1074  // nonpersisting reference is purely a performance optimization.
1075 
1076  //const Packet* exportsPtr = exports.getRawPtr();
1077  //ArrayRCP<const Packet> exportsArcp (exportsPtr, static_cast<size_type> (0),
1078  // exports.size(), false);
1079  ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1080  static_cast<size_type> (0),
1081  exports.size(), false);
1082 
1083  // For some reason, neither of the options below (that use arcp)
1084  // compile for Packet=std::complex<double> with GCC 4.5.1. The
1085  // issue only arises with the exports array. This is why we
1086  // construct a separate nonowning ArrayRCP.
1087 
1088  // doPosts (arcp<const Packet> (exports.getRawPtr(), 0, exports.size(), false),
1089  // numPackets,
1090  // arcp<Packet> (imports.getRawPtr(), 0, imports.size(), false));
1091  // doPosts (arcp<const Packet> (exportsPtr, 0, exports.size(), false),
1092  // numPackets,
1093  // arcp<Packet> (imports.getRawPtr(), 0, imports.size(), false));
1094  doPosts (exportsArcp,
1095  numPackets,
1096  arcp<Packet> (imports.getRawPtr (), 0, imports.size (), false));
1097  doWaits ();
1098 
1099  lastRoundBytesSend_ = exports.size () * sizeof (Packet);
1100  lastRoundBytesRecv_ = imports.size () * sizeof (Packet);
1101  }
1102 
1103  template <class Packet>
1104  void Distributor::
1105  doPostsAndWaits (const Teuchos::ArrayView<const Packet>& exports,
1106  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1107  const Teuchos::ArrayView<Packet> &imports,
1108  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1109  {
1110  using Teuchos::arcp;
1111  using Teuchos::ArrayRCP;
1112 
1113  TEUCHOS_TEST_FOR_EXCEPTION(
1114  requests_.size () != 0, std::runtime_error, "Tpetra::Distributor::"
1115  "doPostsAndWaits: There are " << requests_.size () << " outstanding "
1116  "nonblocking messages pending. It is incorrect to call doPostsAndWaits "
1117  "with posts outstanding.");
1118 
1119  // doPosts() accepts the exports and imports arrays as ArrayRCPs,
1120  // requiring that the memory location is persisting (as is
1121  // necessary for nonblocking receives). However, it need only
1122  // persist until doWaits() completes, so it is safe for us to use
1123  // a nonpersisting reference in this case.
1124 
1125  // mfh 04 Apr 2012: For some reason, calling arcp<const Packet>
1126  // for Packet=std::complex<T> (e.g., T=float) fails to compile
1127  // with some versions of GCC. The issue only arises with the
1128  // exports array. This is why we construct a separate nonowning
1129  // ArrayRCP.
1130  typedef typename ArrayRCP<const Packet>::size_type size_type;
1131  ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (),
1132  static_cast<size_type> (0),
1133  exports.size (), false);
1134  // mfh 04 Apr 2012: This is the offending code. This statement
1135  // would normally be in place of "exportsArcp" in the
1136  // doPosts() call below.
1137  //arcp<const Packet> (exports.getRawPtr(), 0, exports.size(), false),
1138  doPosts (exportsArcp,
1139  numExportPacketsPerLID,
1140  arcp<Packet> (imports.getRawPtr (), 0, imports.size (), false),
1141  numImportPacketsPerLID);
1142  doWaits ();
1143 
1144  lastRoundBytesSend_ = exports.size () * sizeof (Packet);
1145  lastRoundBytesRecv_ = imports.size () * sizeof (Packet);
1146  }
1147 
1148 
  /// \brief Nonblocking execution of the plan: post all receives, then
  ///   all sends, with the same number of packets (numPackets) per LID.
  ///   Completion requires a later call to doWaits().
  template <class Packet>
  void Distributor::
  doPosts (const Teuchos::ArrayRCP<const Packet>& exports,
           size_t numPackets,
           const Teuchos::ArrayRCP<Packet>& imports)
  {
    using Teuchos::Array;
    using Teuchos::ArrayRCP;
    using Teuchos::ArrayView;
    using Teuchos::as;
    using Teuchos::FancyOStream;
    using Teuchos::includesVerbLevel;
    using Teuchos::ireceive;
    using Teuchos::isend;
    using Teuchos::OSTab;
    using Teuchos::readySend;
    using Teuchos::send;
    using Teuchos::ssend;
    using Teuchos::TypeNameTraits;
    using Teuchos::typeName;
    using std::endl;
    typedef Array<size_t>::size_type size_type;

    // Verbose debug output is controlled globally per class name.
    const bool verbose = Tpetra::Details::Behavior::verbose("Distributor");

#ifdef TPETRA_DISTRIBUTOR_TIMERS
    Teuchos::TimeMonitor timeMon (*timer_doPosts3_);
#endif // TPETRA_DISTRIBUTOR_TIMERS

    const int myRank = comm_->getRank ();
    // Run-time configurable parameters that come from the input
    // ParameterList set by setParameterList().
    const Details::EDistributorSendType sendType = sendType_;
    const bool doBarrier = barrierBetween_;

    Teuchos::OSTab tab0 (out_);
    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank
         << ": Distributor::doPosts(3 args, Teuchos::ArrayRCP)" << endl;
      *out_ << os.str ();
    }
    Teuchos::OSTab tab1 (out_);

    // Ready sends (MPI_Rsend) require that the matching receive is
    // already posted, which this method can only guarantee via the
    // barrier between the receive loop and the send loop below.
    TEUCHOS_TEST_FOR_EXCEPTION(
      sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier, std::logic_error,
      "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Ready-send "
      "version requires a barrier between posting receives and posting ready "
      "sends. This should have been checked before. "
      "Please report this bug to the Tpetra developers.");

    // Offset into 'imports' where the self-message (if any) lands;
    // set in the receive loop, consumed in the self-copy below.
    size_t selfReceiveOffset = 0;

    // mfh 30 Mar 2016: See Github Issue #227 to see why we need to
    // check whether we're doing reverse mode before checking the
    // length of the imports array.
    if (howInitialized_ != Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE) {
      // Each message has the same number of packets.
      //
      // FIXME (mfh 18 Jul 2014): Relaxing this test from strict
      // inequality to a less-than seems to have fixed Bug 6170. It's
      // OK for the 'imports' array to be longer than it needs to be;
      // I'm just curious why it would be.
      const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
      TEUCHOS_TEST_FOR_EXCEPTION
        (static_cast<size_t> (imports.size ()) < totalNumImportPackets,
         std::invalid_argument,
         "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
         "The 'imports' array must have enough entries to hold the expected number "
         "of import packets. imports.size() = " << imports.size () << " < "
         "totalNumImportPackets = " << totalNumImportPackets << ".");
    }

    // MPI tag for nonblocking receives and blocking sends in this
    // method. Some processes might take the "fast" path
    // (indicesTo_.empty()) and others might take the "slow" path for
    // the same doPosts() call, so the path tag must be the same for
    // both.
    const int pathTag = 0;
    const int tag = this->getTag (pathTag);

#ifdef HAVE_TPETRA_DEBUG
    TEUCHOS_TEST_FOR_EXCEPTION
      (requests_.size () != 0,
       std::logic_error,
       "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Process "
       << myRank << ": requests_.size() = " << requests_.size () << " != 0.");
#endif // HAVE_TPETRA_DEBUG

    // Distributor uses requests_.size() as the number of outstanding
    // nonblocking message requests, so we resize to zero to maintain
    // this invariant.
    //
    // numReceives_ does _not_ include the self message, if there is
    // one. Here, we do actually send a message to ourselves, so we
    // include any self message in the "actual" number of receives to
    // post.
    //
    // NOTE (mfh 19 Mar 2012): Epetra_MpiDistributor::DoPosts()
    // doesn't (re)allocate its array of requests. That happens in
    // CreateFromSends(), ComputeRecvs_(), DoReversePosts() (on
    // demand), or Resize_().
    const size_type actualNumReceives = as<size_type> (numReceives_) +
      as<size_type> (selfMessage_ ? 1 : 0);
    requests_.resize (0);

    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank << ": doPosts(3 args, Teuchos::ArrayRCP, "
         << (indicesTo_.empty () ? "fast" : "slow") << "): Post receives"
         << endl;
      *out_ << os.str ();
    }

    // Post the nonblocking receives. It's common MPI wisdom to post
    // receives before sends. In MPI terms, this means favoring
    // adding to the "posted queue" (of receive requests) over adding
    // to the "unexpected queue" (of arrived messages not yet matched
    // with a receive).
    {
#ifdef TPETRA_DISTRIBUTOR_TIMERS
      Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3_recvs_);
#endif // TPETRA_DISTRIBUTOR_TIMERS

      size_t curBufOffset = 0;
      for (size_type i = 0; i < actualNumReceives; ++i) {
        const size_t curBufLen = lengthsFrom_[i] * numPackets;
        if (procsFrom_[i] != myRank) {
          if (verbose) {
            std::ostringstream os;
            os << "Proc " << myRank << ": doPosts(3 args, Teuchos::ArrayRCP, "
               << (indicesTo_.empty () ? "fast" : "slow") << "): "
               << "Post irecv: {source: " << procsFrom_[i]
               << ", tag: " << tag << "}" << endl;
            *out_ << os.str ();
          }
          // If my process is receiving these packet(s) from another
          // process (not a self-receive):
          //
          // 1. Set up the persisting view (recvBuf) of the imports
          //    array, given the offset and size (total number of
          //    packets from process procsFrom_[i]).
          // 2. Start the Irecv and save the resulting request.
          TEUCHOS_TEST_FOR_EXCEPTION(
            curBufOffset + curBufLen > static_cast<size_t> (imports.size ()),
            std::logic_error,
            "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
            "Exceeded size of 'imports' array in packing loop on Process " <<
            myRank << ". imports.size() = " << imports.size () << " < "
            "curBufOffset(" << curBufOffset << ") + curBufLen(" << curBufLen
            << ").");
          ArrayRCP<Packet> recvBuf =
            imports.persistingView (curBufOffset, curBufLen);
          requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
                                                      tag, *comm_));
        }
        else { // Receiving from myself
          selfReceiveOffset = curBufOffset; // Remember the self-recv offset
        }
        curBufOffset += curBufLen;
      }
    }

    if (doBarrier) {
#ifdef TPETRA_DISTRIBUTOR_TIMERS
      Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3_barrier_);
#endif // TPETRA_DISTRIBUTOR_TIMERS

      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myRank << ": doPosts(3 args, Teuchos::ArrayRCP, "
           << (indicesTo_.empty () ? "fast" : "slow") << "): Barrier" << endl;
        *out_ << os.str ();
      }
      // If we are using ready sends (MPI_Rsend) below, we need to do
      // a barrier before we post the ready sends. This is because a
      // ready send requires that its matching receive has already
      // been posted before the send has been posted. The only way to
      // guarantee that in this case is to use a barrier.
      comm_->barrier ();
    }

#ifdef TPETRA_DISTRIBUTOR_TIMERS
    Teuchos::TimeMonitor timeMonSends (*timer_doPosts3_sends_);
#endif // TPETRA_DISTRIBUTOR_TIMERS

    // setup scan through procsTo_ list starting with higher numbered procs
    // (should help balance message traffic)
    //
    // FIXME (mfh 20 Feb 2013) Why haven't we precomputed this?
    // It doesn't depend on the input at all.
    size_t numBlocks = numSends_ + selfMessage_;
    size_t procIndex = 0;
    while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
      ++procIndex;
    }
    if (procIndex == numBlocks) {
      procIndex = 0;
    }

    // Index of the self-message block (fast path) and the starting
    // index of the self-message entries (slow path), if any.
    size_t selfNum = 0;
    size_t selfIndex = 0;

    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank
         << ": doPosts(3 args, Teuchos::ArrayRCP, "
         << (indicesTo_.empty () ? "fast" : "slow") << "): Post sends" << endl;
      *out_ << os.str ();
    }

    if (indicesTo_.empty()) {
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myRank
           << ": doPosts(3 args, Teuchos::ArrayRCP, fast): posting sends" << endl;
        *out_ << os.str ();
      }

      // Data are already blocked (laid out) by process, so we don't
      // need a separate send buffer (besides the exports array).
      for (size_t i = 0; i < numBlocks; ++i) {
        size_t p = i + procIndex;
        if (p > (numBlocks - 1)) {
          p -= numBlocks;
        }

        if (procsTo_[p] != myRank) {
          if (verbose) {
            std::ostringstream os;
            os << "Proc " << myRank
               << ": doPosts(3 args, Teuchos::ArrayRCP, fast): Post send: "
              "{target: " << procsTo_[p] << ", tag: " << tag << "}" << endl;
            *out_ << os.str ();
          }

          ArrayView<const Packet> tmpSend =
            exports.view (startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);

          if (sendType == Details::DISTRIBUTOR_SEND) {
            send<int, Packet> (tmpSend.getRawPtr (),
                               as<int> (tmpSend.size ()),
                               procsTo_[p], tag, *comm_);
          }
          else if (sendType == Details::DISTRIBUTOR_ISEND) {
            // Nonblocking send: the buffer must persist, so take a
            // persisting view of 'exports' and keep the request.
            ArrayRCP<const Packet> tmpSendBuf =
              exports.persistingView (startsTo_[p] * numPackets,
                                      lengthsTo_[p] * numPackets);
            requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
                                                     tag, *comm_));
          }
          else if (sendType == Details::DISTRIBUTOR_RSEND) {
            readySend<int, Packet> (tmpSend.getRawPtr (),
                                    as<int> (tmpSend.size ()),
                                    procsTo_[p], tag, *comm_);
          }
          else if (sendType == Details::DISTRIBUTOR_SSEND) {
            ssend<int, Packet> (tmpSend.getRawPtr (),
                                as<int> (tmpSend.size ()),
                                procsTo_[p], tag, *comm_);
          } else {
            TEUCHOS_TEST_FOR_EXCEPTION(
              true, std::logic_error,
              "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
              "Invalid send type. We should never get here. "
              "Please report this bug to the Tpetra developers.");
          }
        }
        else { // "Sending" the message to myself
          selfNum = p;
        }
      }

      if (selfMessage_) {
        if (verbose) {
          std::ostringstream os;
          os << "Proc " << myRank
             << ": doPosts(3 args, Teuchos::ArrayRCP, fast): Self-send" << endl;
          *out_ << os.str ();
        }
        // This is how we "send a message to ourself": we copy from
        // the export buffer to the import buffer. That saves
        // Teuchos::Comm implementations other than MpiComm (in
        // particular, SerialComm) the trouble of implementing self
        // messages correctly. (To do this right, SerialComm would
        // need internal buffer space for messages, keyed on the
        // message's tag.)
        std::copy (exports.begin()+startsTo_[selfNum]*numPackets,
                   exports.begin()+startsTo_[selfNum]*numPackets+lengthsTo_[selfNum]*numPackets,
                   imports.begin()+selfReceiveOffset);
      }
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myRank
           << ": doPosts(3 args, Teuchos::ArrayRCP, fast) done" << endl;
        *out_ << os.str ();
      }
    }
    else { // data are not blocked by proc, use send buffer
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myRank
           << ": doPosts(3 args, Teuchos::ArrayRCP, slow): posting sends" << endl;
        *out_ << os.str ();
      }

      // FIXME (mfh 05 Mar 2013) This is broken for Isend (nonblocking
      // sends), because the buffer is only long enough for one send.
      ArrayRCP<Packet> sendArray (maxSendLength_ * numPackets); // send buffer

      TEUCHOS_TEST_FOR_EXCEPTION(
        sendType == Details::DISTRIBUTOR_ISEND, std::logic_error,
        "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
        "The \"send buffer\" code path doesn't currently work with nonblocking sends.");

      for (size_t i = 0; i < numBlocks; ++i) {
        size_t p = i + procIndex;
        if (p > (numBlocks - 1)) {
          p -= numBlocks;
        }

        if (procsTo_[p] != myRank) {
          if (verbose) {
            std::ostringstream os;
            os << "Proc " << myRank
               << ": doPosts(3 args, Teuchos::ArrayRCP, slow): Post send: "
              "{target: " << procsTo_[p] << ", tag: " << tag << "}" << endl;
            *out_ << os.str ();
          }

          // Gather this process's scattered export entries for target
          // procsTo_[p] into the contiguous send buffer.
          typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
          size_t sendArrayOffset = 0;
          size_t j = startsTo_[p];
          for (size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
            srcBegin = exports.begin() + indicesTo_[j]*numPackets;
            srcEnd = srcBegin + numPackets;
            std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
            sendArrayOffset += numPackets;
          }
          ArrayView<const Packet> tmpSend =
            sendArray.view (0, lengthsTo_[p]*numPackets);

          if (sendType == Details::DISTRIBUTOR_SEND) {
            send<int, Packet> (tmpSend.getRawPtr (),
                               as<int> (tmpSend.size ()),
                               procsTo_[p], tag, *comm_);
          }
          else if (sendType == Details::DISTRIBUTOR_ISEND) {
            ArrayRCP<const Packet> tmpSendBuf =
              sendArray.persistingView (0, lengthsTo_[p] * numPackets);
            requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
                                                     tag, *comm_));
          }
          else if (sendType == Details::DISTRIBUTOR_RSEND) {
            readySend<int, Packet> (tmpSend.getRawPtr (),
                                    as<int> (tmpSend.size ()),
                                    procsTo_[p], tag, *comm_);
          }
          else if (sendType == Details::DISTRIBUTOR_SSEND) {
            ssend<int, Packet> (tmpSend.getRawPtr (),
                                as<int> (tmpSend.size ()),
                                procsTo_[p], tag, *comm_);
          }
          else {
            TEUCHOS_TEST_FOR_EXCEPTION(
              true, std::logic_error,
              "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): "
              "Invalid send type. We should never get here. "
              "Please report this bug to the Tpetra developers.");
          }
        }
        else { // "Sending" the message to myself
          selfNum = p;
          selfIndex = startsTo_[p];
        }
      }

      if (selfMessage_) {
        if (verbose) {
          std::ostringstream os;
          os << "Proc " << myRank
             << ": doPosts(3 args, Teuchos::ArrayRCP, slow): Self-send" << endl;
          *out_ << os.str ();
        }
        // Copy the self-message entries one LID at a time, since they
        // are scattered through 'exports' (indexed via indicesTo_).
        for (size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
          std::copy (exports.begin()+indicesTo_[selfIndex]*numPackets,
                     exports.begin()+indicesTo_[selfIndex]*numPackets + numPackets,
                     imports.begin() + selfReceiveOffset);
          ++selfIndex;
          selfReceiveOffset += numPackets;
        }
      }
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myRank
           << ": doPosts(3 args, Teuchos::ArrayRCP, slow) done" << endl;
        *out_ << os.str ();
      }
    }

    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank << ": doPosts done" << endl;
      *out_ << os.str ();
    }
  }
1555 
1556  template <class Packet>
1557  void Distributor::
1558  doPosts (const Teuchos::ArrayRCP<const Packet>& exports,
1559  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1560  const Teuchos::ArrayRCP<Packet>& imports,
1561  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1562  {
1563  using Teuchos::Array;
1564  using Teuchos::ArrayRCP;
1565  using Teuchos::ArrayView;
1566  using Teuchos::as;
1567  using Teuchos::ireceive;
1568  using Teuchos::isend;
1569  using Teuchos::readySend;
1570  using Teuchos::send;
1571  using Teuchos::ssend;
1572  using Teuchos::TypeNameTraits;
1573 #ifdef HAVE_TEUCHOS_DEBUG
1574  using Teuchos::OSTab;
1575 #endif // HAVE_TEUCHOS_DEBUG
1576  using std::endl;
1577  typedef Array<size_t>::size_type size_type;
1578 
1579  Teuchos::OSTab tab (out_);
1580  const bool verbose = Tpetra::Details::Behavior::verbose("Distributor");
1581 
1582 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1583  Teuchos::TimeMonitor timeMon (*timer_doPosts4_);
1584 #endif // TPETRA_DISTRIBUTOR_TIMERS
1585 
1586  // Run-time configurable parameters that come from the input
1587  // ParameterList set by setParameterList().
1588  const Details::EDistributorSendType sendType = sendType_;
1589  const bool doBarrier = barrierBetween_;
1590 
1591 // #ifdef HAVE_TEUCHOS_DEBUG
1592 // // Prepare for verbose output, if applicable.
1593 // Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel ();
1594 // Teuchos::RCP<Teuchos::FancyOStream> out = this->getOStream ();
1595 // const bool doPrint = out.get () && (comm_->getRank () == 0) &&
1596 // includesVerbLevel (verbLevel, Teuchos::VERB_EXTREME, true);
1597 
1598 // if (doPrint) {
1599 // // Only need one process to print out parameters.
1600 // *out << "Distributor::doPosts (4 args)" << endl;
1601 // }
1602 // // Add one tab level. We declare this outside the doPrint scopes
1603 // // so that the tab persists until the end of this method.
1604 // Teuchos::OSTab tab = this->getOSTab ();
1605 // if (doPrint) {
1606 // *out << "Parameters:" << endl;
1607 // {
1608 // OSTab tab2 (out);
1609 // *out << "sendType: " << DistributorSendTypeEnumToString (sendType)
1610 // << endl << "barrierBetween: " << doBarrier << endl;
1611 // }
1612 // }
1613 // #endif // HAVE_TEUCHOS_DEBUG
1614 
1615  TEUCHOS_TEST_FOR_EXCEPTION(
1616  sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
1617  std::logic_error,
1618  "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Ready-send "
1619  "version requires a barrier between posting receives and posting ready "
1620  "ends. This should have been checked before. "
1621  "Please report this bug to the Tpetra developers.");
1622 
1623  const int myProcID = comm_->getRank ();
1624  size_t selfReceiveOffset = 0;
1625 
1626 #ifdef HAVE_TEUCHOS_DEBUG
1627  // Different messages may have different numbers of packets.
1628  size_t totalNumImportPackets = 0;
1629  for (size_t ii = 0; ii < static_cast<size_t> (numImportPacketsPerLID.size ()); ++ii) {
1630  totalNumImportPackets += numImportPacketsPerLID[ii];
1631  }
1632  TEUCHOS_TEST_FOR_EXCEPTION(
1633  static_cast<size_t> (imports.size ()) < totalNumImportPackets,
1634  std::runtime_error,
1635  "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): The 'imports' "
1636  "array must have enough entries to hold the expected number of import "
1637  "packets. imports.size() = " << imports.size() << " < "
1638  "totalNumImportPackets = " << totalNumImportPackets << ".");
1639 #endif // HAVE_TEUCHOS_DEBUG
1640 
1641  // MPI tag for nonblocking receives and blocking sends in this
1642  // method. Some processes might take the "fast" path
1643  // (indicesTo_.empty()) and others might take the "slow" path for
1644  // the same doPosts() call, so the path tag must be the same for
1645  // both.
1646  const int pathTag = 1;
1647  const int tag = this->getTag (pathTag);
1648 
1649  if (debug_) {
1650  TEUCHOS_TEST_FOR_EXCEPTION(
1651  requests_.size () != 0,
1652  std::logic_error,
1653  "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): Process "
1654  << myProcID << ": requests_.size() = " << requests_.size () << " != 0.");
1655  }
1656  if (verbose) {
1657  std::ostringstream os;
1658  os << "Proc " << myProcID << ": doPosts(4 args, Teuchos::ArrayRCP, "
1659  << (indicesTo_.empty () ? "fast" : "slow") << ")" << endl;
1660  *out_ << os.str ();
1661  }
1662 
1663  // Distributor uses requests_.size() as the number of outstanding
1664  // nonblocking message requests, so we resize to zero to maintain
1665  // this invariant.
1666  //
1667  // numReceives_ does _not_ include the self message, if there is
1668  // one. Here, we do actually send a message to ourselves, so we
1669  // include any self message in the "actual" number of receives to
1670  // post.
1671  //
1672  // NOTE (mfh 19 Mar 2012): Epetra_MpiDistributor::DoPosts()
1673  // doesn't (re)allocate its array of requests. That happens in
1674  // CreateFromSends(), ComputeRecvs_(), DoReversePosts() (on
1675  // demand), or Resize_().
1676  const size_type actualNumReceives = as<size_type> (numReceives_) +
1677  as<size_type> (selfMessage_ ? 1 : 0);
1678  requests_.resize (0);
1679 
1680  // Post the nonblocking receives. It's common MPI wisdom to post
1681  // receives before sends. In MPI terms, this means favoring
1682  // adding to the "posted queue" (of receive requests) over adding
1683  // to the "unexpected queue" (of arrived messages not yet matched
1684  // with a receive).
1685  {
1686 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1687  Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4_recvs_);
1688 #endif // TPETRA_DISTRIBUTOR_TIMERS
1689 
1690  size_t curBufferOffset = 0;
1691  size_t curLIDoffset = 0;
1692  for (size_type i = 0; i < actualNumReceives; ++i) {
1693  size_t totalPacketsFrom_i = 0;
1694  for (size_t j = 0; j < lengthsFrom_[i]; ++j) {
1695  totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
1696  }
1697  curLIDoffset += lengthsFrom_[i];
1698  if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
1699  // If my process is receiving these packet(s) from another
1700  // process (not a self-receive), and if there is at least
1701  // one packet to receive:
1702  //
1703  // 1. Set up the persisting view (recvBuf) into the imports
1704  // array, given the offset and size (total number of
1705  // packets from process procsFrom_[i]).
1706  // 2. Start the Irecv and save the resulting request.
1707  ArrayRCP<Packet> recvBuf =
1708  imports.persistingView (curBufferOffset, totalPacketsFrom_i);
1709  requests_.push_back (ireceive<int, Packet> (recvBuf, procsFrom_[i],
1710  tag, *comm_));
1711  }
1712  else { // Receiving these packet(s) from myself
1713  selfReceiveOffset = curBufferOffset; // Remember the offset
1714  }
1715  curBufferOffset += totalPacketsFrom_i;
1716  }
1717  }
1718 
1719  if (doBarrier) {
1720 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1721  Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4_barrier_);
1722 #endif // TPETRA_DISTRIBUTOR_TIMERS
1723  // If we are using ready sends (MPI_Rsend) below, we need to do
1724  // a barrier before we post the ready sends. This is because a
1725  // ready send requires that its matching receive has already
1726  // been posted before the send has been posted. The only way to
1727  // guarantee that in this case is to use a barrier.
1728  comm_->barrier ();
1729  }
1730 
1731 #ifdef TPETRA_DISTRIBUTOR_TIMERS
1732  Teuchos::TimeMonitor timeMonSends (*timer_doPosts4_sends_);
1733 #endif // TPETRA_DISTRIBUTOR_TIMERS
1734 
1735  // setup arrays containing starting-offsets into exports for each send,
1736  // and num-packets-to-send for each send.
1737  Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
1738  size_t maxNumPackets = 0;
1739  size_t curPKToffset = 0;
1740  for (size_t pp=0; pp<numSends_; ++pp) {
1741  sendPacketOffsets[pp] = curPKToffset;
1742  size_t numPackets = 0;
1743  for (size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
1744  numPackets += numExportPacketsPerLID[j];
1745  }
1746  if (numPackets > maxNumPackets) maxNumPackets = numPackets;
1747  packetsPerSend[pp] = numPackets;
1748  curPKToffset += numPackets;
1749  }
1750 
1751  // setup scan through procsTo_ list starting with higher numbered procs
1752  // (should help balance message traffic)
1753  size_t numBlocks = numSends_+ selfMessage_;
1754  size_t procIndex = 0;
1755  while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
1756  ++procIndex;
1757  }
1758  if (procIndex == numBlocks) {
1759  procIndex = 0;
1760  }
1761 
1762  size_t selfNum = 0;
1763  size_t selfIndex = 0;
1764 
1765  if (indicesTo_.empty()) {
1766  if (verbose) {
1767  std::ostringstream os;
1768  os << "Proc " << myProcID
1769  << ": doPosts(4 args, Teuchos::ArrayRCP, fast): posting sends" << endl;
1770  *out_ << os.str ();
1771  }
1772 
1773  // Data are already blocked (laid out) by process, so we don't
1774  // need a separate send buffer (besides the exports array).
1775  for (size_t i = 0; i < numBlocks; ++i) {
1776  size_t p = i + procIndex;
1777  if (p > (numBlocks - 1)) {
1778  p -= numBlocks;
1779  }
1780 
1781  if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
1782  ArrayView<const Packet> tmpSend =
1783  exports.view (sendPacketOffsets[p], packetsPerSend[p]);
1784 
1785  if (sendType == Details::DISTRIBUTOR_SEND) { // the default, so put it first
1786  send<int, Packet> (tmpSend.getRawPtr (),
1787  as<int> (tmpSend.size ()),
1788  procsTo_[p], tag, *comm_);
1789  }
1790  else if (sendType == Details::DISTRIBUTOR_RSEND) {
1791  readySend<int, Packet> (tmpSend.getRawPtr (),
1792  as<int> (tmpSend.size ()),
1793  procsTo_[p], tag, *comm_);
1794  }
1795  else if (sendType == Details::DISTRIBUTOR_ISEND) {
1796  ArrayRCP<const Packet> tmpSendBuf =
1797  exports.persistingView (sendPacketOffsets[p], packetsPerSend[p]);
1798  requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1799  tag, *comm_));
1800  }
1801  else if (sendType == Details::DISTRIBUTOR_SSEND) {
1802  ssend<int, Packet> (tmpSend.getRawPtr (),
1803  as<int> (tmpSend.size ()),
1804  procsTo_[p], tag, *comm_);
1805  }
1806  else {
1807  TEUCHOS_TEST_FOR_EXCEPTION(
1808  true, std::logic_error,
1809  "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1810  "Invalid send type. We should never get here. Please report "
1811  "this bug to the Tpetra developers.");
1812  }
1813  }
1814  else { // "Sending" the message to myself
1815  selfNum = p;
1816  }
1817  }
1818 
1819  if (selfMessage_) {
1820  std::copy (exports.begin()+sendPacketOffsets[selfNum],
1821  exports.begin()+sendPacketOffsets[selfNum]+packetsPerSend[selfNum],
1822  imports.begin()+selfReceiveOffset);
1823  }
1824  if (verbose) {
1825  std::ostringstream os;
1826  os << "Proc " << myProcID
1827  << ": doPosts(4 args, Teuchos::ArrayRCP, fast) done" << endl;
1828  *out_ << os.str ();
1829  }
1830  }
1831  else { // data are not blocked by proc, use send buffer
1832  if (verbose) {
1833  std::ostringstream os;
1834  os << "Proc " << myProcID
1835  << ": doPosts(4 args, Teuchos::ArrayRCP, slow): posting sends" << endl;
1836  *out_ << os.str ();
1837  }
1838 
1839  // FIXME (mfh 05 Mar 2013) This may be broken for Isend.
1840  ArrayRCP<Packet> sendArray (maxNumPackets); // send buffer
1841 
1842  TEUCHOS_TEST_FOR_EXCEPTION(
1843  sendType == Details::DISTRIBUTOR_ISEND,
1844  std::logic_error,
1845  "Tpetra::Distributor::doPosts(4 args, Teuchos::ArrayRCP): "
1846  "The \"send buffer\" code path may not necessarily work with nonblocking sends.");
1847 
1848  Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
1849  size_t ioffset = 0;
1850  for (int j=0; j<numExportPacketsPerLID.size(); ++j) {
1851  indicesOffsets[j] = ioffset;
1852  ioffset += numExportPacketsPerLID[j];
1853  }
1854 
1855  for (size_t i = 0; i < numBlocks; ++i) {
1856  size_t p = i + procIndex;
1857  if (p > (numBlocks - 1)) {
1858  p -= numBlocks;
1859  }
1860 
1861  if (procsTo_[p] != myProcID) {
1862  typename ArrayView<const Packet>::iterator srcBegin, srcEnd;
1863  size_t sendArrayOffset = 0;
1864  size_t j = startsTo_[p];
1865  size_t numPacketsTo_p = 0;
1866  for (size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
1867  srcBegin = exports.begin() + indicesOffsets[j];
1868  srcEnd = srcBegin + numExportPacketsPerLID[j];
1869  numPacketsTo_p += numExportPacketsPerLID[j];
1870  std::copy (srcBegin, srcEnd, sendArray.begin()+sendArrayOffset);
1871  sendArrayOffset += numExportPacketsPerLID[j];
1872  }
1873  if (numPacketsTo_p > 0) {
1874  ArrayView<const Packet> tmpSend =
1875  sendArray.view (0, numPacketsTo_p);
1876 
1877  if (sendType == Details::DISTRIBUTOR_RSEND) {
1878  readySend<int, Packet> (tmpSend.getRawPtr (),
1879  as<int> (tmpSend.size ()),
1880  procsTo_[p], tag, *comm_);
1881  }
1882  else if (sendType == Details::DISTRIBUTOR_ISEND) {
1883  ArrayRCP<const Packet> tmpSendBuf =
1884  sendArray.persistingView (0, numPacketsTo_p);
1885  requests_.push_back (isend<int, Packet> (tmpSendBuf, procsTo_[p],
1886  tag, *comm_));
1887  }
1888  else if (sendType == Details::DISTRIBUTOR_SSEND) {
1889  ssend<int, Packet> (tmpSend.getRawPtr (),
1890  as<int> (tmpSend.size ()),
1891  procsTo_[p], tag, *comm_);
1892  }
1893  else { // if (sendType == Details::DISTRIBUTOR_SSEND)
1894  send<int, Packet> (tmpSend.getRawPtr (),
1895  as<int> (tmpSend.size ()),
1896  procsTo_[p], tag, *comm_);
1897  }
1898  }
1899  }
1900  else { // "Sending" the message to myself
1901  selfNum = p;
1902  selfIndex = startsTo_[p];
1903  }
1904  }
1905 
1906  if (selfMessage_) {
1907  for (size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
1908  std::copy (exports.begin()+indicesOffsets[selfIndex],
1909  exports.begin()+indicesOffsets[selfIndex]+numExportPacketsPerLID[selfIndex],
1910  imports.begin() + selfReceiveOffset);
1911  selfReceiveOffset += numExportPacketsPerLID[selfIndex];
1912  ++selfIndex;
1913  }
1914  }
1915  if (verbose) {
1916  std::ostringstream os;
1917  os << "Proc " << myProcID
1918  << ": doPosts(4 args, Teuchos::ArrayRCP, slow) done" << endl;
1919  *out_ << os.str ();
1920  }
1921  }
1922  }
1923 
1924  template <class Packet>
1925  void Distributor::
1926  doReversePostsAndWaits (const Teuchos::ArrayView<const Packet>& exports,
1927  size_t numPackets,
1928  const Teuchos::ArrayView<Packet>& imports)
1929  {
1930  using Teuchos::arcp;
1931  using Teuchos::ArrayRCP;
1932  using Teuchos::as;
1933 
1934  // doReversePosts() takes exports and imports as ArrayRCPs,
1935  // requiring that the memory locations are persisting. However,
1936  // they need only persist within the scope of that routine, so it
1937  // is safe for us to use nonpersisting references in this case.
1938 
1939  // mfh 04 Apr 2012: For some reason, calling arcp<const Packet>
1940  // for Packet=std::complex<T> (e.g., T=float) fails to compile
1941  // with some versions of GCC. The issue only arises with the
1942  // exports array. This is why we construct a separate nonowning
1943  // ArrayRCP.
1944  typedef typename ArrayRCP<const Packet>::size_type size_type;
1945  ArrayRCP<const Packet> exportsArcp (exports.getRawPtr(), as<size_type> (0),
1946  exports.size(), false);
1947  // mfh 04 Apr 2012: This is the offending code. This statement
1948  // would normally be in place of "exportsArcp" in the
1949  // doReversePosts() call below.
1950  //arcp<const Packet> (exports.getRawPtr(), 0, exports.size(), false)
1951  doReversePosts (exportsArcp,
1952  numPackets,
1953  arcp<Packet> (imports.getRawPtr (), 0, imports.size (), false));
1954  doReverseWaits ();
1955 
1956  lastRoundBytesSend_ = exports.size() * sizeof(Packet);
1957  lastRoundBytesRecv_ = imports.size() * sizeof(Packet);
1958  }
1959 
1960  template <class Packet>
1961  void Distributor::
1962  doReversePostsAndWaits (const Teuchos::ArrayView<const Packet>& exports,
1963  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
1964  const Teuchos::ArrayView<Packet> &imports,
1965  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
1966  {
1967  using Teuchos::as;
1968  using Teuchos::arcp;
1969  using Teuchos::ArrayRCP;
1970 
1971  TEUCHOS_TEST_FOR_EXCEPTION(
1972  requests_.size () != 0, std::runtime_error, "Tpetra::Distributor::"
1973  "doReversePostsAndWaits(4 args): There are " << requests_.size ()
1974  << " outstanding nonblocking messages pending. It is incorrect to call "
1975  "this method with posts outstanding.");
1976 
1977  // doReversePosts() accepts the exports and imports arrays as
1978  // ArrayRCPs, requiring that the memory location is persisting (as
1979  // is necessary for nonblocking receives). However, it need only
1980  // persist until doReverseWaits() completes, so it is safe for us
1981  // to use a nonpersisting reference in this case. The use of a
1982  // nonpersisting reference is purely a performance optimization.
1983 
1984  // mfh 02 Apr 2012: For some reason, calling arcp<const Packet>
1985  // for Packet=std::complex<double> fails to compile with some
1986  // versions of GCC. The issue only arises with the exports array.
1987  // This is why we construct a separate nonowning ArrayRCP.
1988  typedef typename ArrayRCP<const Packet>::size_type size_type;
1989  ArrayRCP<const Packet> exportsArcp (exports.getRawPtr (), as<size_type> (0),
1990  exports.size (), false);
1991  doReversePosts (exportsArcp,
1992  numExportPacketsPerLID,
1993  arcp<Packet> (imports.getRawPtr (), 0, imports.size (), false),
1994  numImportPacketsPerLID);
1995  doReverseWaits ();
1996 
1997  lastRoundBytesSend_ = exports.size() * sizeof(Packet);
1998  lastRoundBytesRecv_ = imports.size() * sizeof(Packet);
1999  }
2000 
2001  template <class Packet>
2002  void Distributor::
2003  doReversePosts (const Teuchos::ArrayRCP<const Packet>& exports,
2004  size_t numPackets,
2005  const Teuchos::ArrayRCP<Packet>& imports)
2006  {
2007  // FIXME (mfh 29 Mar 2012) WHY?
2008  TEUCHOS_TEST_FOR_EXCEPTION(
2009  ! indicesTo_.empty (), std::runtime_error,
2010  "Tpetra::Distributor::doReversePosts(3 args): Can only do reverse "
2011  "communication when original data are blocked by process.");
2012  if (reverseDistributor_.is_null ()) {
2013  createReverseDistributor ();
2014  }
2015  reverseDistributor_->doPosts (exports, numPackets, imports);
2016  }
2017 
2018  template <class Packet>
2019  void Distributor::
2020  doReversePosts (const Teuchos::ArrayRCP<const Packet>& exports,
2021  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2022  const Teuchos::ArrayRCP<Packet>& imports,
2023  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
2024  {
2025  // FIXME (mfh 29 Mar 2012) WHY?
2026  TEUCHOS_TEST_FOR_EXCEPTION(
2027  ! indicesTo_.empty (), std::runtime_error,
2028  "Tpetra::Distributor::doReversePosts(3 args): Can only do reverse "
2029  "communication when original data are blocked by process.");
2030  if (reverseDistributor_.is_null ()) {
2031  createReverseDistributor ();
2032  }
2033  reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
2034  imports, numImportPacketsPerLID);
2035  }
2036 
2037  template <class ExpView, class ImpView>
2038  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2039  Distributor::
2040  doPostsAndWaits (const ExpView& exports,
2041  size_t numPackets,
2042  const ImpView& imports)
2043  {
2044  using Teuchos::RCP;
2045  using Teuchos::rcp;
2046  using std::endl;
2047  const bool verbose = Tpetra::Details::Behavior::verbose("Distributor");
2048 
2049  RCP<Teuchos::OSTab> tab0, tab1;
2050  if (verbose) {
2051  tab0 = rcp (new Teuchos::OSTab (out_));
2052  const int myRank = comm_->getRank ();
2053  std::ostringstream os;
2054  os << "Proc " << myRank
2055  << ": Distributor::doPostsAndWaits(3 args, Kokkos): "
2056  << "{sendType: " << DistributorSendTypeEnumToString (sendType_)
2057  << ", barrierBetween: " << barrierBetween_ << "}" << endl;
2058  *out_ << os.str ();
2059  tab1 = rcp (new Teuchos::OSTab (out_));
2060  }
2061 
2062  TEUCHOS_TEST_FOR_EXCEPTION(
2063  requests_.size () != 0, std::runtime_error, "Tpetra::Distributor::"
2064  "doPostsAndWaits(3 args): There are " << requests_.size () <<
2065  " outstanding nonblocking messages pending. It is incorrect to call "
2066  "this method with posts outstanding.");
2067 
2068  if (verbose) {
2069  const int myRank = comm_->getRank ();
2070  std::ostringstream os;
2071  os << "Proc " << myRank
2072  << ": Distributor::doPostsAndWaits: Call doPosts" << endl;
2073  *out_ << os.str ();
2074  }
2075  doPosts (exports, numPackets, imports);
2076  if (verbose) {
2077  const int myRank = comm_->getRank ();
2078  std::ostringstream os;
2079  os << "Proc " << myRank
2080  << ": Distributor::doPostsAndWaits: Call doWaits" << endl;
2081  *out_ << os.str ();
2082  }
2083  doWaits ();
2084  }
2085 
2086  template <class ExpView, class ImpView>
2087  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2088  Distributor::
2089  doPostsAndWaits (const ExpView& exports,
2090  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2091  const ImpView& imports,
2092  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
2093  {
2094  TEUCHOS_TEST_FOR_EXCEPTION(
2095  requests_.size () != 0, std::runtime_error,
2096  "Tpetra::Distributor::doPostsAndWaits(4 args): There are "
2097  << requests_.size () << " outstanding nonblocking messages pending. "
2098  "It is incorrect to call this method with posts outstanding.");
2099 
2100  doPosts (exports, numExportPacketsPerLID, imports, numImportPacketsPerLID);
2101  doWaits ();
2102  }
2103 
2104 
2105  template <class ExpView, class ImpView>
2106  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2107  Distributor::
2108  doPosts (const ExpView &exports,
2109  size_t numPackets,
2110  const ImpView &imports)
2111  {
2112  using Teuchos::Array;
2113  using Teuchos::as;
2114  using Teuchos::FancyOStream;
2115  using Teuchos::includesVerbLevel;
2116  using Teuchos::ireceive;
2117  using Teuchos::isend;
2118  using Teuchos::OSTab;
2119  using Teuchos::readySend;
2120  using Teuchos::send;
2121  using Teuchos::ssend;
2122  using Teuchos::TypeNameTraits;
2123  using Teuchos::typeName;
2124  using std::endl;
2125  using Kokkos::Compat::create_const_view;
2126  using Kokkos::Compat::create_view;
2127  using Kokkos::Compat::subview_offset;
2128  using Kokkos::Compat::deep_copy_offset;
2129  typedef Array<size_t>::size_type size_type;
2130  typedef ExpView exports_view_type;
2131  typedef ImpView imports_view_type;
2132 
2133  const bool verbose = Tpetra::Details::Behavior::verbose("Distributor");
2134 #ifdef KOKKOS_ENABLE_CUDA
2135  static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
2136  ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
2137  "Please do not use Tpetra::Distributor with UVM "
2138  "allocations. See GitHub issue #1088.");
2139 #endif // KOKKOS_ENABLE_CUDA
2140 
2141 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2142  Teuchos::TimeMonitor timeMon (*timer_doPosts3_);
2143 #endif // TPETRA_DISTRIBUTOR_TIMERS
2144 
2145  const int myRank = comm_->getRank ();
2146  // Run-time configurable parameters that come from the input
2147  // ParameterList set by setParameterList().
2148  const Details::EDistributorSendType sendType = sendType_;
2149  const bool doBarrier = barrierBetween_;
2150 
2151  Teuchos::OSTab tab0 (out_);
2152  if (verbose) {
2153  std::ostringstream os;
2154  os << "Proc " << myRank
2155  << ": Distributor::doPosts(3 args, Kokkos)" << endl;
2156  *out_ << os.str ();
2157  }
2158  Teuchos::OSTab tab1 (out_);
2159 
2160  TEUCHOS_TEST_FOR_EXCEPTION(
2161  sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
2162  std::logic_error,
2163  "Tpetra::Distributor::doPosts(3 args, Kokkos): Ready-send version "
2164  "requires a barrier between posting receives and posting ready sends. "
2165  "This should have been checked before. "
2166  "Please report this bug to the Tpetra developers.");
2167 
2168  size_t selfReceiveOffset = 0;
2169 
2170  // mfh 30 Mar 2016: See Github Issue #227 to see why we need to
2171  // check whether we're doing reverse mode before checking the
2172  // length of the imports array.
2173  if (false /* howInitialized_ != Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE */) {
2174  // Each message has the same number of packets.
2175  const size_t totalNumImportPackets = totalReceiveLength_ * numPackets;
2176 
2177  if (verbose) {
2178  std::ostringstream os;
2179  os << "Proc " << myRank << ": doPosts: totalNumImportPackets = " <<
2180  totalNumImportPackets << " = " << totalReceiveLength_ << " * " <<
2181  numPackets << "; imports.extent(0) = " << imports.extent (0)
2182  << endl;
2183  *out_ << os.str ();
2184  }
2185 
2186 #ifdef HAVE_TPETRA_DEBUG
2187  // mfh 31 Mar 2016: Extra special all-reduce check to help diagnose #227.
2188  {
2189  const size_t importBufSize = static_cast<size_t> (imports.extent (0));
2190  const int lclBad = (importBufSize < totalNumImportPackets) ? 1 : 0;
2191  int gblBad = 0;
2192  using Teuchos::reduceAll;
2193  using Teuchos::REDUCE_MAX;
2194  using Teuchos::outArg;
2195  reduceAll (*comm_, REDUCE_MAX, lclBad, outArg (gblBad));
2196  TEUCHOS_TEST_FOR_EXCEPTION
2197  (gblBad != 0,
2198  std::runtime_error,
2199  "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2200  "On one or more MPI processes, the 'imports' array "
2201  "does not have enough entries to hold the expected number of "
2202  "import packets. ");
2203  }
2204 #else
2205  TEUCHOS_TEST_FOR_EXCEPTION
2206  (static_cast<size_t> (imports.extent (0)) < totalNumImportPackets,
2207  std::runtime_error,
2208  "Tpetra::Distributor::doPosts(3 args, Kokkos): The 'imports' "
2209  "array must have enough entries to hold the expected number of import "
2210  "packets. imports.extent(0) = " << imports.extent (0) << " < "
2211  "totalNumImportPackets = " << totalNumImportPackets << " = "
2212  "totalReceiveLength_ (" << totalReceiveLength_ << ") * numPackets ("
2213  << numPackets << ").");
2214 #endif // HAVE_TPETRA_DEBUG
2215  }
2216 
2217  // MPI tag for nonblocking receives and blocking sends in this
2218  // method. Some processes might take the "fast" path
2219  // (indicesTo_.empty()) and others might take the "slow" path for
2220  // the same doPosts() call, so the path tag must be the same for
2221  // both.
2222  const int pathTag = 0;
2223  const int tag = this->getTag (pathTag);
2224 
2225 #ifdef HAVE_TPETRA_DEBUG
2226  TEUCHOS_TEST_FOR_EXCEPTION
2227  (requests_.size () != 0,
2228  std::logic_error,
2229  "Tpetra::Distributor::doPosts(3 args, Kokkos): Process "
2230  << myRank << ": requests_.size() = " << requests_.size () << " != 0.");
2231 #endif // HAVE_TPETRA_DEBUG
2232 
2233  // Distributor uses requests_.size() as the number of outstanding
2234  // nonblocking message requests, so we resize to zero to maintain
2235  // this invariant.
2236  //
2237  // numReceives_ does _not_ include the self message, if there is
2238  // one. Here, we do actually send a message to ourselves, so we
2239  // include any self message in the "actual" number of receives to
2240  // post.
2241  //
2242  // NOTE (mfh 19 Mar 2012): Epetra_MpiDistributor::DoPosts()
2243  // doesn't (re)allocate its array of requests. That happens in
2244  // CreateFromSends(), ComputeRecvs_(), DoReversePosts() (on
2245  // demand), or Resize_().
2246  const size_type actualNumReceives = as<size_type> (numReceives_) +
2247  as<size_type> (selfMessage_ ? 1 : 0);
2248  requests_.resize (0);
2249 
2250  if (verbose) {
2251  std::ostringstream os;
2252  os << "Proc " << myRank << ": doPosts(3 args, Kokkos, "
2253  << (indicesTo_.empty () ? "fast" : "slow") << "): Post receives"
2254  << endl;
2255  *out_ << os.str ();
2256  }
2257 
2258  // Post the nonblocking receives. It's common MPI wisdom to post
2259  // receives before sends. In MPI terms, this means favoring
2260  // adding to the "posted queue" (of receive requests) over adding
2261  // to the "unexpected queue" (of arrived messages not yet matched
2262  // with a receive).
2263  {
2264 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2265  Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts3_recvs_);
2266 #endif // TPETRA_DISTRIBUTOR_TIMERS
2267 
2268  size_t curBufferOffset = 0;
2269  for (size_type i = 0; i < actualNumReceives; ++i) {
2270  const size_t curBufLen = lengthsFrom_[i] * numPackets;
2271  if (procsFrom_[i] != myRank) {
2272  if (verbose) {
2273  std::ostringstream os;
2274  os << "Proc " << myRank << ": doPosts(3 args, Kokkos, "
2275  << (indicesTo_.empty () ? "fast" : "slow") << "): "
2276  << "Post irecv: {source: " << procsFrom_[i]
2277  << ", tag: " << tag << "}" << endl;
2278  *out_ << os.str ();
2279  }
2280  // If my process is receiving these packet(s) from another
2281  // process (not a self-receive):
2282  //
2283  // 1. Set up the persisting view (recvBuf) of the imports
2284  // array, given the offset and size (total number of
2285  // packets from process procsFrom_[i]).
2286  // 2. Start the Irecv and save the resulting request.
2287  TEUCHOS_TEST_FOR_EXCEPTION(
2288  curBufferOffset + curBufLen > static_cast<size_t> (imports.size ()),
2289  std::logic_error, "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2290  "Exceeded size of 'imports' array in packing loop on Process " <<
2291  myRank << ". imports.size() = " << imports.size () << " < "
2292  "curBufferOffset(" << curBufferOffset << ") + curBufLen(" <<
2293  curBufLen << ").");
2294  imports_view_type recvBuf =
2295  subview_offset (imports, curBufferOffset, curBufLen);
2296  requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
2297  tag, *comm_));
2298  }
2299  else { // Receiving from myself
2300  selfReceiveOffset = curBufferOffset; // Remember the self-recv offset
2301  }
2302  curBufferOffset += curBufLen;
2303  }
2304  }
2305 
2306  if (doBarrier) {
2307 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2308  Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts3_barrier_);
2309 #endif // TPETRA_DISTRIBUTOR_TIMERS
2310 
2311  if (verbose) {
2312  std::ostringstream os;
2313  os << "Proc " << myRank << ": doPosts(3 args, Kokkos, "
2314  << (indicesTo_.empty () ? "fast" : "slow") << "): Barrier" << endl;
2315  *out_ << os.str ();
2316  }
2317  // If we are using ready sends (MPI_Rsend) below, we need to do
2318  // a barrier before we post the ready sends. This is because a
2319  // ready send requires that its matching receive has already
2320  // been posted before the send has been posted. The only way to
2321  // guarantee that in this case is to use a barrier.
2322  comm_->barrier ();
2323  }
2324 
2325 #ifdef TPETRA_DISTRIBUTOR_TIMERS
2326  Teuchos::TimeMonitor timeMonSends (*timer_doPosts3_sends_);
2327 #endif // TPETRA_DISTRIBUTOR_TIMERS
2328 
2329  // setup scan through procsTo_ list starting with higher numbered procs
2330  // (should help balance message traffic)
2331  //
2332  // FIXME (mfh 20 Feb 2013) Why haven't we precomputed this?
2333  // It doesn't depend on the input at all.
2334  size_t numBlocks = numSends_ + selfMessage_;
2335  size_t procIndex = 0;
2336  while ((procIndex < numBlocks) && (procsTo_[procIndex] < myRank)) {
2337  ++procIndex;
2338  }
2339  if (procIndex == numBlocks) {
2340  procIndex = 0;
2341  }
2342 
2343  size_t selfNum = 0;
2344  size_t selfIndex = 0;
2345 
2346  if (verbose) {
2347  std::ostringstream os;
2348  os << "Proc " << myRank << ": doPosts(3 args, Kokkos, "
2349  << (indicesTo_.empty () ? "fast" : "slow") << "): Post sends" << endl;
2350  *out_ << os.str ();
2351  }
2352 
2353  if (indicesTo_.empty()) {
2354  if (verbose) {
2355  std::ostringstream os;
2356  os << "Proc " << myRank
2357  << ": doPosts(3 args, Kokkos, fast): posting sends" << endl;
2358  *out_ << os.str ();
2359  }
2360 
2361  // Data are already blocked (laid out) by process, so we don't
2362  // need a separate send buffer (besides the exports array).
2363  for (size_t i = 0; i < numBlocks; ++i) {
2364  size_t p = i + procIndex;
2365  if (p > (numBlocks - 1)) {
2366  p -= numBlocks;
2367  }
2368 
2369  if (procsTo_[p] != myRank) {
2370  if (verbose) {
2371  std::ostringstream os;
2372  os << "Proc " << myRank << ": doPosts(3 args, Kokkos, fast): Post send: "
2373  "{target: " << procsTo_[p] << ", tag: " << tag << "}" << endl;
2374  *out_ << os.str ();
2375  }
2376  // if (debug_) {
2377  // const size_t off = startsTo_[p] * numPackets;
2378  // const size_t len = lengthsTo_[p] * numPackets;
2379  // TEUCHOS_TEST_FOR_EXCEPTION
2380  // (static_cast<size_t> (off + len) >
2381  // static_cast<size_t> (exports.size ()), std::logic_error,
2382  // "doPosts: off=" << off << " + len=" << len << " > "
2383  // "exports.size()=" << exports.size () << ".");
2384  // }
2385 
2386  exports_view_type tmpSend = subview_offset(
2387  exports, startsTo_[p]*numPackets, lengthsTo_[p]*numPackets);
2388 
2389  if (sendType == Details::DISTRIBUTOR_SEND) {
2390  send<int> (tmpSend,
2391  as<int> (tmpSend.size ()),
2392  procsTo_[p], tag, *comm_);
2393  }
2394  else if (sendType == Details::DISTRIBUTOR_ISEND) {
2395  exports_view_type tmpSendBuf =
2396  subview_offset (exports, startsTo_[p] * numPackets,
2397  lengthsTo_[p] * numPackets);
2398  requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2399  tag, *comm_));
2400  }
2401  else if (sendType == Details::DISTRIBUTOR_RSEND) {
2402  readySend<int> (tmpSend,
2403  as<int> (tmpSend.size ()),
2404  procsTo_[p], tag, *comm_);
2405  }
2406  else if (sendType == Details::DISTRIBUTOR_SSEND) {
2407  ssend<int> (tmpSend,
2408  as<int> (tmpSend.size ()),
2409  procsTo_[p], tag, *comm_);
2410  } else {
2411  TEUCHOS_TEST_FOR_EXCEPTION(
2412  true,
2413  std::logic_error,
2414  "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2415  "Invalid send type. We should never get here. "
2416  "Please report this bug to the Tpetra developers.");
2417  }
2418  }
2419  else { // "Sending" the message to myself
2420  selfNum = p;
2421  }
2422  }
2423 
2424  if (selfMessage_) {
2425  if (verbose) {
2426  std::ostringstream os;
2427  os << "Proc " << myRank
2428  << ": doPosts(3 args, Kokkos, fast): Self-send" << endl;
2429  *out_ << os.str ();
2430  }
2431  // This is how we "send a message to ourself": we copy from
2432  // the export buffer to the import buffer. That saves
2433  // Teuchos::Comm implementations other than MpiComm (in
2434  // particular, SerialComm) the trouble of implementing self
2435  // messages correctly. (To do this right, SerialComm would
2436  // need internal buffer space for messages, keyed on the
2437  // message's tag.)
2438  deep_copy_offset(imports, exports, selfReceiveOffset,
2439  startsTo_[selfNum]*numPackets,
2440  lengthsTo_[selfNum]*numPackets);
2441  }
2442  if (verbose) {
2443  std::ostringstream os;
2444  os << "Proc " << myRank << ": doPosts(3 args, Kokkos, fast) done" << endl;
2445  *out_ << os.str ();
2446  }
2447  }
2448  else { // data are not blocked by proc, use send buffer
2449  if (verbose) {
2450  std::ostringstream os;
2451  os << "Proc " << myRank
2452  << ": doPosts(3 args, Kokkos, slow): posting sends" << endl;
2453  *out_ << os.str ();
2454  }
2455 
2456  typedef typename ExpView::non_const_value_type Packet;
2457  typedef typename ExpView::array_layout Layout;
2458  typedef typename ExpView::device_type Device;
2459  typedef typename ExpView::memory_traits Mem;
2460  Kokkos::View<Packet*,Layout,Device,Mem> sendArray ("sendArray",
2461  maxSendLength_ * numPackets);
2462 
2463  // FIXME (mfh 05 Mar 2013) This is broken for Isend (nonblocking
2464  // sends), because the buffer is only long enough for one send.
2465  TEUCHOS_TEST_FOR_EXCEPTION(
2466  sendType == Details::DISTRIBUTOR_ISEND,
2467  std::logic_error,
2468  "Tpetra::Distributor::doPosts(3 args, Kokkos): The \"send buffer\" code path "
2469  "doesn't currently work with nonblocking sends.");
2470 
2471  for (size_t i = 0; i < numBlocks; ++i) {
2472  size_t p = i + procIndex;
2473  if (p > (numBlocks - 1)) {
2474  p -= numBlocks;
2475  }
2476 
2477  if (procsTo_[p] != myRank) {
2478  if (verbose) {
2479  std::ostringstream os;
2480  os << "Proc " << myRank
2481  << ": doPosts(3 args, Kokkos, slow): Post send: {target: "
2482  << procsTo_[p] << ", tag: " << tag << "}" << endl;
2483  *out_ << os.str ();
2484  }
2485 
2486  size_t sendArrayOffset = 0;
2487  size_t j = startsTo_[p];
2488  for (size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
2489  deep_copy_offset(sendArray, exports, sendArrayOffset,
2490  indicesTo_[j]*numPackets, numPackets);
2491  sendArrayOffset += numPackets;
2492  }
2493  ImpView tmpSend =
2494  subview_offset(sendArray, size_t(0), lengthsTo_[p]*numPackets);
2495 
2496  if (sendType == Details::DISTRIBUTOR_SEND) {
2497  send<int> (tmpSend,
2498  as<int> (tmpSend.size ()),
2499  procsTo_[p], tag, *comm_);
2500  }
2501  else if (sendType == Details::DISTRIBUTOR_ISEND) {
2502  exports_view_type tmpSendBuf =
2503  subview_offset (sendArray, size_t(0), lengthsTo_[p] * numPackets);
2504  requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
2505  tag, *comm_));
2506  }
2507  else if (sendType == Details::DISTRIBUTOR_RSEND) {
2508  readySend<int> (tmpSend,
2509  as<int> (tmpSend.size ()),
2510  procsTo_[p], tag, *comm_);
2511  }
2512  else if (sendType == Details::DISTRIBUTOR_SSEND) {
2513  ssend<int> (tmpSend,
2514  as<int> (tmpSend.size ()),
2515  procsTo_[p], tag, *comm_);
2516  }
2517  else {
2518  TEUCHOS_TEST_FOR_EXCEPTION(
2519  true,
2520  std::logic_error,
2521  "Tpetra::Distributor::doPosts(3 args, Kokkos): "
2522  "Invalid send type. We should never get here. "
2523  "Please report this bug to the Tpetra developers.");
2524  }
2525  }
2526  else { // "Sending" the message to myself
2527  selfNum = p;
2528  selfIndex = startsTo_[p];
2529  }
2530  }
2531 
2532  if (selfMessage_) {
2533  if (verbose) {
2534  std::ostringstream os;
2535  os << "Proc " << myRank
2536  << ": doPosts(3 args, Kokkos, slow): Self-send" << endl;
2537  *out_ << os.str ();
2538  }
2539  for (size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
2540  deep_copy_offset(imports, exports, selfReceiveOffset,
2541  indicesTo_[selfIndex]*numPackets, numPackets);
2542  ++selfIndex;
2543  selfReceiveOffset += numPackets;
2544  }
2545  }
2546  if (verbose) {
2547  std::ostringstream os;
2548  os << "Proc " << myRank
2549  << ": doPosts(3 args, Kokkos, slow) done" << endl;
2550  *out_ << os.str ();
2551  }
2552  }
2553 
2554  if (verbose) {
2555  std::ostringstream os;
2556  os << "Proc " << myRank << ": doPosts done" << endl;
2557  *out_ << os.str ();
2558  }
2559  }
2560 
  /// \brief Post sends and receives for the variable-packets-per-LID case
  ///   (Kokkos::View overload of doPosts).
  ///
  /// Unlike the 3-argument overload, each local ID (LID) may contribute a
  /// different number of packets; the per-LID counts come in through
  /// \c numExportPacketsPerLID and \c numImportPacketsPerLID.  This method
  /// posts nonblocking receives first, optionally barriers (required for
  /// MPI_Rsend), then posts sends via the "fast" path (data already blocked
  /// by destination process; indicesTo_ empty) or the "slow" path (data
  /// gathered into a temporary send buffer, one message at a time).
  ///
  /// \param exports [in] Kokkos::View of packed data to send.
  /// \param numExportPacketsPerLID [in] Number of packets per export LID.
  /// \param imports [out] Kokkos::View receiving the packed data.
  /// \param numImportPacketsPerLID [in] Number of packets per import LID.
  ///
  /// Nonblocking requests created here are appended to requests_; callers
  /// complete them with doWaits().
  template <class ExpView, class ImpView>
  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
  Distributor::
  doPosts (const ExpView &exports,
           const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
           const ImpView &imports,
           const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
  {
    using Teuchos::Array;
    using Teuchos::as;
    using Teuchos::ireceive;
    using Teuchos::isend;
    using Teuchos::readySend;
    using Teuchos::send;
    using Teuchos::ssend;
    using Teuchos::TypeNameTraits;
#ifdef HAVE_TEUCHOS_DEBUG
    using Teuchos::OSTab;
#endif // HAVE_TEUCHOS_DEBUG
    using std::endl;
    using Kokkos::Compat::create_const_view;
    using Kokkos::Compat::create_view;
    using Kokkos::Compat::subview_offset;
    using Kokkos::Compat::deep_copy_offset;
    typedef Array<size_t>::size_type size_type;
    typedef ExpView exports_view_type;
    typedef ImpView imports_view_type;
    const bool verbose = Tpetra::Details::Behavior::verbose("Distributor");

#ifdef KOKKOS_ENABLE_CUDA
    // UVM buffers can race with the device when handed to MPI; forbid them.
    static_assert (! std::is_same<typename ExpView::memory_space, Kokkos::CudaUVMSpace>::value &&
                   ! std::is_same<typename ImpView::memory_space, Kokkos::CudaUVMSpace>::value,
                   "Please do not use Tpetra::Distributor with UVM "
                   "allocations. See GitHub issue #1088.");
#endif // KOKKOS_ENABLE_CUDA

    Teuchos::OSTab tab (out_);

#ifdef TPETRA_DISTRIBUTOR_TIMERS
    Teuchos::TimeMonitor timeMon (*timer_doPosts4_);
#endif // TPETRA_DISTRIBUTOR_TIMERS

    // Run-time configurable parameters that come from the input
    // ParameterList set by setParameterList().
    const Details::EDistributorSendType sendType = sendType_;
    const bool doBarrier = barrierBetween_;

// #ifdef HAVE_TEUCHOS_DEBUG
//     // Prepare for verbose output, if applicable.
//     Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel ();
//     RCP<Teuchos::FancyOStream> out = this->getOStream ();
//     const bool doPrint = out.get () && (comm_->getRank () == 0) &&
//       includesVerbLevel (verbLevel, Teuchos::VERB_EXTREME, true);

//     if (doPrint) {
//       // Only need one process to print out parameters.
//       *out << "Distributor::doPosts (4 args)" << endl;
//     }
//     // Add one tab level.  We declare this outside the doPrint scopes
//     // so that the tab persists until the end of this method.
//     Teuchos::OSTab tab = this->getOSTab ();
//     if (doPrint) {
//       *out << "Parameters:" << endl;
//       {
//         OSTab tab2 (out);
//         *out << "sendType: " << DistributorSendTypeEnumToString (sendType)
//              << endl << "barrierBetween: " << doBarrier << endl;
//       }
//     }
// #endif // HAVE_TEUCHOS_DEBUG

    // A ready send (MPI_Rsend) is only valid once its matching receive
    // has been posted; the barrier below is what guarantees that.
    TEUCHOS_TEST_FOR_EXCEPTION(
      sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier,
      std::logic_error, "Tpetra::Distributor::doPosts(4 args, Kokkos): Ready-send "
      "version requires a barrier between posting receives and posting ready "
      "sends. This should have been checked before. "
      "Please report this bug to the Tpetra developers.");

    const int myProcID = comm_->getRank ();
    size_t selfReceiveOffset = 0;

#ifdef HAVE_TEUCHOS_DEBUG
    // Different messages may have different numbers of packets.
    size_t totalNumImportPackets = 0;
    for (size_type ii = 0; ii < numImportPacketsPerLID.size (); ++ii) {
      totalNumImportPackets += numImportPacketsPerLID[ii];
    }
    TEUCHOS_TEST_FOR_EXCEPTION(
      imports.extent (0) < totalNumImportPackets, std::runtime_error,
      "Tpetra::Distributor::doPosts(4 args, Kokkos): The 'imports' array must have "
      "enough entries to hold the expected number of import packets. "
      "imports.extent(0) = " << imports.extent (0) << " < "
      "totalNumImportPackets = " << totalNumImportPackets << ".");
#endif // HAVE_TEUCHOS_DEBUG

    // MPI tag for nonblocking receives and blocking sends in this
    // method.  Some processes might take the "fast" path
    // (indicesTo_.empty()) and others might take the "slow" path for
    // the same doPosts() call, so the path tag must be the same for
    // both.
    const int pathTag = 1;
    const int tag = this->getTag (pathTag);

    if (debug_) {
      TEUCHOS_TEST_FOR_EXCEPTION(
        requests_.size () != 0, std::logic_error, "Tpetra::Distributor::"
        "doPosts(4 args, Kokkos): Process " << myProcID << ": requests_.size () = "
        << requests_.size () << " != 0.");
    }
    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myProcID << ": doPosts(4 args, Kokkos, "
         << (indicesTo_.empty () ? "fast" : "slow") << ")" << endl;
      *out_ << os.str ();
    }

    // Distributor uses requests_.size() as the number of outstanding
    // nonblocking message requests, so we resize to zero to maintain
    // this invariant.
    //
    // numReceives_ does _not_ include the self message, if there is
    // one.  Here, we do actually send a message to ourselves, so we
    // include any self message in the "actual" number of receives to
    // post.
    //
    // NOTE (mfh 19 Mar 2012): Epetra_MpiDistributor::DoPosts()
    // doesn't (re)allocate its array of requests.  That happens in
    // CreateFromSends(), ComputeRecvs_(), DoReversePosts() (on
    // demand), or Resize_().
    const size_type actualNumReceives = as<size_type> (numReceives_) +
      as<size_type> (selfMessage_ ? 1 : 0);
    requests_.resize (0);

    // Post the nonblocking receives.  It's common MPI wisdom to post
    // receives before sends.  In MPI terms, this means favoring
    // adding to the "posted queue" (of receive requests) over adding
    // to the "unexpected queue" (of arrived messages not yet matched
    // with a receive).
    {
#ifdef TPETRA_DISTRIBUTOR_TIMERS
      Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4_recvs_);
#endif // TPETRA_DISTRIBUTOR_TIMERS

      size_t curBufferOffset = 0;
      size_t curLIDoffset = 0;
      for (size_type i = 0; i < actualNumReceives; ++i) {
        // Sum the per-LID packet counts to get the total message length
        // expected from process procsFrom_[i].
        size_t totalPacketsFrom_i = 0;
        for (size_t j = 0; j < lengthsFrom_[i]; ++j) {
          totalPacketsFrom_i += numImportPacketsPerLID[curLIDoffset+j];
        }
        curLIDoffset += lengthsFrom_[i];
        if (procsFrom_[i] != myProcID && totalPacketsFrom_i) {
          // If my process is receiving these packet(s) from another
          // process (not a self-receive), and if there is at least
          // one packet to receive:
          //
          // 1. Set up the persisting view (recvBuf) into the imports
          //    array, given the offset and size (total number of
          //    packets from process procsFrom_[i]).
          // 2. Start the Irecv and save the resulting request.
          imports_view_type recvBuf =
            subview_offset (imports, curBufferOffset, totalPacketsFrom_i);
          requests_.push_back (ireceive<int> (recvBuf, procsFrom_[i],
                                              tag, *comm_));
        }
        else { // Receiving these packet(s) from myself
          selfReceiveOffset = curBufferOffset; // Remember the offset
        }
        curBufferOffset += totalPacketsFrom_i;
      }
    }

    if (doBarrier) {
#ifdef TPETRA_DISTRIBUTOR_TIMERS
      Teuchos::TimeMonitor timeMonBarrier (*timer_doPosts4_barrier_);
#endif // TPETRA_DISTRIBUTOR_TIMERS
      // If we are using ready sends (MPI_Rsend) below, we need to do
      // a barrier before we post the ready sends.  This is because a
      // ready send requires that its matching receive has already
      // been posted before the send has been posted.  The only way to
      // guarantee that in this case is to use a barrier.
      comm_->barrier ();
    }

#ifdef TPETRA_DISTRIBUTOR_TIMERS
    Teuchos::TimeMonitor timeMonSends (*timer_doPosts4_sends_);
#endif // TPETRA_DISTRIBUTOR_TIMERS

    // setup arrays containing starting-offsets into exports for each send,
    // and num-packets-to-send for each send.
    Array<size_t> sendPacketOffsets(numSends_,0), packetsPerSend(numSends_,0);
    size_t maxNumPackets = 0;
    size_t curPKToffset = 0;
    for (size_t pp=0; pp<numSends_; ++pp) {
      sendPacketOffsets[pp] = curPKToffset;
      size_t numPackets = 0;
      for (size_t j=startsTo_[pp]; j<startsTo_[pp]+lengthsTo_[pp]; ++j) {
        numPackets += numExportPacketsPerLID[j];
      }
      // maxNumPackets sizes the temporary send buffer in the slow path.
      if (numPackets > maxNumPackets) maxNumPackets = numPackets;
      packetsPerSend[pp] = numPackets;
      curPKToffset += numPackets;
    }

    // setup scan through procsTo_ list starting with higher numbered procs
    // (should help balance message traffic)
    size_t numBlocks = numSends_+ selfMessage_;
    size_t procIndex = 0;
    while ((procIndex < numBlocks) && (procsTo_[procIndex] < myProcID)) {
      ++procIndex;
    }
    if (procIndex == numBlocks) {
      procIndex = 0;
    }

    size_t selfNum = 0;
    size_t selfIndex = 0;
    if (indicesTo_.empty()) {
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myProcID
           << ": doPosts(4 args, Kokkos, fast): posting sends" << endl;
        *out_ << os.str ();
      }

      // Data are already blocked (laid out) by process, so we don't
      // need a separate send buffer (besides the exports array).
      for (size_t i = 0; i < numBlocks; ++i) {
        size_t p = i + procIndex;
        if (p > (numBlocks - 1)) {
          p -= numBlocks;
        }

        // Zero-length messages to other processes are skipped entirely
        // (no matching receive was posted for them above).
        if (procsTo_[p] != myProcID && packetsPerSend[p] > 0) {
          exports_view_type tmpSend =
            subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]);

          if (sendType == Details::DISTRIBUTOR_SEND) { // the default, so put it first
            send<int> (tmpSend,
                       as<int> (tmpSend.size ()),
                       procsTo_[p], tag, *comm_);
          }
          else if (sendType == Details::DISTRIBUTOR_RSEND) {
            readySend<int> (tmpSend,
                            as<int> (tmpSend.size ()),
                            procsTo_[p], tag, *comm_);
          }
          else if (sendType == Details::DISTRIBUTOR_ISEND) {
            exports_view_type tmpSendBuf =
              subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]);
            requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
                                             tag, *comm_));
          }
          else if (sendType == Details::DISTRIBUTOR_SSEND) {
            ssend<int> (tmpSend,
                        as<int> (tmpSend.size ()),
                        procsTo_[p], tag, *comm_);
          }
          else {
            TEUCHOS_TEST_FOR_EXCEPTION(
              true, std::logic_error,
              "Tpetra::Distributor::doPosts(4 args, Kokkos): "
              "Invalid send type. We should never get here. "
              "Please report this bug to the Tpetra developers.");
          }
        }
        else { // "Sending" the message to myself
          selfNum = p;
        }
      }

      if (selfMessage_) {
        // "Send to self" is just a local copy from exports to imports.
        deep_copy_offset(imports, exports, selfReceiveOffset,
                         sendPacketOffsets[selfNum], packetsPerSend[selfNum]);
      }
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myProcID << ": doPosts(4 args, Kokkos, fast) done" << endl;
        *out_ << os.str ();
      }
    }
    else { // data are not blocked by proc, use send buffer
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myProcID << ": doPosts(4 args, Kokkos, slow): posting sends" << endl;
        *out_ << os.str ();
      }

      // FIXME (mfh 05 Mar 2013) This may be broken for Isend.
      typedef typename ExpView::non_const_value_type Packet;
      typedef typename ExpView::array_layout Layout;
      typedef typename ExpView::device_type Device;
      typedef typename ExpView::memory_traits Mem;
      Kokkos::View<Packet*,Layout,Device,Mem> sendArray ("sendArray", maxNumPackets); // send buffer

      // The single send buffer is reused for every message, so
      // nonblocking sends (which would still be reading it) are unsafe.
      TEUCHOS_TEST_FOR_EXCEPTION(
        sendType == Details::DISTRIBUTOR_ISEND,
        std::logic_error,
        "Tpetra::Distributor::doPosts(4 args, Kokkos): "
        "The \"send buffer\" code path may not necessarily work with nonblocking sends.");

      // Prefix-sum the per-LID packet counts, so indicesOffsets[j] is
      // the packet offset of export LID j within 'exports'.
      Array<size_t> indicesOffsets (numExportPacketsPerLID.size(), 0);
      size_t ioffset = 0;
      for (int j=0; j<numExportPacketsPerLID.size(); ++j) {
        indicesOffsets[j] = ioffset;
        ioffset += numExportPacketsPerLID[j];
      }

      for (size_t i = 0; i < numBlocks; ++i) {
        size_t p = i + procIndex;
        if (p > (numBlocks - 1)) {
          p -= numBlocks;
        }

        if (procsTo_[p] != myProcID) {
          // Gather this process's packets (in indicesTo_ order) into
          // the contiguous send buffer.
          size_t sendArrayOffset = 0;
          size_t j = startsTo_[p];
          size_t numPacketsTo_p = 0;
          for (size_t k = 0; k < lengthsTo_[p]; ++k, ++j) {
            numPacketsTo_p += numExportPacketsPerLID[j];
            deep_copy_offset(sendArray, exports, sendArrayOffset,
                             indicesOffsets[j], numExportPacketsPerLID[j]);
            sendArrayOffset += numExportPacketsPerLID[j];
          }
          if (numPacketsTo_p > 0) {
            ImpView tmpSend =
              subview_offset(sendArray, size_t(0), numPacketsTo_p);

            if (sendType == Details::DISTRIBUTOR_RSEND) {
              readySend<int> (tmpSend,
                              as<int> (tmpSend.size ()),
                              procsTo_[p], tag, *comm_);
            }
            else if (sendType == Details::DISTRIBUTOR_ISEND) {
              exports_view_type tmpSendBuf =
                subview_offset (sendArray, size_t(0), numPacketsTo_p);
              requests_.push_back (isend<int> (tmpSendBuf, procsTo_[p],
                                               tag, *comm_));
            }
            else if (sendType == Details::DISTRIBUTOR_SSEND) {
              ssend<int> (tmpSend,
                          as<int> (tmpSend.size ()),
                          procsTo_[p], tag, *comm_);
            }
            else { // if (sendType == Details::DISTRIBUTOR_SSEND)
              send<int> (tmpSend,
                         as<int> (tmpSend.size ()),
                         procsTo_[p], tag, *comm_);
            }
          }
        }
        else { // "Sending" the message to myself
          selfNum = p;
          selfIndex = startsTo_[p];
        }
      }

      if (selfMessage_) {
        // Self-message in the slow path: copy each LID's packets
        // individually, since they are not contiguous in 'exports'.
        for (size_t k = 0; k < lengthsTo_[selfNum]; ++k) {
          deep_copy_offset(imports, exports, selfReceiveOffset,
                           indicesOffsets[selfIndex],
                           numExportPacketsPerLID[selfIndex]);
          selfReceiveOffset += numExportPacketsPerLID[selfIndex];
          ++selfIndex;
        }
      }
      if (verbose) {
        std::ostringstream os;
        os << "Proc " << myProcID
           << ": doPosts(4 args, Kokkos, slow) done" << endl;
        *out_ << os.str ();
      }
    }
  }
2935 
2936  template <class ExpView, class ImpView>
2937  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2938  Distributor::
2939  doReversePostsAndWaits (const ExpView& exports,
2940  size_t numPackets,
2941  const ImpView& imports)
2942  {
2943  doReversePosts (exports, numPackets, imports);
2944  doReverseWaits ();
2945  }
2946 
2947  template <class ExpView, class ImpView>
2948  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2949  Distributor::
2950  doReversePostsAndWaits (const ExpView& exports,
2951  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2952  const ImpView& imports,
2953  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
2954  {
2955  TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
2956  "Tpetra::Distributor::doReversePostsAndWaits(4 args): There are "
2957  << requests_.size() << " outstanding nonblocking messages pending. It "
2958  "is incorrect to call this method with posts outstanding.");
2959 
2960  doReversePosts (exports, numExportPacketsPerLID, imports,
2961  numImportPacketsPerLID);
2962  doReverseWaits ();
2963  }
2964 
2965  template <class ExpView, class ImpView>
2966  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2967  Distributor::
2968  doReversePosts (const ExpView &exports,
2969  size_t numPackets,
2970  const ImpView &imports)
2971  {
2972  // FIXME (mfh 29 Mar 2012) WHY?
2973  TEUCHOS_TEST_FOR_EXCEPTION(
2974  ! indicesTo_.empty (), std::runtime_error,
2975  "Tpetra::Distributor::doReversePosts(3 args): Can only do "
2976  "reverse communication when original data are blocked by process.");
2977  if (reverseDistributor_.is_null ()) {
2978  createReverseDistributor ();
2979  }
2980  reverseDistributor_->doPosts (exports, numPackets, imports);
2981  }
2982 
2983  template <class ExpView, class ImpView>
2984  typename std::enable_if<(Kokkos::Impl::is_view<ExpView>::value && Kokkos::Impl::is_view<ImpView>::value)>::type
2985  Distributor::
2986  doReversePosts (const ExpView &exports,
2987  const Teuchos::ArrayView<const size_t>& numExportPacketsPerLID,
2988  const ImpView &imports,
2989  const Teuchos::ArrayView<const size_t>& numImportPacketsPerLID)
2990  {
2991  // FIXME (mfh 29 Mar 2012) WHY?
2992  TEUCHOS_TEST_FOR_EXCEPTION(
2993  ! indicesTo_.empty (), std::runtime_error,
2994  "Tpetra::Distributor::doReversePosts(3 args): Can only do "
2995  "reverse communication when original data are blocked by process.");
2996  if (reverseDistributor_.is_null ()) {
2997  createReverseDistributor ();
2998  }
2999  reverseDistributor_->doPosts (exports, numExportPacketsPerLID,
3000  imports, numImportPacketsPerLID);
3001  }
3002 
  /// \brief Compute the send lists (export GIDs and process IDs) from
  ///   the receive lists, using a temporary all-to-all exchange.
  ///
  /// Each process packs (importGIDs[i], myRank) pairs and ships them to
  /// importProcIDs[i] through a temporary Distributor; what arrives back
  /// is exactly the (GID, PID) pairs this process must later export.
  ///
  /// \param importGIDs [in] GIDs this process wants to receive.
  /// \param importProcIDs [in] Process that owns each corresponding GID.
  /// \param exportGIDs [out] GIDs this process must send.
  /// \param exportProcIDs [out] Destination process for each export GID.
  template <class OrdinalType>
  void Distributor::
  computeSends (const Teuchos::ArrayView<const OrdinalType> & importGIDs,
                const Teuchos::ArrayView<const int> & importProcIDs,
                Teuchos::Array<OrdinalType> & exportGIDs,
                Teuchos::Array<int> & exportProcIDs)
  {
    // NOTE (mfh 19 Apr 2012): There was a note on this code saying:
    // "assumes that size_t >= Ordinal".  The code certainly does
    // assume that sizeof(size_t) >= sizeof(OrdinalType) as well as
    // sizeof(size_t) >= sizeof(int).  This is because it casts the
    // OrdinalType elements of importGIDs (along with their
    // corresponding process IDs, as int) to size_t, and does a
    // doPostsAndWaits<size_t>() to send the packed data.
    using Teuchos::Array;
    using Teuchos::ArrayView;
    using std::endl;
    typedef typename ArrayView<const OrdinalType>::size_type size_type;
    const bool verbose = Tpetra::Details::Behavior::verbose("Distributor");

    Teuchos::OSTab tab (out_);
    const int myRank = comm_->getRank ();
    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank << ": computeSends" << endl;
      *out_ << os.str ();
    }

    TEUCHOS_TEST_FOR_EXCEPTION(
      importGIDs.size () != importProcIDs.size (), std::invalid_argument,
      "Tpetra::Distributor::computeSends: On Process " << myRank << ": "
      "importProcIDs.size() = " << importProcIDs.size ()
      << " != importGIDs.size() = " << importGIDs.size () << ".");

    const size_type numImports = importProcIDs.size ();
    Array<size_t> importObjs (2*numImports);
    // Pack pairs (importGIDs[i], my process ID) to send into importObjs.
    for (size_type i = 0; i < numImports; ++i) {
      importObjs[2*i] = static_cast<size_t> (importGIDs[i]);
      importObjs[2*i+1] = static_cast<size_t> (myRank);
    }
    //
    // Use a temporary Distributor to send the (importGIDs[i], myRank)
    // pairs to importProcIDs[i].
    //
    Distributor tempPlan (comm_, out_);
    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank << ": computeSends: tempPlan.createFromSends" << endl;
      *out_ << os.str ();
    }

    // mfh 20 Mar 2014: An extra-cautious cast from unsigned to
    // signed, in order to forestall any possible causes for Bug 6069.
    const size_t numExportsAsSizeT = tempPlan.createFromSends (importProcIDs);
    const size_type numExports = static_cast<size_type> (numExportsAsSizeT);
    // size_type is signed, so a huge size_t would wrap negative here.
    TEUCHOS_TEST_FOR_EXCEPTION(
      numExports < 0, std::logic_error, "Tpetra::Distributor::computeSends: "
      "tempPlan.createFromSends() returned numExports = " << numExportsAsSizeT
      << " as a size_t, which overflows to " << numExports << " when cast to "
      << Teuchos::TypeNameTraits<size_type>::name () << ". "
      "Please report this bug to the Tpetra developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
      static_cast<size_type> (tempPlan.getTotalReceiveLength ()) != numExports,
      std::logic_error, "Tpetra::Distributor::computeSends: tempPlan.getTotal"
      "ReceiveLength() = " << tempPlan.getTotalReceiveLength () << " != num"
      "Exports = " << numExports << ". Please report this bug to the "
      "Tpetra developers.");

    if (numExports > 0) {
      exportGIDs.resize (numExports);
      exportProcIDs.resize (numExports);
    }

    // exportObjs: Packed receive buffer.  (exportObjs[2*i],
    // exportObjs[2*i+1]) will give the (GID, PID) pair for export i,
    // after tempPlan.doPostsAndWaits(...) finishes below.
    //
    // FIXME (mfh 19 Mar 2014) This only works if OrdinalType fits in
    // size_t.  This issue might come up, for example, on a 32-bit
    // machine using 64-bit global indices.  I will add a check here
    // for that case.
    TEUCHOS_TEST_FOR_EXCEPTION(
      sizeof (size_t) < sizeof (OrdinalType), std::logic_error,
      "Tpetra::Distributor::computeSends: sizeof(size_t) = " << sizeof(size_t)
      << " < sizeof(" << Teuchos::TypeNameTraits<OrdinalType>::name () << ") = "
      << sizeof (OrdinalType) << ". This violates an assumption of the "
      "method. It's not hard to work around (just use Array<OrdinalType> as "
      "the export buffer, not Array<size_t>), but we haven't done that yet. "
      "Please report this bug to the Tpetra developers.");

    TEUCHOS_TEST_FOR_EXCEPTION(
      tempPlan.getTotalReceiveLength () < static_cast<size_t> (numExports),
      std::logic_error,
      "Tpetra::Distributor::computeSends: tempPlan.getTotalReceiveLength() = "
      << tempPlan.getTotalReceiveLength() << " < numExports = " << numExports
      << ". Please report this bug to the Tpetra developers.");

    Array<size_t> exportObjs (tempPlan.getTotalReceiveLength () * 2);
    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank << ": computeSends: tempPlan.doPostsAndWaits" << endl;
      *out_ << os.str ();
    }
    // Exchange the packed pairs; 2 packets (GID, PID) per LID.
    tempPlan.doPostsAndWaits<size_t> (importObjs (), 2, exportObjs ());

    // Unpack received (GID, PID) pairs into exportIDs resp. exportProcIDs.
    for (size_type i = 0; i < numExports; ++i) {
      exportGIDs[i] = static_cast<OrdinalType> (exportObjs[2*i]);
      exportProcIDs[i] = static_cast<int> (exportObjs[2*i+1]);
    }

    if (verbose) {
      std::ostringstream os;
      os << "Proc " << myRank << ": computeSends done" << endl;
      *out_ << os.str ();
    }
  }
3121 
  /// \brief Set up the Distributor from the lists of GIDs and process
  ///   ranks from which this process will receive.
  ///
  /// First computes the corresponding send lists via computeSends()
  /// (a collective exchange), then calls createFromSends() with the
  /// resulting export process IDs.
  ///
  /// \param remoteGIDs [in] GIDs this process will receive.
  /// \param remoteProcIDs [in] Rank owning each corresponding GID.
  /// \param exportGIDs [out] GIDs this process must send.
  /// \param exportProcIDs [out] Destination rank for each export GID.
  ///
  /// This method is collective over the Distributor's communicator.
  template <class OrdinalType>
  void Distributor::
  createFromRecvs (const Teuchos::ArrayView<const OrdinalType> &remoteGIDs,
                   const Teuchos::ArrayView<const int> &remoteProcIDs,
                   Teuchos::Array<OrdinalType> &exportGIDs,
                   Teuchos::Array<int> &exportProcIDs)
  {
    using std::endl;
    const bool verbose = Tpetra::Details::Behavior::verbose("Distributor");

    Teuchos::OSTab tab (out_);
    const int myRank = comm_->getRank();

    if (verbose) {
      *out_ << "Proc " << myRank << ": createFromRecvs" << endl;
    }

#ifdef HAVE_TPETRA_DEBUG
    using Teuchos::outArg;
    using Teuchos::reduceAll;

    // In debug mode, first test locally, then do an all-reduce to
    // make sure that all processes passed.
    const int errProc =
      (remoteGIDs.size () != remoteProcIDs.size ()) ? myRank : -1;
    int maxErrProc = -1;
    reduceAll<int, int> (*comm_, Teuchos::REDUCE_MAX, errProc, outArg (maxErrProc));
    TEUCHOS_TEST_FOR_EXCEPTION(maxErrProc != -1, std::runtime_error,
      Teuchos::typeName (*this) << "::createFromRecvs(): lists of remote IDs "
      "and remote process IDs must have the same size on all participating "
      "processes. Maximum process ID with error: " << maxErrProc << ".");
#else // NOT HAVE_TPETRA_DEBUG

    // In non-debug mode, just test locally.
    TEUCHOS_TEST_FOR_EXCEPTION(
      remoteGIDs.size () != remoteProcIDs.size (), std::invalid_argument,
      Teuchos::typeName (*this) << "::createFromRecvs<" <<
      Teuchos::TypeNameTraits<OrdinalType>::name () << ">(): On Process " <<
      myRank << ": remoteGIDs.size() = " << remoteGIDs.size () << " != "
      "remoteProcIDs.size() = " << remoteProcIDs.size () << ".");
#endif // HAVE_TPETRA_DEBUG

    computeSends (remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs);

    const size_t numProcsSendingToMe = createFromSends (exportProcIDs ());

    if (verbose) {
      // NOTE (mfh 20 Mar 2014) If remoteProcIDs could contain
      // duplicates, then its length might not be the right check here,
      // even if we account for selfMessage_.  selfMessage_ is set in
      // createFromSends.
      std::ostringstream os;
      os << "Proc " << myRank << ": {numProcsSendingToMe: "
         << numProcsSendingToMe << ", remoteProcIDs.size(): "
         << remoteProcIDs.size () << ", selfMessage_: "
         << (selfMessage_ ? "true" : "false") << "}" << std::endl;
      *out_ << os.str ();
    }

    if (verbose) {
      *out_ << "Proc " << myRank << ": createFromRecvs done" << endl;
    }

    // Record how this Distributor was initialized.
    howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS;
  }
3187 
3188 
3189 } // namespace Tpetra
3190 
3191 #endif // TPETRA_DISTRIBUTOR_HPP
Tpetra::Distributor::setParameterList
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &plist)
Set Distributor parameters.
Definition: Tpetra_Distributor.cpp:378
Tpetra::Distributor::Distributor
Distributor(const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Construct using the specified communicator and default parameters.
Definition: Tpetra_Distributor.cpp:174
Tpetra::Distributor::getReverse
Teuchos::RCP< Distributor > getReverse() const
A reverse communication plan Distributor.
Definition: Tpetra_Distributor.cpp:519
Tpetra::Distributor::doReversePostsAndWaits
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
Definition: Tpetra_Distributor.hpp:1926
Tpetra::Distributor::createFromSends
size_t createFromSends(const Teuchos::ArrayView< const int > &exportProcIDs)
Set up Distributor using list of process ranks to which this process will send.
Definition: Tpetra_Distributor.cpp:1120
Tpetra::Distributor::description
std::string description() const
Return a one-line description of this object.
Definition: Tpetra_Distributor.cpp:680
Tpetra::Details::EDistributorHowInitialized
EDistributorHowInitialized
Enum indicating how and whether a Distributor was initialized.
Definition: Tpetra_Distributor.hpp:95
Tpetra_Details_Behavior.hpp
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Tpetra::Distributor::getValidParameters
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
List of valid Distributor parameters.
Definition: Tpetra_Distributor.cpp:440
Tpetra::Distributor::getNumReceives
size_t getNumReceives() const
The number of processes from which we will receive data.
Definition: Tpetra_Distributor.cpp:494
Tpetra::distributorSendTypes
Teuchos::Array< std::string > distributorSendTypes()
Valid values for Distributor's "Send type" parameter.
Definition: Tpetra_Distributor.cpp:91
Tpetra::Distributor::createFromSendsAndRecvs
void createFromSendsAndRecvs(const Teuchos::ArrayView< const int > &exportProcIDs, const Teuchos::ArrayView< const int > &remoteProcIDs)
Set up Distributor using list of process ranks to which to send, and list of process ranks from which to receive.
Definition: Tpetra_Distributor.cpp:1451
Tpetra::Details::DistributorHowInitializedEnumToString
std::string DistributorHowInitializedEnumToString(EDistributorHowInitialized how)
Convert an EDistributorHowInitialized enum value to a string.
Definition: Tpetra_Distributor.cpp:71
Tpetra::Distributor::doPostsAndWaits
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
Definition: Tpetra_Distributor.hpp:1055
Details
Implementation details of Tpetra.
Tpetra::Distributor::describe
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
Definition: Tpetra_Distributor.cpp:750
Tpetra::Details::DistributorSendTypeEnumToString
std::string DistributorSendTypeEnumToString(EDistributorSendType sendType)
Convert an EDistributorSendType enum value to a string.
Definition: Tpetra_Distributor.cpp:50
Tpetra::Distributor::getProcsTo
Teuchos::ArrayView< const int > getProcsTo() const
Ranks of the processes to which this process will send values.
Definition: Tpetra_Distributor.cpp:512
Tpetra::Details::EDistributorSendType
EDistributorSendType
The type of MPI send that Distributor should use.
Definition: Tpetra_Distributor.hpp:77
Tpetra::Distributor::getLengthsFrom
Teuchos::ArrayView< const size_t > getLengthsFrom() const
Number of values this process will receive from each process.
Definition: Tpetra_Distributor.cpp:509
Tpetra::Distributor::getLastDoStatistics
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const
Information on the last call to do/doReverse.
Definition: Tpetra_Distributor.hpp:763
Tpetra::Distributor::howInitialized
Details::EDistributorHowInitialized howInitialized() const
Return an enum indicating whether and how a Distributor was initialized.
Definition: Tpetra_Distributor.hpp:417
Tpetra::Distributor::getProcsFrom
Teuchos::ArrayView< const int > getProcsFrom() const
Ranks of the processes sending values to this process.
Definition: Tpetra_Distributor.cpp:506
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:188
Tpetra::Details::Behavior::verbose
static bool verbose()
Whether Tpetra is in verbose mode.
Definition: Tpetra_Details_Behavior.cpp:260
Tpetra_Util.hpp
Stand-alone utility functions and macros.
Tpetra::Distributor::getNumSends
size_t getNumSends() const
The number of processes to which we will send data.
Definition: Tpetra_Distributor.cpp:500
Tpetra::Distributor::~Distributor
virtual ~Distributor()
Destructor (virtual for memory safety).
Definition: Tpetra_Distributor.cpp:358
Tpetra::Distributor::getLengthsTo
Teuchos::ArrayView< const size_t > getLengthsTo() const
Number of values this process will send to each process.
Definition: Tpetra_Distributor.cpp:515
Tpetra::Distributor::doReverseWaits
void doReverseWaits()
Definition: Tpetra_Distributor.cpp:673
Tpetra::Distributor::getMaxSendLength
size_t getMaxSendLength() const
Maximum number of values this process will send to another single process.
Definition: Tpetra_Distributor.cpp:503
Tpetra::Distributor::hasSelfMessage
bool hasSelfMessage() const
Whether the calling process will send or receive messages to itself.
Definition: Tpetra_Distributor.cpp:497
Tpetra::Distributor::swap
void swap(Distributor &rhs)
Swap the contents of rhs with those of *this.
Definition: Tpetra_Distributor.cpp:311
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::Distributor::createFromRecvs
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
Tpetra::Distributor::getTotalReceiveLength
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
Definition: Tpetra_Distributor.cpp:491
Tpetra::Distributor::doWaits
void doWaits()
Definition: Tpetra_Distributor.cpp:605
Tpetra::Distributor::doReversePosts
void doReversePosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a reverse plan, but do not execute the waits yet.
Definition: Tpetra_Distributor.hpp:2003
Tpetra::Distributor::doPosts
void doPosts(const Teuchos::ArrayRCP< const Packet > &exports, size_t numPackets, const Teuchos::ArrayRCP< Packet > &imports)
Post the data for a forward plan, but do not execute the waits yet.
Definition: Tpetra_Distributor.hpp:1151