Tpetra parallel linear algebra  Version of the Day
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DISTOBJECT_DEF_HPP
43 #define TPETRA_DISTOBJECT_DEF_HPP
44 
52 
53 #include "Tpetra_Distributor.hpp"
56 #include <memory>
57 
58 namespace Tpetra {
59 namespace Classes {
60 
61  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
63  DistObject (const Teuchos::RCP<const map_type>& map) :
64  map_ (map)
65  {
66 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
67  using Teuchos::RCP;
68  using Teuchos::Time;
69  using Teuchos::TimeMonitor;
70 
71  RCP<Time> doXferTimer =
72  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
73  if (doXferTimer.is_null ()) {
74  doXferTimer =
75  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
76  }
77  doXferTimer_ = doXferTimer;
78 
79  RCP<Time> copyAndPermuteTimer =
80  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
81  if (copyAndPermuteTimer.is_null ()) {
82  copyAndPermuteTimer =
83  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
84  }
85  copyAndPermuteTimer_ = copyAndPermuteTimer;
86 
87  RCP<Time> packAndPrepareTimer =
88  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
89  if (packAndPrepareTimer.is_null ()) {
90  packAndPrepareTimer =
91  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
92  }
93  packAndPrepareTimer_ = packAndPrepareTimer;
94 
95  RCP<Time> doPostsAndWaitsTimer =
96  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
97  if (doPostsAndWaitsTimer.is_null ()) {
98  doPostsAndWaitsTimer =
99  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
100  }
101  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
102 
103  RCP<Time> unpackAndCombineTimer =
104  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
105  if (unpackAndCombineTimer.is_null ()) {
106  unpackAndCombineTimer =
107  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
108  }
109  unpackAndCombineTimer_ = unpackAndCombineTimer;
110 #endif // HAVE_TPETRA_TRANSFER_TIMERS
111  }
112 
113  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
116  map_ (rhs.map_)
117  {}
118 
119  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
122  {}
123 
124  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
125  std::string
127  description () const
128  {
129  using Teuchos::TypeNameTraits;
130 
131  std::ostringstream os;
132  os << "\"Tpetra::DistObject\": {"
133  << "Packet: " << TypeNameTraits<packet_type>::name ()
134  << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
135  << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
136  << ", Node: " << TypeNameTraits<Node>::name ();
137  if (this->getObjectLabel () != "") {
138  os << "Label: \"" << this->getObjectLabel () << "\"";
139  }
140  os << "}";
141  return os.str ();
142  }
143 
144  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
145  void
147  describe (Teuchos::FancyOStream &out,
148  const Teuchos::EVerbosityLevel verbLevel) const
149  {
150  using Teuchos::rcpFromRef;
151  using Teuchos::TypeNameTraits;
152  using std::endl;
153  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
154  Teuchos::VERB_LOW : verbLevel;
155  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
156  const int myRank = comm.is_null () ? 0 : comm->getRank ();
157  const int numProcs = comm.is_null () ? 1 : comm->getSize ();
158 
159  if (vl != Teuchos::VERB_NONE) {
160  Teuchos::OSTab tab0 (out);
161  if (myRank == 0) {
162  out << "\"Tpetra::DistObject\":" << endl;
163  }
164  Teuchos::OSTab tab1 (out);
165  if (myRank == 0) {
166  out << "Template parameters:" << endl;
167  {
168  Teuchos::OSTab tab2 (out);
169  out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
170  << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
171  << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
172  << "Node: " << TypeNameTraits<node_type>::name () << endl;
173  }
174  if (this->getObjectLabel () != "") {
175  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
176  }
177  } // if myRank == 0
178 
179  // Describe the Map.
180  {
181  if (myRank == 0) {
182  out << "Map:" << endl;
183  }
184  Teuchos::OSTab tab2 (out);
185  map_->describe (out, vl);
186  }
187 
188  // At verbosity > VERB_LOW, each process prints something.
189  if (vl > Teuchos::VERB_LOW) {
190  for (int p = 0; p < numProcs; ++p) {
191  if (myRank == p) {
192  out << "Process " << myRank << ":" << endl;
193  Teuchos::OSTab tab2 (out);
194  out << "Export buffer size (in packets): "
195  << exports_.extent (0)
196  << endl
197  << "Import buffer size (in packets): "
198  << imports_.extent (0)
199  << endl;
200  }
201  if (! comm.is_null ()) {
202  comm->barrier (); // give output time to finish
203  comm->barrier ();
204  comm->barrier ();
205  }
206  } // for each process rank p
207  } // if vl > VERB_LOW
208  } // if vl != VERB_NONE
209  }
210 
211  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
212  void
214  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
215  {
216  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
217  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
218  }
219 
220  /* These are provided in base DistObject template
221  template<class DistObjectType>
222  void
223  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
224  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
225  typename DistObjectType::global_ordinal_type,
226  typename DistObjectType::node_type> >& newMap)
227  {
228  input->removeEmptyProcessesInPlace (newMap);
229  if (newMap.is_null ()) { // my process is excluded
230  input = Teuchos::null;
231  }
232  }
233 
234  template<class DistObjectType>
235  void
236  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
237  {
238  using Teuchos::RCP;
239  typedef typename DistObjectType::local_ordinal_type LO;
240  typedef typename DistObjectType::global_ordinal_type GO;
241  typedef typename DistObjectType::node_type NT;
242  typedef Map<LO, GO, NT> map_type;
243 
244  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
245  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
246  }
247  */
248 
249  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
250  void
252  doImport (const SrcDistObject& source,
254  CombineMode CM)
255  {
256  using std::endl;
257  const char modeString[] = "doImport (forward mode)";
258 
259  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
260  // output to std::cerr on every MPI process. This is unwise for
261  // runs with large numbers of MPI processes.
262  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
263  std::unique_ptr<std::string> prefix;
264  if (verbose) {
265  int myRank = 0;
266  auto map = this->getMap ();
267  if (! map.is_null ()) {
268  auto comm = map->getComm ();
269  if (! comm.is_null ()) {
270  myRank = comm->getRank ();
271  }
272  }
273  prefix = [myRank] () {
274  std::ostringstream os;
275  os << "(Proc " << myRank << ") ";
276  return std::unique_ptr<std::string> (new std::string (os.str ()));
277  } ();
278  std::ostringstream os;
279  os << *prefix << "Tpetra::DistObject::" << modeString << ":" << endl;
280  std::cerr << os.str ();
281  }
282  this->doTransfer (source, importer, modeString, DoForward, CM);
283  if (verbose) {
284  std::ostringstream os;
285  os << *prefix << "Tpetra::DistObject::" << modeString << ": Done!"
286  << endl;
287  std::cerr << os.str ();
288  }
289  }
290 
291  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
292  void
294  doExport (const SrcDistObject& source,
296  CombineMode CM)
297  {
298  using std::endl;
299  const char modeString[] = "doExport (forward mode)";
300 
301  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
302  // output to std::cerr on every MPI process. This is unwise for
303  // runs with large numbers of MPI processes.
304  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
305  std::unique_ptr<std::string> prefix;
306  if (verbose) {
307  int myRank = 0;
308  auto map = this->getMap ();
309  if (! map.is_null ()) {
310  auto comm = map->getComm ();
311  if (! comm.is_null ()) {
312  myRank = comm->getRank ();
313  }
314  }
315  prefix = [myRank] () {
316  std::ostringstream os;
317  os << "(Proc " << myRank << ") ";
318  return std::unique_ptr<std::string> (new std::string (os.str ()));
319  } ();
320  std::ostringstream os;
321  os << *prefix << "Tpetra::DistObject::" << modeString << ":" << endl;
322  std::cerr << os.str ();
323  }
324  this->doTransfer (source, exporter, modeString, DoForward, CM);
325  if (verbose) {
326  std::ostringstream os;
327  os << *prefix << "Tpetra::DistObject::" << modeString << ": Done!"
328  << endl;
329  std::cerr << os.str ();
330  }
331  }
332 
333  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
334  void
336  doImport (const SrcDistObject& source,
338  CombineMode CM)
339  {
340  using std::endl;
341  const char modeString[] = "doImport (reverse mode)";
342 
343  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
344  // output to std::cerr on every MPI process. This is unwise for
345  // runs with large numbers of MPI processes.
346  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
347  std::unique_ptr<std::string> prefix;
348  if (verbose) {
349  int myRank = 0;
350  auto map = this->getMap ();
351  if (! map.is_null ()) {
352  auto comm = map->getComm ();
353  if (! comm.is_null ()) {
354  myRank = comm->getRank ();
355  }
356  }
357  prefix = [myRank] () {
358  std::ostringstream os;
359  os << "(Proc " << myRank << ") ";
360  return std::unique_ptr<std::string> (new std::string (os.str ()));
361  } ();
362  std::ostringstream os;
363  os << *prefix << "Tpetra::DistObject::" << modeString << ":" << endl;
364  std::cerr << os.str ();
365  }
366  this->doTransfer (source, exporter, modeString, DoReverse, CM);
367  if (verbose) {
368  std::ostringstream os;
369  os << *prefix << "Tpetra::DistObject::" << modeString << ": Done!"
370  << endl;
371  std::cerr << os.str ();
372  }
373  }
374 
375  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
376  void
378  doExport (const SrcDistObject& source,
380  CombineMode CM)
381  {
382  using std::endl;
383  const char modeString[] = "doExport (reverse mode)";
384 
385  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
386  // output to std::cerr on every MPI process. This is unwise for
387  // runs with large numbers of MPI processes.
388  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
389  std::unique_ptr<std::string> prefix;
390  if (verbose) {
391  int myRank = 0;
392  auto map = this->getMap ();
393  if (! map.is_null ()) {
394  auto comm = map->getComm ();
395  if (! comm.is_null ()) {
396  myRank = comm->getRank ();
397  }
398  }
399  prefix = [myRank] () {
400  std::ostringstream os;
401  os << "(Proc " << myRank << ") ";
402  return std::unique_ptr<std::string> (new std::string (os.str ()));
403  } ();
404  std::ostringstream os;
405  os << *prefix << "Tpetra::DistObject::" << modeString << ":" << endl;
406  std::cerr << os.str ();
407  }
408  this->doTransfer (source, importer, modeString, DoReverse, CM);
409  if (verbose) {
410  std::ostringstream os;
411  os << *prefix << "Tpetra::DistObject::" << modeString << ": Done!"
412  << endl;
413  std::cerr << os.str ();
414  }
415  }
416 
417  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
418  bool
420  isDistributed () const {
421  return map_->isDistributed ();
422  }
423 
424  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
425  size_t
428  return 0; // default implementation; subclasses may override
429  }
430 
431  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
432  void
435  const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
436  const char modeString[],
437  const ReverseOption revOp,
438  const CombineMode CM)
439  {
441  using std::endl;
442  typedef LocalOrdinal LO;
443  typedef device_type DT;
444 
445  // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
446  // checks. These may communicate more.
447  const bool debug = ::Tpetra::Details::Behavior::debug ();
448  if (debug) {
449  if (revOp == DoForward) {
450  const bool myMapSameAsTransferTgtMap =
451  this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
452  TEUCHOS_TEST_FOR_EXCEPTION
453  (! myMapSameAsTransferTgtMap, std::invalid_argument,
454  "Tpetra::DistObject::" << modeString << ": For forward-mode "
455  "communication, the target DistObject's Map must be the same "
456  "(in the sense of Tpetra::Map::isSameAs) as the input "
457  "Export/Import object's target Map.");
458  }
459  else { // revOp == DoReverse
460  const bool myMapSameAsTransferSrcMap =
461  this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
462  TEUCHOS_TEST_FOR_EXCEPTION
463  (! myMapSameAsTransferSrcMap, std::invalid_argument,
464  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
465  "communication, the target DistObject's Map must be the same "
466  "(in the sense of Tpetra::Map::isSameAs) as the input "
467  "Export/Import object's source Map.");
468  }
469 
470  // SrcDistObject need not even _have_ Maps. However, if the
471  // source object is a DistObject, it has a Map, and we may
472  // compare that Map with the Transfer's Maps.
473  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
474  if (srcDistObj != NULL) {
475  if (revOp == DoForward) {
476  const bool srcMapSameAsImportSrcMap =
477  srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
478  TEUCHOS_TEST_FOR_EXCEPTION
479  (! srcMapSameAsImportSrcMap, std::invalid_argument,
480  "Tpetra::DistObject::" << modeString << ": For forward-mode "
481  "communication, the source DistObject's Map must be the same "
482  "as the input Export/Import object's source Map.");
483  }
484  else { // revOp == DoReverse
485  const bool srcMapSameAsImportTgtMap =
486  srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
487  TEUCHOS_TEST_FOR_EXCEPTION
488  (! srcMapSameAsImportTgtMap, std::invalid_argument,
489  "Tpetra::DistObject::" << modeString << ": For reverse-mode "
490  "communication, the source DistObject's Map must be the same "
491  "as the input Export/Import object's target Map.");
492  }
493  }
494  }
495 
496  // mfh 03 Aug 2017, 17 Oct 2017: Set TPETRA_VERBOSE to true for
497  // copious debug output to std::cerr on every MPI process. This
498  // is unwise for runs with large numbers of MPI processes.
499  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
500  std::unique_ptr<std::string> prefix;
501  if (verbose) {
502  int myRank = 0;
503  auto map = this->getMap ();
504  if (! map.is_null ()) {
505  auto comm = map->getComm ();
506  if (! comm.is_null ()) {
507  myRank = comm->getRank ();
508  }
509  }
510  prefix = [myRank] () {
511  std::ostringstream os;
512  os << "(Proc " << myRank << ") ";
513  return std::unique_ptr<std::string> (new std::string (os.str ()));
514  } ();
515  std::ostringstream os;
516  os << *prefix << "Tpetra::DistObject::doTransfer:" << endl;
517  std::cerr << os.str ();
518  }
519 
520  const size_t numSameIDs = transfer.getNumSameIDs ();
521  typedef Teuchos::ArrayView<const LocalOrdinal> view_type;
522  const view_type permuteToLIDs_ = (revOp == DoForward) ?
523  transfer.getPermuteToLIDs () : transfer.getPermuteFromLIDs ();
524  const view_type permuteFromLIDs_ = (revOp == DoForward) ?
525  transfer.getPermuteFromLIDs () : transfer.getPermuteToLIDs ();
526  const view_type exportLIDs_ = (revOp == DoForward) ?
527  transfer.getExportLIDs () : transfer.getRemoteLIDs ();
528  const view_type remoteLIDs_ = (revOp == DoForward) ?
529  transfer.getRemoteLIDs () : transfer.getExportLIDs ();
530  Distributor& distor = transfer.getDistributor ();
531 
532  if (this->useNewInterface ()) {
533  using ::Tpetra::Details::Behavior;
534  // Do we need all communication buffers to live on host?
535  const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
536  if (verbose) {
537  std::ostringstream os;
538  os << *prefix << "doTransfer: Use new interface; "
539  "commOnHost=" << (commOnHost ? "true" : "false") << endl;
540  std::cerr << os.str ();
541  }
542 
543  // Convert arguments to Kokkos::DualView. This currently
544  // involves deep copy, either to host or to device (depending on
545  // commOnHost). At some point, we need to change the interface
546  // of doTransfer so it takes DualView (or just View) rather than
547  // Teuchos::ArrayView, so that we won't need this deep copy.
548  //
549  // We don't need to sync the arguments. commOnHost determines
550  // where the most recent version lives.
551  Kokkos::DualView<LO*, DT> permuteToLIDs =
552  getDualViewCopyFromArrayView<LO, DT> (permuteToLIDs_,
553  "permuteToLIDs",
554  commOnHost);
555  Kokkos::DualView<LO*, DT> permuteFromLIDs =
556  getDualViewCopyFromArrayView<LO, DT> (permuteFromLIDs_,
557  "permuteFromLIDs",
558  commOnHost);
559  // No need to sync this. packAndPrepareNew will use it to
560  // determine where to pack (in host or device memory).
561  Kokkos::DualView<LO*, DT> remoteLIDs =
562  getDualViewCopyFromArrayView<LO, DT> (remoteLIDs_,
563  "remoteLIDs",
564  commOnHost);
565  Kokkos::DualView<LO*, DT> exportLIDs =
566  getDualViewCopyFromArrayView<LO, DT> (exportLIDs_,
567  "exportLIDs",
568  commOnHost);
569  doTransferNew (src, CM, numSameIDs, permuteToLIDs, permuteFromLIDs,
570  remoteLIDs, exportLIDs, distor, revOp, commOnHost);
571  }
572  else {
573  if (verbose) {
574  std::ostringstream os;
575  os << *prefix << "doTransfer: Use old interface" << endl;
576  std::cerr << os.str ();
577  }
578  doTransferOld (src, CM, numSameIDs, permuteToLIDs_, permuteFromLIDs_,
579  remoteLIDs_, exportLIDs_, distor, revOp);
580  }
581 
582  if (verbose) {
583  std::ostringstream os;
584  os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
585  std::cerr << os.str ();
586  }
587  }
588 
589  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
590  bool
592  reallocImportsIfNeeded (const size_t newSize, const bool verbose)
593  {
594  if (verbose) {
595  const int myRank = this->getMap ()->getComm ()->getRank ();
596  std::ostringstream os;
597  os << "(Proc " << myRank << ") Reallocate (if needed) imports_ from "
598  << imports_.extent (0) << " to " << newSize << std::endl;
599  std::cerr << os.str ();
600  }
602  const bool reallocated =
603  reallocDualViewIfNeeded (this->imports_, newSize, "imports");
604  if (verbose) {
605  const int myRank = this->getMap ()->getComm ()->getRank ();
606  std::ostringstream os;
607  os << "(Proc " << myRank << ") Finished reallocating imports_"
608  << std::endl;
609  std::cerr << os.str ();
610  }
611  return reallocated;
612  }
613 
614  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
615  bool
617  reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
618  const size_t numImportLIDs)
619  {
622  using std::endl;
623  // If an array is already allocated, and if is at least
624  // tooBigFactor times bigger than it needs to be, free it and
625  // reallocate to the size we need, in order to save space.
626  // Otherwise, take subviews to reduce allocation size.
627  constexpr size_t tooBigFactor = 10;
628 
629  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
630  if (verbose) {
631  const int myRank = this->getMap ()->getComm ()->getRank ();
632  std::ostringstream os;
633  os << "(Proc " << myRank << ") reallocArraysForNumPacketsPerLid before:"
634  << endl
635  << "(Proc " << myRank << ") "
636  << dualViewStatusToString (this->numExportPacketsPerLID_, "numExportPacketsPerLID_")
637  << endl
638  << "(Proc " << myRank << ") "
639  << dualViewStatusToString (this->numImportPacketsPerLID_, "numImportPacketsPerLID_")
640  << endl;
641  std::cerr << os.str ();
642  }
643 
644  // Reallocate numExportPacketsPerLID_ if needed.
645  const bool firstReallocated =
646  reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
647  numExportLIDs,
648  "numExportPacketsPerLID",
649  tooBigFactor,
650  true); // need fence before, if realloc'ing
651 
652  // If we reallocated above, then we fenced after that
653  // reallocation. This means that we don't need to fence again,
654  // before the next reallocation.
655  const bool needFenceBeforeNextAlloc = ! firstReallocated;
656  const bool secondReallocated =
657  reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
658  numImportLIDs,
659  "numImportPacketsPerLID",
660  tooBigFactor,
661  needFenceBeforeNextAlloc);
662 
663  if (verbose) {
664  const int myRank = this->getMap ()->getComm ()->getRank ();
665  std::ostringstream os;
666  os << "(Proc " << myRank << ") reallocArraysForNumPacketsPerLid before:"
667  << endl
668  << "(Proc " << myRank << ") "
669  << dualViewStatusToString (this->numExportPacketsPerLID_, "numExportPacketsPerLID_")
670  << endl
671  << "(Proc " << myRank << ") "
672  << dualViewStatusToString (this->numImportPacketsPerLID_, "numImportPacketsPerLID_")
673  << endl;
674  std::cerr << os.str ();
675  }
676 
677  return firstReallocated || secondReallocated;
678  }
679 
680  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
681  void
683  doTransferOld (const SrcDistObject& src,
684  CombineMode CM,
685  size_t numSameIDs,
686  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
687  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
688  const Teuchos::ArrayView<const LocalOrdinal>& remoteLIDs,
689  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
690  Distributor &distor,
691  ReverseOption revOp)
692  {
695 
696  // mfh 03 Aug 2017: Set this to true for copious debug output to
697  // std::cerr on every MPI process. This is unwise for runs with
698  // large numbers of MPI processes.
699  constexpr bool debug = false;
700 
701 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
702  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
703 #endif // HAVE_TPETRA_TRANSFER_TIMERS
704 
705  TEUCHOS_TEST_FOR_EXCEPTION(
706  ! checkSizes (src), std::invalid_argument,
707  "Tpetra::DistObject::doTransfer(): checkSizes() indicates that the "
708  "destination object is not a legal target for redistribution from the "
709  "source object. This probably means that they do not have the same "
710  "dimensions. For example, MultiVectors must have the same number of "
711  "rows and columns.");
712  KokkosClassic::ReadWriteOption rwo = KokkosClassic::ReadWrite;
713  if (CM == INSERT || CM == REPLACE) {
714  const size_t numIDsToWrite = numSameIDs +
715  static_cast<size_t> (permuteToLIDs.size ()) +
716  static_cast<size_t> (remoteLIDs.size ());
717  if (numIDsToWrite == this->getMap ()->getNodeNumElements ()) {
718  // We're overwriting all of our local data in the destination
719  // object, so a write-only view suffices.
720  //
721  // FIXME (mfh 10 Apr 2012) This doesn't make sense for a
722  // CrsMatrix with a dynamic graph. INSERT mode could mean
723  // that we're adding new entries to the object, but we don't
724  // want to get rid of the old ones.
725  rwo = KokkosClassic::WriteOnly;
726  }
727  }
728  // Tell the source to create a read-only view of its data. On a
729  // discrete accelerator such as a GPU, this brings EVERYTHING from
730  // device memory to host memory.
731  //
732  // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or
733  // rather, local LIDs to send) and packet counts, createViews()
734  // could create a "sparse view" that only brings in the necessary
735  // data from device to host memory.
736  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
737  if (srcDistObj != NULL) {
738  srcDistObj->createViews ();
739  }
740 
741  // Tell the target to create a view of its data. Depending on
742  // rwo, this could be a write-only view or a read-and-write view.
743  // On a discrete accelerator such as a GPU, a write-only view only
744  // requires a transfer from host to device memory. A
745  // read-and-write view requires a two-way transfer. This has the
746  // same problem as createViews(): it transfers EVERYTHING, not
747  // just the necessary data.
748  //
749  // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or
750  // rather, local LIDs into which to receive) and packet counts,
751  // createViewsNonConst() could create a "sparse view" that only
752  // transfers the necessary data.
753  this->createViewsNonConst (rwo);
754 
755  if (numSameIDs + permuteToLIDs.size()) {
756 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
757  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
758 #endif // HAVE_TPETRA_TRANSFER_TIMERS
759  // There is at least one GID to copy or permute.
760  copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs);
761  }
762 
763  // The method may return zero even if the implementation actually
764  // does have a constant number of packets per LID. However, if it
765  // returns nonzero, we may use this information to avoid
766  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
767  // will set this to its final value.
768  //
769  // We only need this if CM != ZERO, but it has to be lifted out of
770  // that scope because there are multiple tests for CM != ZERO.
771  size_t constantNumPackets = this->constantNumberOfPackets ();
772 
773  // We only need to pack communication buffers if the combine mode
774  // is not ZERO. A "ZERO combine mode" means that the results are
775  // the same as if we had received all zeros, and added them to the
776  // existing values. That means we don't need to communicate.
777  if (CM != ZERO) {
778  if (constantNumPackets == 0) {
779  this->reallocArraysForNumPacketsPerLid (exportLIDs.size (),
780  remoteLIDs.size ());
781  }
782 
783  {
784 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
785  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
786 #endif // HAVE_TPETRA_TRANSFER_TIMERS
787  // Ask the source to pack data. Also ask it whether there are a
788  // constant number of packets per element (constantNumPackets is
789  // an output argument). If there are, constantNumPackets will
790  // come back nonzero. Otherwise, the source will fill the
791  // numExportPacketsPerLID_ array.
792  numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
793  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
794  getArrayViewFromDualView (numExportPacketsPerLID_);
795 
796  // FIXME (mfh 26 Apr 2016) For backwards compatibility, use
797  // the old packAndPrepare interface that takes and resizes the
798  // exports buffer as a Teuchos::Array<packet_type>. Then,
799  // copy out that buffer into the host version of exports_.
800 
801  Teuchos::Array<packet_type> exportsOld;
802  packAndPrepare (src, exportLIDs, exportsOld, numExportPacketsPerLID,
803  constantNumPackets, distor);
804  const size_t exportsLen = static_cast<size_t> (exportsOld.size ());
805  reallocDualViewIfNeeded (this->exports_, exportsLen, "exports");
806  Kokkos::View<const packet_type*, Kokkos::HostSpace,
807  Kokkos::MemoryUnmanaged> exportsOldK (exportsOld.getRawPtr (),
808  exportsLen);
809  exports_.template modify<Kokkos::HostSpace> ();
810  Kokkos::deep_copy (exports_.template view<Kokkos::HostSpace> (),
811  exportsOldK);
812  }
813  }
814 
815  // We don't need the source's data anymore, so it can let go of
816  // its views. On an accelerator device with a separate memory
817  // space (like a GPU), this frees host memory, since device memory
818  // has the "master" version of the data.
819  if (srcDistObj != NULL) {
820  srcDistObj->releaseViews ();
821  }
822 
823  // We only need to send data if the combine mode is not ZERO.
824  if (CM != ZERO) {
825  if (constantNumPackets != 0) {
826  // There are a constant number of packets per element. We
827  // already know (from the number of "remote" (incoming)
828  // elements) how many incoming elements we expect, so we can
829  // resize the buffer accordingly.
830  const size_t rbufLen = remoteLIDs.size() * constantNumPackets;
831  if (debug) {
832  std::ostringstream os;
833  os << "*** doTransferOld: Const # packets: imports_.extent(0) = "
834  << imports_.extent (0) << ", rbufLen = " << rbufLen
835  << std::endl;
836  std::cerr << os.str ();
837  }
838  reallocImportsIfNeeded (rbufLen, debug);
839  }
840 
841  // Do we need to do communication (via doPostsAndWaits)?
842  bool needCommunication = true;
843  if (revOp == DoReverse && ! isDistributed ()) {
844  needCommunication = false;
845  }
846  // FIXME (mfh 30 Jun 2013): Checking whether the source object
847  // is distributed requires a cast to DistObject. If it's not a
848  // DistObject, then I'm not quite sure what to do. Perhaps it
849  // would be more appropriate for SrcDistObject to have an
850  // isDistributed() method. For now, I'll just assume that we
851  // need to do communication unless the cast succeeds and the
852  // source is not distributed.
853  else if (revOp == DoForward && srcDistObj != NULL &&
854  ! srcDistObj->isDistributed ()) {
855  needCommunication = false;
856  }
857 
858  if (needCommunication) {
859  if (revOp == DoReverse) {
860 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
861  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
862 #endif // HAVE_TPETRA_TRANSFER_TIMERS
863  if (constantNumPackets == 0) { //variable num-packets-per-LID:
864  // First communicate the number of packets per LID to receive.
865 
866  // Make sure that host has the latest version, since we're
867  // using the version on host. If host has the latest
868  // version already, syncing to host does nothing.
869  numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
870  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
871  getArrayViewFromDualView (numExportPacketsPerLID_);
872 
873  // numImportPacketsPerLID_ is the output array here, so
874  // mark it as modified. It's strictly output, so we don't
875  // have to sync from device.
876  //numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
877  numImportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
878  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
879  getArrayViewFromDualView (numImportPacketsPerLID_);
880  distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
881  numImportPacketsPerLID);
882  size_t totalImportPackets = 0;
883  {
884  typedef typename Kokkos::DualView<size_t*,
885  device_type>::t_host::execution_space host_exec_space;
886  typedef Kokkos::RangePolicy<host_exec_space, Array_size_type> range_type;
887  const size_t* const arrayToSum = numImportPacketsPerLID.getRawPtr ();
888  Kokkos::parallel_reduce ("Count import packets",
889  range_type (0, numImportPacketsPerLID.size ()),
890  [=] (const Array_size_type& i, size_t& lclSum) {
891  lclSum += arrayToSum[i];
892  }, totalImportPackets);
893  }
894 
895  reallocImportsIfNeeded (totalImportPackets, debug);
896 
897  // We don't need to sync imports_, because it is only for
898  // output here. Similarly, we don't need to mark exports_
899  // as modified, since it is read only here. This legacy
900  // version of doTransfer only uses host arrays.
901  imports_.template modify<Kokkos::HostSpace> ();
902  Teuchos::ArrayView<packet_type> hostImports =
903  getArrayViewFromDualView (imports_);
904  exports_.template sync<Kokkos::HostSpace> ();
905  Teuchos::ArrayView<const packet_type> hostExports =
906  getArrayViewFromDualView (exports_);
907  distor.doReversePostsAndWaits (hostExports,
908  numExportPacketsPerLID,
909  hostImports,
910  numImportPacketsPerLID);
911  }
912  else {
913  // We don't need to sync imports_, because it is only for
914  // output here. Similarly, we don't need to mark exports_
915  // as modified, since it is read only here. This legacy
916  // version of doTransfer only uses host arrays.
917  imports_.template modify<Kokkos::HostSpace> ();
918  Teuchos::ArrayView<packet_type> hostImports =
919  getArrayViewFromDualView (imports_);
920  exports_.template sync<Kokkos::HostSpace> ();
921  Teuchos::ArrayView<const packet_type> hostExports =
922  getArrayViewFromDualView (exports_);
923  distor.doReversePostsAndWaits (hostExports,
924  constantNumPackets,
925  hostImports);
926  }
927  }
928  else { // revOp == DoForward
929 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
930  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
931 #endif // HAVE_TPETRA_TRANSFER_TIMERS
932  if (constantNumPackets == 0) { //variable num-packets-per-LID:
933  // First communicate the number of packets per LID to receive.
934 
935  // Make sure that host has the latest version, since we're
936  // using the version on host. If host has the latest
937  // version already, syncing to host does nothing.
938  numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
939  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
940  getArrayViewFromDualView (numExportPacketsPerLID_);
941 
942  // numImportPacketsPerLID_ is the output array here, so
943  // mark it as modified. It's strictly output, so we don't
944  // have to sync from device.
945  //numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
946  numImportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
947  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
948  getArrayViewFromDualView (numImportPacketsPerLID_);
949  distor.doPostsAndWaits (numExportPacketsPerLID, 1,
950  numImportPacketsPerLID);
951  size_t totalImportPackets = 0;
952  {
953  typedef typename Kokkos::DualView<size_t*,
954  device_type>::t_host::execution_space host_exec_space;
955  typedef Kokkos::RangePolicy<host_exec_space, Array_size_type> range_type;
956  const size_t* const arrayToSum = numImportPacketsPerLID.getRawPtr ();
957  Kokkos::parallel_reduce ("Count import packets",
958  range_type (0, numImportPacketsPerLID.size ()),
959  [=] (const Array_size_type& i, size_t& lclSum) {
960  lclSum += arrayToSum[i];
961  }, totalImportPackets);
962  }
963 
964  reallocImportsIfNeeded (totalImportPackets, debug);
965 
966  // We don't need to sync imports_, because it is only for
967  // output here. Similarly, we don't need to mark exports_
968  // as modified, since it is read only here. This legacy
969  // version of doTransfer only uses host arrays.
970  imports_.template modify<Kokkos::HostSpace> ();
971  Teuchos::ArrayView<packet_type> hostImports =
972  getArrayViewFromDualView (imports_);
973  exports_.template sync<Kokkos::HostSpace> ();
974  Teuchos::ArrayView<const packet_type> hostExports =
975  getArrayViewFromDualView (exports_);
976  distor.doPostsAndWaits (hostExports,
977  numExportPacketsPerLID,
978  hostImports,
979  numImportPacketsPerLID);
980  }
981  else {
982  // We don't need to sync imports_, because it is only for
983  // output here. Similarly, we don't need to mark exports_
984  // as modified, since it is read only here. This legacy
985  // version of doTransfer only uses host arrays.
986  imports_.template modify<Kokkos::HostSpace> ();
987  Teuchos::ArrayView<packet_type> hostImports =
988  getArrayViewFromDualView (imports_);
989  exports_.template sync<Kokkos::HostSpace> ();
990  Teuchos::ArrayView<const packet_type> hostExports =
991  getArrayViewFromDualView (exports_);
992  distor.doPostsAndWaits (hostExports,
993  constantNumPackets,
994  hostImports);
995  }
996  }
997  {
998 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
999  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1000 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1001 
1002  // We don't need to sync imports_, because it is only for
1003  // output here. This legacy version of doTransfer only uses
1004  // host arrays.
1005  imports_.template modify<Kokkos::HostSpace> ();
1006  Teuchos::ArrayView<packet_type> hostImports =
1007  getArrayViewFromDualView (imports_);
1008  // NOTE (mfh 25 Apr 2016) unpackAndCombine doesn't actually
1009  // change its numImportPacketsPerLID argument, so we don't
1010  // have to mark it modified here.
1011  numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
1012  // FIXME (mfh 25 Apr 2016) unpackAndCombine doesn't actually
1013  // change its numImportPacketsPerLID argument, so we should
1014  // be able to use a const Teuchos::ArrayView here.
1015  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
1016  getArrayViewFromDualView (numImportPacketsPerLID_);
1017  unpackAndCombine (remoteLIDs, hostImports, numImportPacketsPerLID,
1018  constantNumPackets, distor, CM);
1019  }
1020  }
1021  } // if (CM != ZERO)
1022 
1023  this->releaseViews ();
1024  }
1025 
1026  namespace { // (anonymous)
1027  template<class DeviceType, class IndexType = size_t>
1028  struct SumFunctor {
1029  SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
1030  viewToSum_ (viewToSum) {}
1031  KOKKOS_FUNCTION void operator() (const IndexType& i, size_t& lclSum) const {
1032  lclSum += viewToSum_(i);
1033  }
1034  Kokkos::View<const size_t*, DeviceType> viewToSum_;
1035  };
1036 
1037  template<class DeviceType, class IndexType = size_t>
1038  size_t
1039  countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
1040  {
1041  using Kokkos::parallel_reduce;
1042  typedef DeviceType DT;
1043  typedef typename DT::execution_space DES;
1044  typedef Kokkos::RangePolicy<DES, IndexType> range_type;
1045 
1046  const IndexType numOut = numImportPacketsPerLID.extent (0);
1047  size_t totalImportPackets = 0;
1048  parallel_reduce ("Count import packets",
1049  range_type (0, numOut),
1050  SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
1051  totalImportPackets);
1052  return totalImportPackets;
1053  }
1054  } // namespace (anonymous)
1055 
1056  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1057  void
1058  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1059  doTransferNew (const SrcDistObject& src,
1060  const CombineMode CM,
1061  const size_t numSameIDs,
1062  const Kokkos::DualView<const local_ordinal_type*,
1063  device_type>& permuteToLIDs,
1064  const Kokkos::DualView<const local_ordinal_type*,
1065  device_type>& permuteFromLIDs,
1066  const Kokkos::DualView<const local_ordinal_type*,
1067  device_type>& remoteLIDs,
1068  const Kokkos::DualView<const local_ordinal_type*,
1069  device_type>& exportLIDs,
1070  Distributor& distor,
1071  const ReverseOption revOp,
1072  const bool commOnHost)
1073  {
1076  using Kokkos::Compat::getArrayView;
1077  using Kokkos::Compat::getConstArrayView;
1078  using Kokkos::Compat::getKokkosViewDeepCopy;
1079  using Kokkos::Compat::create_const_view;
1080  using std::endl;
1081  typedef LocalOrdinal LO;
1082  typedef device_type DT;
1083 
1084  typedef typename Kokkos::DualView<LO*, DT>::t_dev::execution_space DES;
1085  //typedef typename Kokkos::DualView<LO*, DT>::t_dev::memory_space DMS; // unused
1086  //typedef typename Kokkos::DualView<LO*, DT>::t_dev::memory_space HMS; // unused
1087 
1088  // DistObject's communication buffers (exports_,
1089  // numExportPacketsPerLID_, imports_, and numImportPacketsPerLID_)
1090  // may have different memory spaces than device_type would
1091  // indicate. See GitHub issue #1088. Abbreviations: "communication
1092  // host memory space" and "communication device memory space."
1093  typedef typename Kokkos::DualView<size_t*,
1094  buffer_device_type>::t_dev::memory_space CDMS;
1095  typedef typename Kokkos::DualView<size_t*,
1096  buffer_device_type>::t_host::memory_space CHMS;
1097 
1098  // mfh 03 Aug 2017, 17 Oct 2017: Set TPETRA_VERBOSE to true for
1099  // copious debug output to std::cerr on every MPI process. This
1100  // is unwise for runs with large numbers of MPI processes.
1101  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
1102  // Prefix for verbose output. Use a pointer, so we don't pay for
1103  // string construction unless needed. We set this below.
1104  std::unique_ptr<std::string> prefix;
1105  if (verbose) {
1106  auto map = this->getMap ();
1107  auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1108  const int myRank = comm.is_null () ? 0 : comm->getRank ();
1109  std::ostringstream os;
1110  os << "(Proc " << myRank << ") ";
1111  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
1112  }
1113 
1114  if (verbose) {
1115  std::ostringstream os;
1116  os << *prefix << "Tpetra::CrsMatrix::doTransferNew: Input arguments:" << endl
1117  << *prefix << " combineMode: " << combineModeToString (CM) << endl
1118  << *prefix << " numSameIDs: " << numSameIDs << endl
1119  << *prefix << " "
1120  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
1121  << *prefix << " "
1122  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
1123  << *prefix << " "
1124  << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
1125  << *prefix << " "
1126  << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
1127  << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
1128  << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
1129  std::cerr << os.str ();
1130  }
1131 
1132 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1133  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
1134 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1135 
1136  {
1137  if (verbose) {
1138  std::ostringstream os;
1139  os << *prefix << "1. checkSizes" << endl;
1140  std::cerr << os.str ();
1141  }
1142  const bool checkSizesResult = this->checkSizes (src);
1143  TEUCHOS_TEST_FOR_EXCEPTION
1144  (! checkSizesResult, std::invalid_argument,
1145  "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
1146  "destination object is not a legal target for redistribution from the "
1147  "source object. This probably means that they do not have the same "
1148  "dimensions. For example, MultiVectors must have the same number of "
1149  "rows and columns.");
1150  }
1151 
1152  // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
1153  // that if CM == INSERT || CM == REPLACE, the target object could
1154  // be write only. We don't optimize for that here.
1155 
1156  if (numSameIDs + permuteToLIDs.extent (0) != 0) {
1157  // There is at least one GID to copy or permute.
1158  if (verbose) {
1159  std::ostringstream os;
1160  os << *prefix << "2. copyAndPermuteNew" << endl;
1161  std::cerr << os.str ();
1162  }
1163  {
1164 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1165  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
1166 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1167  this->copyAndPermuteNew (src, numSameIDs, permuteToLIDs,
1168  permuteFromLIDs);
1169  }
1170  if (verbose) {
1171  std::ostringstream os;
1172  os << *prefix << "After copyAndPermuteNew:" << endl
1173  << *prefix << " "
1174  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1175  << endl
1176  << *prefix << " "
1177  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1178  << endl;
1179  std::cerr << os.str ();
1180  }
1181  }
1182 
1183  // The method may return zero even if the implementation actually
1184  // does have a constant number of packets per LID. However, if it
1185  // returns nonzero, we may use this information to avoid
1186  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
1187  // will set this to its final value.
1188  //
1189  // We only need this if CM != ZERO, but it has to be lifted out of
1190  // that scope because there are multiple tests for CM != ZERO.
1191  size_t constantNumPackets = this->constantNumberOfPackets ();
1192  if (verbose) {
1193  std::ostringstream os;
1194  os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1195  std::cerr << os.str ();
1196  }
1197 
1198  // We only need to pack communication buffers if the combine mode
1199  // is not ZERO. A "ZERO combine mode" means that the results are
1200  // the same as if we had received all zeros, and added them to the
1201  // existing values. That means we don't need to communicate.
1202  if (CM != ZERO) {
1203  if (constantNumPackets == 0) {
1204  if (verbose) {
1205  std::ostringstream os;
1206  os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1207  << endl;
1208  std::cerr << os.str ();
1209  }
1210  // This only reallocates if necessary, that is, if the sizes
1211  // don't match.
1212  this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1213  remoteLIDs.extent (0));
1214  }
1215 
1216  if (verbose) {
1217  std::ostringstream os;
1218  os << *prefix << "4. packAndPrepareNew: before, "
1219  << dualViewStatusToString (this->exports_, "exports_")
1220  << endl;
1221  std::cerr << os.str ();
1222  }
1223  {
1224 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1225  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1226 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1227  // Ask the source to pack data. Also ask it whether there are
1228  // a constant number of packets per element
1229  // (constantNumPackets is an output argument). If there are,
1230  // constantNumPackets will come back nonzero. Otherwise, the
1231  // source will fill the numExportPacketsPerLID_ array.
1232  this->packAndPrepareNew (src, exportLIDs, this->exports_,
1233  this->numExportPacketsPerLID_,
1234  constantNumPackets, distor);
1235  // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1236  // Alternately, make packAndPrepareNew take a "commOnHost"
1237  // argument to tell it where to leave the data?
1238  if (commOnHost) {
1239  typedef typename Kokkos::View<char*, buffer_device_type>::HostMirror::device_type
1240  buffer_host_device_type;
1241  typedef typename buffer_host_device_type::memory_space
1242  buffer_host_memory_space;
1243  this->exports_.template sync<buffer_host_memory_space> ();
1244  }
1245  else { // ! commOnHost
1246  typedef typename buffer_device_type::memory_space buffer_dev_memory_space;
1247  this->exports_.template sync<buffer_dev_memory_space> ();
1248  }
1249  }
1250  if (verbose) {
1251  std::ostringstream os;
1252  os << *prefix << "5.1. After packAndPrepareNew, "
1253  << dualViewStatusToString (this->exports_, "exports_")
1254  << endl;
1255  std::cerr << os.str ();
1256  }
1257  } // if (CM != ZERO)
1258 
1259  // We only need to send data if the combine mode is not ZERO.
1260  if (CM != ZERO) {
1261  if (constantNumPackets != 0) {
1262  if (verbose) {
1263  std::ostringstream os;
1264  os << *prefix << "6. Realloc imports_" << std::endl;
1265  std::cerr << os.str ();
1266  }
1267  // There are a constant number of packets per element. We
1268  // already know (from the number of "remote" (incoming)
1269  // elements) how many incoming elements we expect, so we can
1270  // resize the buffer accordingly.
1271  const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1272  reallocImportsIfNeeded (rbufLen, verbose);
1273  }
1274 
1275  // Do we need to do communication (via doPostsAndWaits)?
1276  bool needCommunication = true;
1277 
1278  // This may be NULL. It will be used below.
1279  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1280 
1281  if (revOp == DoReverse && ! this->isDistributed ()) {
1282  needCommunication = false;
1283  }
1284  // FIXME (mfh 30 Jun 2013): Checking whether the source object
1285  // is distributed requires a cast to DistObject. If it's not a
1286  // DistObject, then I'm not quite sure what to do. Perhaps it
1287  // would be more appropriate for SrcDistObject to have an
1288  // isDistributed() method. For now, I'll just assume that we
1289  // need to do communication unless the cast succeeds and the
1290  // source is not distributed.
1291  else if (revOp == DoForward && srcDistObj != NULL &&
1292  ! srcDistObj->isDistributed ()) {
1293  needCommunication = false;
1294  }
1295 
1296  if (verbose) {
1297  std::ostringstream os;
1298  os << *prefix << "needCommunication="
1299  << (needCommunication ? "true" : "false") << endl;
1300  std::cerr << os.str ();
1301  }
1302 
1303  // FIXME (mfh 17 Feb 2014) Distributor doesn't actually inspect
1304  // the contents of the "exports" or "imports" arrays, other than
1305  // to do a deep copy in the (should be technically unnecessary,
1306  // but isn't for some odd reason) "self-message" case.
1307  // Distributor thus doesn't need host views; it could do just
1308  // fine with device views, assuming that MPI knows how to read
1309  // device memory (which doesn't even require UVM).
1310 
1311  if (needCommunication) {
1312  if (revOp == DoReverse) {
1313  if (verbose) {
1314  std::ostringstream os;
1315  os << *prefix << "7.0. Reverse mode" << endl;
1316  std::cerr << os.str ();
1317  }
1318 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1319  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1320 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1321  if (constantNumPackets == 0) { //variable num-packets-per-LID:
1322  if (verbose) {
1323  std::ostringstream os;
1324  os << *prefix << "7.1. Variable # packets / LID: first comm "
1325  << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1326  << endl;
1327  std::cerr << os.str ();
1328  }
1329  size_t totalImportPackets = 0;
1330  if (commOnHost) {
1331  this->numExportPacketsPerLID_.template sync<CHMS> ();
1332  this->numImportPacketsPerLID_.template sync<CHMS> ();
1333  this->numImportPacketsPerLID_.template modify<CHMS> (); // output argument
1334  auto numExp_h = create_const_view (this->numExportPacketsPerLID_.template view<CHMS> ());
1335  auto numImp_h = this->numImportPacketsPerLID_.template view<CHMS> ();
1336 
1337  // MPI communication happens here.
1338  distor.doReversePostsAndWaits (numExp_h, 1, numImp_h);
1339 
1340  DES::fence (); // just in case UVM doesn't behave right
1341  typedef typename decltype (numImp_h)::device_type the_dev_type;
1342  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1343  }
1344  else {
1345  this->numExportPacketsPerLID_.template sync<CDMS> ();
1346  this->numImportPacketsPerLID_.template sync<CDMS> ();
1347  this->numImportPacketsPerLID_.template modify<CDMS> (); // output argument
1348  auto numExp_d = create_const_view (this->numExportPacketsPerLID_.template view<CDMS> ());
1349  auto numImp_d = this->numImportPacketsPerLID_.template view<CDMS> ();
1350 
1351  // MPI communication happens here.
1352  distor.doReversePostsAndWaits (numExp_d, 1, numImp_d);
1353 
1354  DES::fence (); // just in case UVM doesn't behave right
1355  typedef typename decltype (numImp_d)::device_type the_dev_type;
1356  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1357  }
1358 
1359  if (verbose) {
1360  std::ostringstream os;
1361  os << *prefix << "totalImportPackets=" << totalImportPackets
1362  << endl;
1363  std::cerr << os.str ();
1364  }
1365  this->reallocImportsIfNeeded (totalImportPackets, verbose);
1366  if (verbose) {
1367  std::ostringstream os;
1368  os << *prefix << "7.3. Second comm" << std::endl;
1369  std::cerr << os.str ();
1370  }
1371 
1372  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) Since we need to
1373  // launch MPI communication on host, we will need
1374  // numExportPacketsPerLID and numImportPacketsPerLID on
1375  // host.
1376  this->numExportPacketsPerLID_.template sync<CHMS> ();
1377  this->numImportPacketsPerLID_.template sync<CHMS> ();
1378 
1379  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1380  // doReversePostsAndWaits currently want
1381  // numExportPacketsPerLID and numImportPacketsPerLID as
1382  // Teuchos::ArrayView, rather than as Kokkos::View.
1383  auto numExportPacketsPerLID_av =
1384  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1385  auto numImportPacketsPerLID_av =
1386  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1387 
1388  // imports_ is for output only, so we don't need to sync
1389  // it before marking it as modified. However, in order to
1390  // prevent spurious debug-mode errors (e.g., "modified on
1391  // both device and host"), we first need to clear its
1392  // "modified" flags.
1393  this->imports_.modified_device() = 0;
1394  this->imports_.modified_host() = 0;
1395 
1396  if (commOnHost) {
1397  this->imports_.template modify<CHMS> ();
1398  distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1399  numExportPacketsPerLID_av,
1400  this->imports_.template view<CHMS> (),
1401  numImportPacketsPerLID_av);
1402  }
1403  else {
1404  this->imports_.template modify<CDMS> ();
1405  distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1406  numExportPacketsPerLID_av,
1407  this->imports_.template view<CDMS> (),
1408  numImportPacketsPerLID_av);
1409  }
1410  }
1411  else {
1412  if (verbose) {
1413  std::ostringstream os;
1414  os << *prefix << "7.1. Const # packets per LID: " << endl
1415  << *prefix << " "
1416  << dualViewStatusToString (this->exports_, "exports_")
1417  << endl
1418  << *prefix << " "
1419  << dualViewStatusToString (this->exports_, "imports_")
1420  << endl;
1421  std::cerr << os.str ();
1422  }
1423 
1424  // imports_ is for output only, so we don't need to sync
1425  // it before marking it as modified. However, in order to
1426  // prevent spurious debug-mode errors (e.g., "modified on
1427  // both device and host"), we first need to clear its
1428  // "modified" flags.
1429  this->imports_.modified_device() = 0;
1430  this->imports_.modified_host() = 0;
1431 
1432  if (commOnHost) {
1433  this->imports_.template modify<CHMS> ();
1434  distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1435  constantNumPackets,
1436  this->imports_.template view<CHMS> ());
1437  }
1438  else { // pack on device
1439  this->imports_.template modify<CDMS> ();
1440  distor.doReversePostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1441  constantNumPackets,
1442  this->imports_.template view<CDMS> ());
1443  }
1444  }
1445  }
1446  else { // revOp == DoForward
1447  if (verbose) {
1448  std::cerr << ">>> 7.0. Forward mode" << std::endl;
1449  }
1450 
1451 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1452  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1453 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1454  if (constantNumPackets == 0) { //variable num-packets-per-LID:
1455  if (verbose) {
1456  std::cerr << ">>> 7.1. Variable # packets / LID: first comm" << std::endl;
1457  }
1458 
1459  size_t totalImportPackets = 0;
1460  if (commOnHost) {
1461  this->numExportPacketsPerLID_.template sync<CHMS> ();
1462  this->numImportPacketsPerLID_.template sync<CHMS> ();
1463  this->numImportPacketsPerLID_.template modify<CHMS> (); // output argument
1464  auto numExp_h = create_const_view (this->numExportPacketsPerLID_.template view<CHMS> ());
1465  auto numImp_h = this->numImportPacketsPerLID_.template view<CHMS> ();
1466 
1467  // MPI communication happens here.
1468  distor.doPostsAndWaits (numExp_h, 1, numImp_h);
1469 
1470  DES::fence (); // just in case UVM doesn't behave right
1471  typedef typename decltype (numImp_h)::device_type the_dev_type;
1472  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1473  }
1474  else {
1475  this->numExportPacketsPerLID_.template sync<CDMS> ();
1476  this->numImportPacketsPerLID_.template sync<CDMS> ();
1477  this->numImportPacketsPerLID_.template modify<CDMS> (); // output argument
1478  auto numExp_d = create_const_view (this->numExportPacketsPerLID_.template view<CDMS> ());
1479  auto numImp_d = this->numImportPacketsPerLID_.template view<CDMS> ();
1480 
1481  // MPI communication happens here.
1482  distor.doPostsAndWaits (numExp_d, 1, numImp_d);
1483 
1484  DES::fence (); // just in case UVM doesn't behave right
1485  typedef typename decltype (numImp_d)::device_type the_dev_type;
1486  totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1487  }
1488 
1489  this->reallocImportsIfNeeded (totalImportPackets, verbose);
1490 
1491  if (verbose) {
1492  std::cerr << ">>> 7.3. Second comm" << std::endl;
1493  }
1494 
1495  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) Since we need to
1496  // launch MPI communication on host, we will need
1497  // numExportPacketsPerLID and numImportPacketsPerLID on
1498  // host.
1499  this->numExportPacketsPerLID_.template sync<CHMS> ();
1500  this->numImportPacketsPerLID_.template sync<CHMS> ();
1501 
1502  // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1503  // doReversePostsAndWaits currently want
1504  // numExportPacketsPerLID and numImportPacketsPerLID as
1505  // Teuchos::ArrayView, rather than as Kokkos::View.
1506  auto numExportPacketsPerLID_av =
1507  getArrayViewFromDualView (this->numExportPacketsPerLID_);
1508  auto numImportPacketsPerLID_av =
1509  getArrayViewFromDualView (this->numImportPacketsPerLID_);
1510 
1511  // imports_ is for output only, so we don't need to sync
1512  // it before marking it as modified. However, in order to
1513  // prevent spurious debug-mode errors (e.g., "modified on
1514  // both device and host"), we first need to clear its
1515  // "modified" flags.
1516  this->imports_.modified_device() = 0;
1517  this->imports_.modified_host() = 0;
1518 
1519  if (commOnHost) {
1520  this->imports_.template modify<CHMS> ();
1521  distor.doPostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1522  numExportPacketsPerLID_av,
1523  this->imports_.template view<CHMS> (),
1524  numImportPacketsPerLID_av);
1525  }
1526  else { // pack on device
1527  this->imports_.template modify<CDMS> ();
1528  distor.doPostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1529  numExportPacketsPerLID_av,
1530  this->imports_.template view<CDMS> (),
1531  numImportPacketsPerLID_av);
1532  }
1533  }
1534  else { // constant number of packets per LID
1535  if (verbose) {
1536  std::ostringstream os;
1537  os << *prefix << "7.1. Const # packets per LID: "
1538  << "exports_.extent(0)=" << exports_.extent (0)
1539  << ", imports_.extent(0) = " << imports_.extent (0)
1540  << endl;
1541  std::cerr << os.str ();
1542  }
1543  // imports_ is for output only, so we don't need to sync
1544  // it before marking it as modified. However, in order to
1545  // prevent spurious debug-mode errors (e.g., "modified on
1546  // both device and host"), we first need to clear its
1547  // "modified" flags.
1548  this->imports_.modified_device() = 0;
1549  this->imports_.modified_host() = 0;
1550 
1551  if (commOnHost) {
1552  if (verbose) {
1553  std::ostringstream os;
1554  os << *prefix << "7.2. Comm buffers on host" << endl;
1555  std::cerr << os.str ();
1556  }
1557  this->imports_.template modify<CHMS> ();
1558  distor.doPostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1559  constantNumPackets,
1560  this->imports_.template view<CHMS> ());
1561  }
1562  else { // pack on device
1563  if (verbose) {
1564  std::ostringstream os;
1565  os << *prefix << "7.2. Comm buffers on device" << endl;
1566  std::cerr << os.str ();
1567  }
1568  this->imports_.template modify<CDMS> ();
1569  distor.doPostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1570  constantNumPackets,
1571  this->imports_.template view<CDMS> ());
1572  }
1573  }
1574  }
1575 
1576  {
1577  if (verbose) {
1578  std::ostringstream os;
1579  os << *prefix << "8. unpackAndCombineNew" << endl;
1580  std::cerr << os.str ();
1581  }
1582 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
1583  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1584 #endif // HAVE_TPETRA_TRANSFER_TIMERS
1585  // NOTE (mfh 26 Apr 2016) We don't actually need to sync the
1586  // input DualViews, but they DO need to be most recently
1587  // updated in the same memory space.
1588  //
1589  // FIXME (mfh 26 Apr 2016) Check that all input DualViews
1590  // were most recently updated in the same memory space, and
1591  // sync them to the same place (based on commOnHost) if not.
1592  this->unpackAndCombineNew (remoteLIDs, this->imports_,
1593  this->numImportPacketsPerLID_,
1594  constantNumPackets, distor, CM);
1595  }
1596  } // if (needCommunication)
1597  } // if (CM != ZERO)
1598 
1599  if (verbose) {
1600  std::ostringstream os;
1601  os << *prefix << "9. Done!" << endl;
1602  std::cerr << os.str ();
1603  }
1604  }
1605 
1606  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1607  void
1608  DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1609  print (std::ostream &os) const
1610  {
1611  using Teuchos::FancyOStream;
1612  using Teuchos::getFancyOStream;
1613  using Teuchos::RCP;
1614  using Teuchos::rcpFromRef;
1615  using std::endl;
1616 
1617  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1618  this->describe (*out, Teuchos::VERB_DEFAULT);
1619  }
1620 
1621  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1622  void
1625  {}
1626 
1627  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1628  void
1631  {}
1632 
1633  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1634  void
1637  {}
1638 
1639 } // namespace Classes
1640 
1641  template<class DistObjectType>
1642  void
1643  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
1644  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1645  typename DistObjectType::global_ordinal_type,
1646  typename DistObjectType::node_type> >& newMap)
1647  {
1648  input->removeEmptyProcessesInPlace (newMap);
1649  if (newMap.is_null ()) { // my process is excluded
1650  input = Teuchos::null;
1651  }
1652  }
1653 
1654  template<class DistObjectType>
1655  void
1656  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
1657  {
1658  using Teuchos::RCP;
1659  typedef typename DistObjectType::local_ordinal_type LO;
1660  typedef typename DistObjectType::global_ordinal_type GO;
1661  typedef typename DistObjectType::node_type NT;
1662  typedef Map<LO, GO, NT> map_type;
1663 
1664  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
1665  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
1666  }
1667 
// Explicit instantiation macro for general DistObject.
// Expands to an explicit instantiation of
// Classes::DistObject<SCALAR, LO, GO, NODE>.
#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
  namespace Classes { \
    template class DistObject< SCALAR , LO , GO , NODE >; \
  }

// Explicit instantiation macro for DistObject<char, ...>.
// The "SLGN" stuff above doesn't work for Packet=char.
#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
  namespace Classes { \
    template class DistObject< char , LO , GO , NODE >; \
  }
1676 
1677 } // namespace Tpetra
1678 
1679 #endif // TPETRA_DISTOBJECT_DEF_HPP
Tpetra::Distributor::doReversePostsAndWaits
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
Definition: Tpetra_Distributor.hpp:1926
Tpetra::combineModeToString
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Definition: Tpetra_CombineMode.cpp:89
Tpetra::REPLACE
Replace existing values with new values.
Definition: Tpetra_CombineMode.hpp:97
Tpetra_Details_Behavior.hpp
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Tpetra::Classes::DistObject< GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node >::device_type
Node::device_type device_type
The Kokkos Device type.
Definition: Tpetra_DistObject_decl.hpp:370
Tpetra::Details::getArrayViewFromDualView
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Definition: Tpetra_Util.hpp:878
Tpetra::Details::getDualViewCopyFromArrayView
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host memory).
Definition: Tpetra_Util.hpp:912
Tpetra::Distributor::doPostsAndWaits
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
Definition: Tpetra_Distributor.hpp:1055
Tpetra::Details::Behavior::debug
static bool debug()
Whether Tpetra is in debug mode.
Definition: Tpetra_Details_Behavior.cpp:245
Tpetra::ZERO
Replace old values with zero.
Definition: Tpetra_CombineMode.hpp:99
Tpetra::Classes::DistObject::getMap
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Definition: Tpetra_DistObject_decl.hpp:510
Tpetra::Classes::DistObject
Base class for distributed Tpetra objects that support data redistribution.
Definition: Tpetra_DistObject_decl.hpp:349
Tpetra::Classes::DistObject< GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node >::ReverseOption
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
Definition: Tpetra_DistObject_decl.hpp:605
KokkosClassic::ReadWriteOption
ReadWriteOption
Read/write options for non-const views.
Definition: Tpetra_DistObject_decl.hpp:74
Tpetra::Details::dualViewStatusToString
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
Definition: Tpetra_Util.hpp:942
Tpetra::Classes::Import
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
Definition: Tpetra_Import_decl.hpp:115
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:188
Tpetra::Details::Behavior::verbose
static bool verbose()
Whether Tpetra is in verbose mode.
Definition: Tpetra_Details_Behavior.cpp:260
Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine
void unpackAndCombine(const LocalGraph &local_graph, const LocalMap &local_map, const Kokkos::View< const Packet *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const typename LocalMap::local_ordinal_type *, typename LocalMap::device_type, Kokkos::MemoryUnmanaged > &import_lids, const Tpetra::CombineMode combine_mode, const bool unpack_pids, const bool atomic)
Perform the unpack operation for the graph.
Definition: Tpetra_Details_unpackCrsGraphAndCombine_def.hpp:315
Tpetra_Details_reallocDualViewIfNeeded.hpp
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of Tpetra.
Tpetra::removeEmptyProcessesInPlace
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
Definition: Tpetra_DistObject_def.hpp:1643
Tpetra::Classes::Map
A parallel distribution of indices over processes.
Definition: Tpetra_Map_decl.hpp:247
Tpetra::DistObject
::Tpetra::Classes::DistObject< Packet, LocalOrdinal, GlobalOrdinal, Node > DistObject
Alias for Tpetra::Classes::DistObject.
Definition: Tpetra_DistObject_fwd.hpp:75
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Classes::Export
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
Definition: Tpetra_Export_decl.hpp:124
Tpetra::SrcDistObject
Abstract base class for objects that can be the source of an Import or Export operation.
Definition: Tpetra_SrcDistObject.hpp:89
Tpetra::Classes::DistObject::isDistributed
bool isDistributed() const
Whether this is a globally distributed object.
Definition: Tpetra_DistObject_def.hpp:420
Tpetra::INSERT
Insert new values that don't currently exist.
Definition: Tpetra_CombineMode.hpp:96
Tpetra::Classes::Map::removeEmptyProcesses
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > removeEmptyProcesses() const
Return a new Map with processes with zero elements removed.
Definition: Tpetra_Map_def.hpp:1797
Tpetra::Details::reallocDualViewIfNeeded
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Definition: Tpetra_Details_reallocDualViewIfNeeded.hpp:83
Tpetra::CombineMode
CombineMode
Rule for combining data in an Import or Export.
Definition: Tpetra_CombineMode.hpp:94
Tpetra::Classes::DistObject::DistObject
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
Definition: Tpetra_DistObject_def.hpp:63