Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_CooMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_COOMATRIX_HPP
43 #define TPETRA_DETAILS_COOMATRIX_HPP
44 
49 
50 #include "TpetraCore_config.h"
51 #include "Tpetra_Details_PackTriples.hpp"
52 #include "Tpetra_DistObject.hpp"
53 #include "Tpetra_Details_gathervPrint.hpp"
55 #include "Teuchos_TypeNameTraits.hpp"
56 
57 #include <initializer_list>
58 #include <map>
59 #include <memory>
60 #include <string>
61 #include <type_traits>
62 #include <vector>
63 
64 
65 namespace Tpetra {
66 namespace Details {
67 
68 // Implementation details of Tpetra::Details.
69 // So, users REALLY should not use anything in here.
70 namespace Impl {
71 
74 template<class IndexType>
75 struct CooGraphEntry {
76  IndexType row;
77  IndexType col;
78 };
79 
84 template<class IndexType>
86  bool
87  operator () (const CooGraphEntry<IndexType>& x,
88  const CooGraphEntry<IndexType>& y) const
89  {
90  return (x.row < y.row) || (x.row == y.row && x.col < y.col);
91  }
92 };
93 
96 template<class SC, class GO>
98 private:
106  typedef std::map<CooGraphEntry<GO>, SC,
107  CompareCooGraphEntries<GO> > entries_type;
108 
111  entries_type entries_;
112 
113 public:
115  typedef char packet_type;
116 
118  CooMatrixImpl () = default;
119 
126  void
127  sumIntoGlobalValue (const GO gblRowInd,
128  const GO gblColInd,
129  const SC& val)
130  {
131  // There's no sense in worrying about the insertion hint, since
132  // indices may be all over the place. Users make no promises of
133  // sorting or locality of input.
134  CooGraphEntry<GO> ij {gblRowInd, gblColInd};
135  auto result = this->entries_.insert ({ij, val});
136  if (! result.second) { // already in the map
137  result.first->second += val; // sum in the new value
138  }
139  }
140 
149  void
150  sumIntoGlobalValues (const GO gblRowInds[],
151  const GO gblColInds[],
152  const SC vals[],
153  const std::size_t numEnt)
154  {
155  for (std::size_t k = 0; k < numEnt; ++k) {
156  // There's no sense in worrying about the insertion hint, since
157  // indices may be all over the place. Users make no promises of
158  // sorting or locality of input.
159  CooGraphEntry<GO> ij {gblRowInds[k], gblColInds[k]};
160  const SC val = vals[k];
161  auto result = this->entries_.insert ({ij, val});
162  if (! result.second) { // already in the map
163  result.first->second += val; // sum in the new value
164  }
165  }
166  }
167 
169  std::size_t
171  {
172  return this->entries_.size ();
173  }
174 
178  void
179  forAllEntries (std::function<void (const GO, const GO, const SC&)> f) const
180  {
181  for (auto iter = this->entries_.begin ();
182  iter != this->entries_.end (); ++iter) {
183  f (iter->first.row, iter->first.col, iter->second);
184  }
185  }
186 
192  void
193  mergeIntoRow (const GO tgtGblRow,
194  const CooMatrixImpl<SC, GO>& src,
195  const GO srcGblRow)
196  {
197  // const char prefix[] =
198  // "Tpetra::Details::Impl::CooMatrixImpl::mergeIntoRow: ";
199 
200  entries_type& tgtEntries = this->entries_;
201  const entries_type& srcEntries = src.entries_;
202 
203  // Find all entries with the given global row index. The min GO
204  // value is guaranteed to be the least possible column index, so
205  // beg1 is a lower bound for all (row index, column index) pairs.
206  // Lower bound is inclusive; upper bound is exclusive.
207  auto srcBeg = srcEntries.lower_bound ({srcGblRow, std::numeric_limits<GO>::min ()});
208  auto srcEnd = srcEntries.upper_bound ({srcGblRow+1, std::numeric_limits<GO>::min ()});
209  auto tgtBeg = tgtEntries.lower_bound ({tgtGblRow, std::numeric_limits<GO>::min ()});
210  auto tgtEnd = tgtEntries.upper_bound ({tgtGblRow+1, std::numeric_limits<GO>::min ()});
211 
212  // Don't waste time iterating over the current row of *this, if
213  // the current row of src is empty.
214  if (srcBeg != srcEnd) {
215  for (auto tgtCur = tgtBeg; tgtCur != tgtEnd; ++tgtCur) {
216  auto srcCur = srcBeg;
217  while (srcCur != srcEnd && srcCur->first.col < tgtCur->first.col) {
218  ++srcCur;
219  }
220  // At this point, one of the following is true:
221  //
222  // 1. srcCur == srcEnd, thus nothing more to insert.
223  // 2. srcCur->first.col > tgtCur->first.col, thus the current
224  // row of src has no entry matching tgtCur->first, so we
225  // have to insert it. Insertion does not invalidate
226  // tgtEntries iterators, and neither srcEntries nor
227  // tgtEntries have duplicates, so this is safe.
228  // 3. srcCur->first.col == tgtCur->first.col, so add the two entries.
229  if (srcCur != srcEnd) {
230  if (srcCur->first.col == tgtCur->first.col) {
231  tgtCur->second += srcCur->second;
232  }
233  else {
234  // tgtCur is (optimally) right before where we want to put
235  // the new entry, since srcCur points to the first entry
236  // in srcEntries whose column index is greater than that
237  // of the entry to which tgtCur points.
238  (void) tgtEntries.insert (tgtCur, *srcCur);
239  }
240  } // if srcCur != srcEnd
241  } // for each entry in the current row (lclRow) of *this
242  } // if srcBeg != srcEnd
243  }
244 
254  int
255  countPackRow (int& numPackets,
256  const GO gblRow,
257  const ::Teuchos::Comm<int>& comm,
258  std::ostream* errStrm = NULL) const
259  {
260  using ::Tpetra::Details::countPackTriples;
261  using ::Tpetra::Details::countPackTriplesCount;
262  using std::endl;
263  const char prefix[] = "Tpetra::Details::Impl::CooMatrixImpl::countPackRow: ";
264 #ifdef HAVE_TPETRACORE_MPI
265  int errCode = MPI_SUCCESS;
266 #else
267  int errCode = 0;
268 #endif // HAVE_TPETRACORE_MPI
269 
270  // Count the number of entries in the given row.
271  const GO minGO = std::numeric_limits<GO>::min ();
272  auto beg = this->entries_.lower_bound ({gblRow, minGO});
273  auto end = this->entries_.upper_bound ({gblRow+1, minGO});
274  int numEnt = 0;
275  for (auto iter = beg; iter != end; ++iter) {
276  ++numEnt;
277  if (numEnt == std::numeric_limits<int>::max ()) {
278  if (errStrm != NULL) {
279  *errStrm << prefix << "In (global) row " << gblRow << ", the number "
280  "of entries thus far, numEnt = " << numEnt << ", has reached the "
281  "maximum int value. We need to store this count as int for MPI's "
282  "sake." << endl;
283  }
284  return -1;
285  }
286  }
287 
288  int numPacketsForCount = 0; // output argument of countPackTriplesCount
289  {
290  errCode =
291  countPackTriplesCount (comm, numPacketsForCount, errStrm);
292  if (errCode != 0) {
293  if (errStrm != NULL) {
294  *errStrm << prefix << "countPackTriplesCount "
295  "returned errCode = " << errCode << " != 0." << endl;
296  }
297  return errCode;
298  }
299  if (numPacketsForCount < 0) {
300  if (errStrm != NULL) {
301  *errStrm << prefix << "countPackTriplesCount returned "
302  "numPacketsForCount = " << numPacketsForCount << " < 0." << endl;
303  }
304  return -1;
305  }
306  }
307 
308  int numPacketsForTriples = 0; // output argument of countPackTriples
309  {
310  errCode = countPackTriples<SC, GO> (numEnt, comm, numPacketsForTriples);
311  TEUCHOS_TEST_FOR_EXCEPTION
312  (errCode != 0, std::runtime_error, prefix << "countPackTriples "
313  "returned errCode = " << errCode << " != 0.");
314  TEUCHOS_TEST_FOR_EXCEPTION
315  (numPacketsForTriples < 0, std::logic_error, prefix << "countPackTriples "
316  "returned numPacketsForTriples = " << numPacketsForTriples << " < 0.");
317  }
318 
319  numPackets = numPacketsForCount + numPacketsForTriples;
320  return errCode;
321  }
322 
337  void
338  packRow (packet_type outBuf[],
339  const int outBufSize,
340  int& outBufCurPos, // in/out argument
341  const ::Teuchos::Comm<int>& comm,
342  std::vector<GO>& gblRowInds,
343  std::vector<GO>& gblColInds,
344  std::vector<SC>& vals,
345  const GO gblRow) const
346  {
347  using ::Tpetra::Details::packTriples;
348  using ::Tpetra::Details::packTriplesCount;
349  const char prefix[] = "Tpetra::Details::Impl::CooMatrixImpl::packRow: ";
350 
351  const GO minGO = std::numeric_limits<GO>::min ();
352  auto beg = this->entries_.lower_bound ({gblRow, minGO});
353  auto end = this->entries_.upper_bound ({gblRow+1, minGO});
354 
355  // This doesn't actually deallocate. Only swapping with an empty
356  // std::vector does that.
357  gblRowInds.resize (0);
358  gblColInds.resize (0);
359  vals.resize (0);
360 
361  int numEnt = 0;
362  for (auto iter = beg; iter != end; ++iter) {
363  gblRowInds.push_back (iter->first.row);
364  gblColInds.push_back (iter->first.col);
365  vals.push_back (iter->second);
366  ++numEnt;
367  TEUCHOS_TEST_FOR_EXCEPTION
368  (numEnt == std::numeric_limits<int>::max (), std::runtime_error, prefix
369  << "In (global) row " << gblRow << ", the number of entries thus far, "
370  "numEnt = " << numEnt << ", has reached the maximum int value. "
371  "We need to store this count as int for MPI's sake.");
372  }
373 
374  {
375  const int errCode =
376  packTriplesCount (numEnt, outBuf, outBufSize, outBufCurPos, comm);
377  TEUCHOS_TEST_FOR_EXCEPTION
378  (errCode != 0, std::runtime_error, prefix
379  << "In (global) row " << gblRow << ", packTriplesCount returned "
380  "errCode = " << errCode << " != 0.");
381  }
382  {
383  const int errCode =
384  packTriples (gblRowInds.data (),
385  gblColInds.data (),
386  vals.data (),
387  numEnt,
388  outBuf,
389  outBufSize,
390  outBufCurPos, // in/out argument
391  comm);
392  TEUCHOS_TEST_FOR_EXCEPTION
393  (errCode != 0, std::runtime_error, prefix << "In (global) row "
394  << gblRow << ", packTriples returned errCode = " << errCode
395  << " != 0.");
396  }
397  }
398 
406  GO
407  getMyGlobalRowIndices (std::vector<GO>& rowInds) const
408  {
409  rowInds.clear ();
410 
411  GO lclMinRowInd = std::numeric_limits<GO>::max (); // compute local min
412  for (typename entries_type::const_iterator iter = this->entries_.begin ();
413  iter != this->entries_.end (); ++iter) {
414  const GO rowInd = iter->first.row;
415  if (rowInd < lclMinRowInd) {
416  lclMinRowInd = rowInd;
417  }
418  if (rowInds.empty () || rowInds.back () != rowInd) {
419  rowInds.push_back (rowInd); // don't insert duplicates
420  }
421  }
422  return lclMinRowInd;
423  }
424 
425  template<class OffsetType>
426  void
427  buildCrs (std::vector<OffsetType>& rowOffsets,
428  GO gblColInds[],
429  SC vals[]) const
430  {
431  static_assert (std::is_integral<OffsetType>::value,
432  "OffsetType must be a built-in integer type.");
433 
434  // clear() doesn't generally free storage; it just resets the
435  // length. Thus, we reuse any existing storage here.
436  rowOffsets.clear ();
437 
438  const std::size_t numEnt = this->getLclNumEntries ();
439  if (numEnt == 0) {
440  rowOffsets.push_back (0);
441  }
442  else {
443  typename entries_type::const_iterator iter = this->entries_.begin ();
444  GO prevGblRowInd = iter->first.row;
445 
446  OffsetType k = 0;
447  for ( ; iter != this->entries_.end (); ++iter, ++k) {
448  const GO gblRowInd = iter->first.row;
449  if (k == 0 || gblRowInd != prevGblRowInd) {
450  // The row offsets array always has at least one entry. The
451  // first entry is always zero, and the last entry is always
452  // the number of matrix entries.
453  rowOffsets.push_back (k);
454  prevGblRowInd = gblRowInd;
455  }
456  gblColInds[k] = iter->first.col;
457 
458  static_assert (std::is_same<typename std::decay<decltype (iter->second)>::type, SC>::value,
459  "The type of iter->second != SC.");
460  vals[k] = iter->second;
461  }
462  rowOffsets.push_back (static_cast<OffsetType> (numEnt));
463  }
464  }
465 
478  template<class OffsetType, class LO>
479  void
480  buildLocallyIndexedCrs (std::vector<OffsetType>& rowOffsets,
481  LO lclColInds[],
482  SC vals[],
483  std::function<LO (const GO)> gblToLcl) const
484  {
485  static_assert (std::is_integral<OffsetType>::value,
486  "OffsetType must be a built-in integer type.");
487  static_assert (std::is_integral<LO>::value,
488  "LO must be a built-in integer type.");
489 
490  // clear() doesn't generally free storage; it just resets the
491  // length. Thus, we reuse any existing storage here.
492  rowOffsets.clear ();
493 
494  const std::size_t numEnt = this->getLclNumEntries ();
495  if (numEnt == 0) {
496  rowOffsets.push_back (0);
497  }
498  else {
499  typename entries_type::const_iterator iter = this->entries_.begin ();
500  GO prevGblRowInd = iter->first.row;
501 
502  OffsetType k = 0;
503  for ( ; iter != this->entries_.end (); ++iter, ++k) {
504  const GO gblRowInd = iter->first.row;
505  if (k == 0 || gblRowInd != prevGblRowInd) {
506  // The row offsets array always has at least one entry. The
507  // first entry is always zero, and the last entry is always
508  // the number of matrix entries.
509  rowOffsets.push_back (k);
510  prevGblRowInd = gblRowInd;
511  }
512  lclColInds[k] = gblToLcl (iter->first.col);
513  vals[k] = iter->second;
514  }
515  rowOffsets.push_back (static_cast<OffsetType> (numEnt));
516  }
517  }
518 };
519 
520 } // namespace Impl
521 
570 template<class SC,
574 class CooMatrix : public ::Tpetra::DistObject<char, LO, GO, NT> {
575 public:
577  typedef char packet_type;
579  typedef SC scalar_type;
580  typedef LO local_ordinal_type;
581  typedef GO global_ordinal_type;
582  typedef NT node_type;
583  typedef typename NT::device_type device_type;
585  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
586 
587 private:
591 
594 
595 public:
602  base_type (::Teuchos::null),
603  localError_ (new bool (false)),
604  errs_ (new std::shared_ptr<std::ostringstream> ()) // ptr to a null ptr
605  {}
606 
614  CooMatrix (const ::Teuchos::RCP<const map_type>& map) :
615  base_type (map),
616  localError_ (new bool (false)),
617  errs_ (new std::shared_ptr<std::ostringstream> ()) // ptr to a null ptr
618  {}
619 
621  virtual ~CooMatrix () {}
622 
629  void
630  sumIntoGlobalValue (const GO gblRowInd,
631  const GO gblColInd,
632  const SC& val)
633  {
634  this->impl_.sumIntoGlobalValue (gblRowInd, gblColInd, val);
635  }
636 
645  void
646  sumIntoGlobalValues (const GO gblRowInds[],
647  const GO gblColInds[],
648  const SC vals[],
649  const std::size_t numEnt)
650  {
651  this->impl_.sumIntoGlobalValues (gblRowInds, gblColInds, vals, numEnt);
652  }
653 
655  void
656  sumIntoGlobalValues (std::initializer_list<GO> gblRowInds,
657  std::initializer_list<GO> gblColInds,
658  std::initializer_list<SC> vals,
659  const std::size_t numEnt)
660  {
661  this->impl_.sumIntoGlobalValues (gblRowInds.begin (), gblColInds.begin (),
662  vals.begin (), numEnt);
663  }
664 
666  std::size_t
668  {
669  return this->impl_.getLclNumEntries ();
670  }
671 
672  template<class OffsetType>
673  void
674  buildCrs (::Kokkos::View<OffsetType*, device_type>& rowOffsets,
675  ::Kokkos::View<GO*, device_type>& gblColInds,
676  ::Kokkos::View<typename ::Kokkos::Details::ArithTraits<SC>::val_type*, device_type>& vals)
677  {
678  static_assert (std::is_integral<OffsetType>::value,
679  "OffsetType must be a built-in integer type.");
680  using ::Kokkos::create_mirror_view;
682  using ::Kokkos::View;
683  typedef typename ::Kokkos::Details::ArithTraits<SC>::val_type ISC;
684 
685  const std::size_t numEnt = this->getLclNumEntries ();
686 
687  gblColInds = View<GO*, device_type> ("gblColInds", numEnt);
688  vals = View<ISC*, device_type> ("vals", numEnt);
689  auto gblColInds_h = create_mirror_view (gblColInds);
690  auto vals_h = create_mirror_view (vals);
691 
692  std::vector<std::size_t> rowOffsetsSV;
693  this->impl_.buildCrs (rowOffsetsSV,
694  gblColInds_h.data (),
695  vals_h.data ());
696  rowOffsets =
697  View<OffsetType*, device_type> ("rowOffsets", rowOffsetsSV.size ());
698  typename View<OffsetType*, device_type>::HostMirror
699  rowOffsets_h (rowOffsetsSV.data (), rowOffsetsSV.size ());
700  deep_copy (rowOffsets, rowOffsets_h);
701 
702  deep_copy (gblColInds, gblColInds_h);
703  deep_copy (vals, vals_h);
704  }
705 
716  void
717  fillComplete (const ::Teuchos::RCP<const ::Teuchos::Comm<int> >& comm)
718  {
719  if (comm.is_null ()) {
720  this->map_ = ::Teuchos::null;
721  }
722  else {
723  this->map_ = this->buildMap (comm);
724  }
725  }
726 
735  void
737  {
738  TEUCHOS_TEST_FOR_EXCEPTION
739  (this->getMap ().is_null (), std::runtime_error, "Tpetra::Details::"
740  "CooMatrix::fillComplete: This object does not yet have a Map. "
741  "You must call the version of fillComplete "
742  "that takes an input communicator.");
743  this->fillComplete (this->getMap ()->getComm ());
744  }
745 
760  bool localError () const {
761  return *localError_;
762  }
763 
781  std::string errorMessages () const {
782  return ((*errs_).get () == NULL) ? std::string ("") : (*errs_)->str ();
783  }
784 
785 private:
798  std::shared_ptr<bool> localError_;
799 
807  std::shared_ptr<std::shared_ptr<std::ostringstream> > errs_;
808 
810  std::ostream&
811  markLocalErrorAndGetStream ()
812  {
813  * (this->localError_) = true;
814  if ((*errs_).get () == NULL) {
815  *errs_ = std::shared_ptr<std::ostringstream> (new std::ostringstream ());
816  }
817  return **errs_;
818  }
819 
820 public:
823  virtual std::string description () const {
824  using Teuchos::TypeNameTraits;
825 
826  std::ostringstream os;
827  os << "\"Tpetra::Details::CooMatrix\": {"
828  << "SC: " << TypeNameTraits<SC>::name ()
829  << ", LO: " << TypeNameTraits<LO>::name ()
830  << ", GO: " << TypeNameTraits<GO>::name ()
831  << ", NT: " << TypeNameTraits<NT>::name ();
832  if (this->getObjectLabel () != "") {
833  os << ", Label: \"" << this->getObjectLabel () << "\"";
834  }
835  os << ", Has Map: " << (this->map_.is_null () ? "false" : "true")
836  << "}";
837  return os.str ();
838  }
839 
842  virtual void
843  describe (Teuchos::FancyOStream& out,
844  const Teuchos::EVerbosityLevel verbLevel =
845  Teuchos::Describable::verbLevel_default) const
846  {
847  using ::Tpetra::Details::gathervPrint;
848  using ::Teuchos::EVerbosityLevel;
849  using ::Teuchos::OSTab;
850  using ::Teuchos::TypeNameTraits;
851  using ::Teuchos::VERB_DEFAULT;
852  using ::Teuchos::VERB_LOW;
853  using ::Teuchos::VERB_MEDIUM;
854  using std::endl;
855 
856  const auto vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
857 
858  auto comm = this->getMap ().is_null () ?
859  Teuchos::null : this->getMap ()->getComm ();
860  const int myRank = comm.is_null () ? 0 : comm->getRank ();
861  // const int numProcs = comm.is_null () ? 1 : comm->getSize ();
862 
863  if (vl != Teuchos::VERB_NONE) {
864  // Convention is to start describe() implementations with a tab.
865  OSTab tab0 (out);
866  if (myRank == 0) {
867  out << "\"Tpetra::Details::CooMatrix\":" << endl;
868  }
869  OSTab tab1 (out);
870  if (myRank == 0) {
871  out << "Template parameters:" << endl;
872  {
873  OSTab tab2 (out);
874  out << "SC: " << TypeNameTraits<SC>::name () << endl
875  << "LO: " << TypeNameTraits<LO>::name () << endl
876  << "GO: " << TypeNameTraits<GO>::name () << endl
877  << "NT: " << TypeNameTraits<NT>::name () << endl;
878  }
879  if (this->getObjectLabel () != "") {
880  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
881  }
882  out << "Has Map: " << (this->map_.is_null () ? "false" : "true") << endl;
883  } // if myRank == 0
884 
885  // Describe the Map, if it exists.
886  if (! this->map_.is_null ()) {
887  if (myRank == 0) {
888  out << "Map:" << endl;
889  }
890  OSTab tab2 (out);
891  this->map_->describe (out, vl);
892  }
893 
894  // At verbosity > VERB_LOW, each process prints something.
895  if (vl > VERB_LOW) {
896  std::ostringstream os;
897  os << "Process " << myRank << ":" << endl;
898  //OSTab tab3 (os);
899 
900  const std::size_t numEnt = this->impl_.getLclNumEntries ();
901  os << " Local number of entries: " << numEnt << endl;
902  if (vl > VERB_MEDIUM) {
903  os << " Local entries:" << endl;
904  //OSTab tab4 (os);
905  this->impl_.forAllEntries ([&os] (const GO row, const GO col, const SC& val) {
906  os << " {"
907  << "row: " << row
908  << ", col: " << col
909  << ", val: " << val
910  << "}" << endl;
911  });
912  }
913  gathervPrint (out, os.str (), *comm);
914  }
915  } // vl != Teuchos::VERB_NONE
916  }
917 
918 private:
927  Teuchos::RCP<const map_type>
928  buildMap (const ::Teuchos::RCP<const ::Teuchos::Comm<int> >& comm)
929  {
930  using ::Teuchos::outArg;
931  using ::Teuchos::rcp;
932  using ::Teuchos::REDUCE_MIN;
933  using ::Teuchos::reduceAll;
934  typedef ::Tpetra::global_size_t GST;
935  //const char prefix[] = "Tpetra::Details::CooMatrix::buildMap: ";
936 
937  // Processes where comm is null don't participate in the Map.
938  if (comm.is_null ()) {
939  return ::Teuchos::null;
940  }
941 
942  // mfh 17 Jan 2017: We just happen to use row indices, because
943  // that's what Tpetra::CrsMatrix currently uses. That's probably
944  // not the best thing to use, but it's not bad for commonly
945  // encountered matrices. A better more general approach might be
946  // to hash (row index, column index) pairs to a global index. One
947  // could make that unique by doing a parallel scan at map
948  // construction time.
949 
950  std::vector<GO> rowInds;
951  const GO lclMinGblRowInd = this->impl_.getMyGlobalRowIndices (rowInds);
952 
953  // Compute the globally min row index for the "index base."
954  GO gblMinGblRowInd = 0; // output argument
955  reduceAll<int, GO> (*comm, REDUCE_MIN, lclMinGblRowInd,
956  outArg (gblMinGblRowInd));
957  const GO indexBase = gblMinGblRowInd;
958  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
959  return rcp (new map_type (INV, rowInds.data (), rowInds.size (),
960  indexBase, comm));
961  }
962 
963 protected:
967  virtual size_t constantNumberOfPackets () const {
968  return static_cast<size_t> (0);
969  }
970 
974  virtual bool
975  checkSizes (const ::Tpetra::SrcDistObject& source)
976  {
977  using std::endl;
979  const char prefix[] = "Tpetra::Details::CooMatrix::checkSizes: ";
980 
981  const this_type* src = dynamic_cast<const this_type* > (&source);
982 
983  if (src == NULL) {
984  std::ostream& err = markLocalErrorAndGetStream ();
985  err << prefix << "The source object of the Import or Export "
986  "must be a CooMatrix with the same template parameters as the "
987  "target object." << endl;
988  }
989  else if (this->map_.is_null ()) {
990  std::ostream& err = markLocalErrorAndGetStream ();
991  err << prefix << "The target object of the Import or Export "
992  "must be a CooMatrix with a nonnull Map." << endl;
993  }
994  return ! (* (this->localError_));
995  }
996 
1000  virtual bool useNewInterface () { return true; }
1001 
1004  virtual void
1005  copyAndPermuteNew (const ::Tpetra::SrcDistObject& sourceObject,
1006  const size_t numSameIDs,
1007  const ::Kokkos::DualView<const LO*, device_type>& permuteToLIDs,
1008  const ::Kokkos::DualView<const LO*, device_type>& permuteFromLIDs)
1009  {
1010  using std::endl;
1012  const char prefix[] = "Tpetra::Details::CooMatrix::copyAndPermuteNew: ";
1013 
1014  // There's no communication in this method, so it's safe just to
1015  // return on error.
1016 
1017  if (* (this->localError_)) {
1018  std::ostream& err = this->markLocalErrorAndGetStream ();
1019  err << prefix << "The target object of the Import or Export is "
1020  "already in an error state." << endl;
1021  return;
1022  }
1023 
1024  const this_type* src = dynamic_cast<const this_type*> (&sourceObject);
1025  if (src == NULL) {
1026  std::ostream& err = this->markLocalErrorAndGetStream ();
1027  err << prefix << "Input argument 'sourceObject' is not a CooMatrix."
1028  << endl;
1029  return;
1030  }
1031 
1032  const size_t numPermuteIDs =
1033  static_cast<size_t> (permuteToLIDs.extent (0));
1034  if (numPermuteIDs != static_cast<size_t> (permuteFromLIDs.extent (0))) {
1035  std::ostream& err = this->markLocalErrorAndGetStream ();
1036  err << prefix << "permuteToLIDs.extent(0) = "
1037  << numPermuteIDs << " != permuteFromLIDs.extent(0) = "
1038  << permuteFromLIDs.extent (0) << "." << endl;
1039  return;
1040  }
1041  if (sizeof (int) <= sizeof (size_t) &&
1042  numPermuteIDs > static_cast<size_t> (std::numeric_limits<int>::max ())) {
1043  std::ostream& err = this->markLocalErrorAndGetStream ();
1044  err << prefix << "numPermuteIDs = " << numPermuteIDs
1045  << ", a size_t, overflows int." << endl;
1046  return;
1047  }
1048 
1049  // TODO (mfh 19 Jan 2017) Check that permuteToLIDs and
1050  // permuteFromLIDs are sync'd to host.
1051 
1052  // Even though this is an std::set, we can start with numSameIDs
1053  // just by iterating through the first entries of the set.
1054 
1055  if (sizeof (int) <= sizeof (size_t) &&
1056  numSameIDs > static_cast<size_t> (std::numeric_limits<int>::max ())) {
1057  std::ostream& err = this->markLocalErrorAndGetStream ();
1058  err << prefix << "numSameIDs = " << numSameIDs
1059  << ", a size_t, overflows int." << endl;
1060  return;
1061  }
1062 
1063  //
1064  // Copy in entries from any initial rows with the same global row indices.
1065  //
1066  const LO numSame = static_cast<int> (numSameIDs);
1067  // Count of local row indices encountered here with invalid global
1068  // row indices. If nonzero, something went wrong. If something
1069  // did go wrong, we'll defer responding until the end of this
1070  // method, so we can print as much useful info as possible.
1071  LO numInvalidSameRows = 0;
1072  for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
1073  // All numSame initial rows have the same global index in both
1074  // source and target Maps, so we only need to convert to global
1075  // once.
1076  const GO gblRow = this->map_->getGlobalElement (lclRow);
1077  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1078  ++numInvalidSameRows;
1079  continue;
1080  }
1081  else {
1082  this->impl_.mergeIntoRow (gblRow, src->impl_, gblRow);
1083  }
1084  }
1085 
1086  //
1087  // Copy in entries from remaining rows that are permutations, that
1088  // is, that live in both the source and target Maps, but aren't
1089  // included in the "same" list (see above).
1090  //
1091  const LO numPermute = static_cast<int> (numPermuteIDs);
1092  // Count of local "from" row indices encountered here with invalid
1093  // global row indices. If nonzero, something went wrong. If
1094  // something did go wrong, we'll defer responding until the end of
1095  // this method, so we can print as much useful info as possible.
1096  LO numInvalidRowsFrom = 0;
1097  // Count of local "to" row indices encountered here with invalid
1098  // global row indices. If nonzero, something went wrong. If
1099  // something did go wrong, we'll defer responding until the end of
1100  // this method, so we can print as much useful info as possible.
1101  LO numInvalidRowsTo = 0;
1102  for (LO k = 0; k < numPermute; ++k) {
1103  const LO lclRowFrom = permuteFromLIDs.h_view(k);
1104  const LO lclRowTo = permuteToLIDs.h_view(k);
1105  const GO gblRowFrom = src->map_->getGlobalElement (lclRowFrom);
1106  const GO gblRowTo = this->map_->getGlobalElement (lclRowTo);
1107 
1108  bool bothConversionsValid = true;
1109  if (gblRowFrom == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1110  ++numInvalidRowsFrom;
1111  bothConversionsValid = false;
1112  }
1113  if (gblRowTo == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1114  ++numInvalidRowsTo;
1115  bothConversionsValid = false;
1116  }
1117  if (bothConversionsValid) {
1118  this->impl_.mergeIntoRow (gblRowTo, src->impl_, gblRowFrom);
1119  }
1120  }
1121 
1122  // Print info if any errors occurred.
1123  if (numInvalidSameRows != 0 || numInvalidRowsFrom != 0 || numInvalidRowsTo != 0) {
1124  // Collect and print all the invalid input row indices, for the
1125  // "same," "from," and "to" lists.
1126  std::vector<std::pair<LO, GO> > invalidSameRows;
1127  invalidSameRows.reserve (numInvalidSameRows);
1128  std::vector<std::pair<LO, GO> > invalidRowsFrom;
1129  invalidRowsFrom.reserve (numInvalidRowsFrom);
1130  std::vector<std::pair<LO, GO> > invalidRowsTo;
1131  invalidRowsTo.reserve (numInvalidRowsTo);
1132 
1133  for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
1134  // All numSame initial rows have the same global index in both
1135  // source and target Maps, so we only need to convert to global
1136  // once.
1137  const GO gblRow = this->map_->getGlobalElement (lclRow);
1138  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1139  invalidSameRows.push_back ({lclRow, gblRow});
1140  }
1141  }
1142 
1143  for (LO k = 0; k < numPermute; ++k) {
1144  const LO lclRowFrom = permuteFromLIDs.h_view(k);
1145  const LO lclRowTo = permuteToLIDs.h_view(k);
1146  const GO gblRowFrom = src->map_->getGlobalElement (lclRowFrom);
1147  const GO gblRowTo = this->map_->getGlobalElement (lclRowTo);
1148 
1149  if (gblRowFrom == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1150  invalidRowsFrom.push_back ({lclRowFrom, gblRowFrom});
1151  }
1152  if (gblRowTo == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1153  invalidRowsTo.push_back ({lclRowTo, gblRowTo});
1154  }
1155  }
1156 
1157  std::ostringstream os;
1158  if (numInvalidSameRows != 0) {
1159  os << "Invalid permute \"same\" (local, global) index pairs: [";
1160  for (std::size_t k = 0; k < invalidSameRows.size (); ++k) {
1161  const auto& p = invalidSameRows[k];
1162  os << "(" << p.first << "," << p.second << ")";
1163  if (k + 1 < invalidSameRows.size ()) {
1164  os << ", ";
1165  }
1166  }
1167  os << "]" << std::endl;
1168  }
1169  if (numInvalidRowsFrom != 0) {
1170  os << "Invalid permute \"from\" (local, global) index pairs: [";
1171  for (std::size_t k = 0; k < invalidRowsFrom.size (); ++k) {
1172  const auto& p = invalidRowsFrom[k];
1173  os << "(" << p.first << "," << p.second << ")";
1174  if (k + 1 < invalidRowsFrom.size ()) {
1175  os << ", ";
1176  }
1177  }
1178  os << "]" << std::endl;
1179  }
1180  if (numInvalidRowsTo != 0) {
1181  os << "Invalid permute \"to\" (local, global) index pairs: [";
1182  for (std::size_t k = 0; k < invalidRowsTo.size (); ++k) {
1183  const auto& p = invalidRowsTo[k];
1184  os << "(" << p.first << "," << p.second << ")";
1185  if (k + 1 < invalidRowsTo.size ()) {
1186  os << ", ";
1187  }
1188  }
1189  os << "]" << std::endl;
1190  }
1191 
1192  std::ostream& err = this->markLocalErrorAndGetStream ();
1193  err << prefix << os.str ();
1194  return;
1195  }
1196  }
1197 
1199  typedef typename ::Tpetra::DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
1200 
1203  virtual void
1204  packAndPrepareNew (const ::Tpetra::SrcDistObject& sourceObject,
1205  const ::Kokkos::DualView<const local_ordinal_type*, device_type>& exportLIDs,
1206  ::Kokkos::DualView<packet_type*, buffer_device_type>& exports,
1207  const ::Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
1208  size_t& constantNumPackets,
1209  ::Tpetra::Distributor& /* distor */)
1210  {
1211  using ::Teuchos::Comm;
1212  using ::Teuchos::RCP;
1213  using std::endl;
1215  const char prefix[] = "Tpetra::Details::CooMatrix::packAndPrepareNew: ";
1216  const char suffix[] = " This should never happen. "
1217  "Please report this bug to the Tpetra developers.";
1218 
1219  // Tell the caller that different rows may have different numbers
1220  // of matrix entries.
1221  constantNumPackets = 0;
1222 
1223  const this_type* src = dynamic_cast<const this_type*> (&sourceObject);
1224  if (src == NULL) {
1225  std::ostream& err = this->markLocalErrorAndGetStream ();
1226  err << prefix << "Input argument 'sourceObject' is not a CooMatrix."
1227  << endl;
1228  }
1229  else if (* (src->localError_)) {
1230  std::ostream& err = this->markLocalErrorAndGetStream ();
1231  err << prefix << "The source (input) object of the Import or Export "
1232  "is already in an error state on this process."
1233  << endl;
1234  }
1235  else if (* (this->localError_)) {
1236  std::ostream& err = this->markLocalErrorAndGetStream ();
1237  err << prefix << "The target (output, \"this\") object of the Import "
1238  "or Export is already in an error state on this process." << endl;
1239  }
1240  // Respond to detected error(s) by resizing 'exports' to zero (so
1241  // we won't be tempted to read it later), and filling
1242  // numPacketsPerLID with zeros.
1243  if (* (this->localError_)) {
1244  // Resize 'exports' to zero, so we won't be tempted to read it.
1245  Details::reallocDualViewIfNeeded (exports, 0, "CooMatrix exports");
1246  // Trick to get around const DualView& being const.
1247  {
1248  auto numPacketsPerLID_tmp = numPacketsPerLID;
1249  numPacketsPerLID_tmp.template sync<Kokkos::HostSpace> ();
1250  numPacketsPerLID_tmp.template modify<Kokkos::HostSpace> ();
1251  }
1252  // Fill numPacketsPerLID with zeros.
1253  ::Kokkos::deep_copy (numPacketsPerLID.h_view, static_cast<size_t> (0));
1254  return;
1255  }
1256 
1257  // TODO (mfh 28 Jan 2017) pack source object's data, NOT *this's data!
1258 
1259  const size_t numExports = exportLIDs.extent (0);
1260  if (numExports == 0) {
1261  Details::reallocDualViewIfNeeded (exports, 0, exports.h_view.label ());
1262  return; // nothing to send
1263  }
1264  RCP<const Comm<int> > comm = src->getMap ().is_null () ?
1265  Teuchos::null : src->getMap ()->getComm ();
1266  if (comm.is_null () || comm->getSize () == 1) {
1267  if (numExports != static_cast<size_t> (0)) {
1268  std::ostream& err = this->markLocalErrorAndGetStream ();
1269  err << prefix << "The input communicator is either null or "
1270  "has only one process, but numExports = " << numExports << " != 0."
1271  << suffix << endl;
1272  return;
1273  }
1274  // Don't go into the rest of this method unless there are
1275  // actually processes other than the calling process. This is
1276  // because the pack and unpack functions only have nonstub
1277  // implementations if building with MPI.
1278  return;
1279  }
1280 
1281  // Trick to get around const DualView& being const.
1282  {
1283  auto numPacketsPerLID_tmp = numPacketsPerLID;
1284  numPacketsPerLID_tmp.template sync<Kokkos::HostSpace> ();
1285  numPacketsPerLID_tmp.template modify<Kokkos::HostSpace> ();
1286  }
1287 
1288  int totalNumPackets = 0;
1289  size_t numInvalidRowInds = 0;
1290  std::ostringstream errStrm; // current loop iteration's error messages
1291  for (size_t k = 0; k < numExports; ++k) {
1292  const LO lclRow = exportLIDs.h_view[k];
1293  // We're packing the source object's data, so we need to use the
1294  // source object's Map to convert from local to global indices.
1295  const GO gblRow = src->map_->getGlobalElement (lclRow);
1296  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1297  // Mark the error later; just count for now.
1298  ++numInvalidRowInds;
1299  numPacketsPerLID.h_view[k] = 0;
1300  continue;
1301  }
1302 
1303  // Count the number of bytes needed to pack the current row of
1304  // the source object.
1305  int numPackets = 0;
1306  const int errCode =
1307  src->impl_.countPackRow (numPackets, gblRow, *comm, &errStrm);
1308  if (errCode != 0) {
1309  std::ostream& err = this->markLocalErrorAndGetStream ();
1310  err << prefix << errStrm.str () << endl;
1311  numPacketsPerLID.h_view[k] = 0;
1312  continue;
1313  }
1314 
1315  // Make sure that the total number of packets fits in int.
1316  // MPI requires this.
1317  const long long newTotalNumPackets =
1318  static_cast<long long> (totalNumPackets) +
1319  static_cast<long long> (numPackets);
1320  if (newTotalNumPackets >
1321  static_cast<long long> (std::numeric_limits<int>::max ())) {
1322  std::ostream& err = this->markLocalErrorAndGetStream ();
1323  err << prefix << "The new total number of packets "
1324  << newTotalNumPackets << " does not fit in int." << endl;
1325  // At this point, we definitely cannot continue. In order to
1326  // leave the output arguments in a rational state, we zero out
1327  // all remaining entries of numPacketsPerLID before returning.
1328  for (size_t k2 = k; k2 < numExports; ++k2) {
1329  numPacketsPerLID.h_view[k2] = 0;
1330  }
1331  return;
1332  }
1333  numPacketsPerLID.h_view[k] = static_cast<size_t> (numPackets);
1334  totalNumPackets = static_cast<int> (newTotalNumPackets);
1335  }
1336 
1337  // If we found invalid row indices in exportLIDs, go back,
1338  // collect, and report them.
1339  if (numInvalidRowInds != 0) {
1340  std::vector<std::pair<LO, GO> > invalidRowInds;
1341  for (size_t k = 0; k < numExports; ++k) {
1342  const LO lclRow = exportLIDs.h_view[k];
1343  // We're packing the source object's data, so we need to use
1344  // the source object's Map to convert from local to global
1345  // indices.
1346  const GO gblRow = src->map_->getGlobalElement (lclRow);
1347  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1348  invalidRowInds.push_back ({lclRow, gblRow});
1349  }
1350  }
1351  std::ostringstream os;
1352  os << prefix << "We found " << numInvalidRowInds << " invalid row ind"
1353  << (numInvalidRowInds != static_cast<size_t> (1) ? "ices" : "ex")
1354  << " out of " << numExports << " in exportLIDs. Here is the list "
1355  << " of invalid row indices: [";
1356  for (size_t k = 0; k < invalidRowInds.size (); ++k) {
1357  os << "(LID: " << invalidRowInds[k].first << ", GID: "
1358  << invalidRowInds[k].second << ")";
1359  if (k + 1 < invalidRowInds.size ()) {
1360  os << ", ";
1361  }
1362  }
1363  os << "].";
1364 
1365  std::ostream& err = this->markLocalErrorAndGetStream ();
1366  err << prefix << os.str () << std::endl;
1367  return;
1368  }
1369 
1370  {
1371  const bool reallocated =
1372  Details::reallocDualViewIfNeeded (exports, totalNumPackets,
1373  "CooMatrix exports");
1374  if (reallocated) {
1375  exports.template sync<Kokkos::HostSpace> (); // make sure alloc'd on host
1376  }
1377  }
1378  exports.template modify<Kokkos::HostSpace> ();
1379 
1380  // FIXME (mfh 17 Jan 2017) packTriples wants three arrays, not a
1381  // single array of structs. For now, we just copy.
1382  std::vector<GO> gblRowInds;
1383  std::vector<GO> gblColInds;
1384  std::vector<SC> vals;
1385 
1386  int outBufCurPos = 0;
1387  packet_type* outBuf = exports.h_view.data ();
1388  for (size_t k = 0; k < numExports; ++k) {
1389  const LO lclRow = exportLIDs.h_view[k];
1390  // We're packing the source object's data, so we need to use the
1391  // source object's Map to convert from local to global indices.
1392  const GO gblRow = src->map_->getGlobalElement (lclRow);
1393  // Pack the current row of the source object.
1394  src->impl_.packRow (outBuf, totalNumPackets, outBufCurPos, *comm,
1395  gblRowInds, gblColInds, vals, gblRow);
1396  }
1397 
1398  // Keep 'exports' modified on host.
1399  //exports.template sync<typename device_type::memory_space> ();
1400  }
1401 
1404  virtual void
1405  unpackAndCombineNew (const ::Kokkos::DualView<const local_ordinal_type*, device_type>& importLIDs,
1406  const ::Kokkos::DualView<const packet_type*, buffer_device_type>& imports,
1407  const ::Kokkos::DualView<const size_t*, buffer_device_type>& numPacketsPerLID,
1408  const size_t /* constantNumPackets */, // we don't actually use this; we assume this is 0
1409  ::Tpetra::Distributor& /* distor */,
1410  const ::Tpetra::CombineMode /* CM */)
1411  {
1412  using ::Teuchos::Comm;
1413  using ::Teuchos::RCP;
1414  using std::endl;
1415 
1416  // (MPI) communication buffers may have different memory spaces
1417  // then device_type Views. See #1088. "CDMS" stands for
1418  // "communication device memory space" and "CHMS" for
1419  // "communication host memory space."
1420  typedef typename Kokkos::DualView<const size_t*, buffer_device_type>::t_dev::memory_space CDMS;
1421  typedef typename Kokkos::DualView<const size_t*, buffer_device_type>::t_host::memory_space CHMS;
1422  typedef typename Kokkos::DualView<const LO*, device_type>::t_dev::memory_space DMS;
1423  typedef typename Kokkos::DualView<const LO*, device_type>::t_host::memory_space HMS;
1424 
1425  const char prefix[] = "Tpetra::Details::CooMatrix::unpackAndCombineNew: ";
1426  const char suffix[] = " This should never happen. "
1427  "Please report this bug to the Tpetra developers.";
1428 
1429  const std::size_t numImports = importLIDs.extent (0);
1430  if (numImports == 0) {
1431  return; // nothing to receive
1432  }
1433  else if (imports.extent (0) == 0) {
1434  std::ostream& err = this->markLocalErrorAndGetStream ();
1435  typename Kokkos::DualView<const LO*, device_type>::t_host importLIDs_h;
1436  {
1437  if (importLIDs.template need_sync<HMS> ()) {
1438  // Device version of the DualView is the most recently updated.
1439  // It's forbidden to sync a DualView<const T>, so we must copy.
1440  auto importLIDs_d = importLIDs.template view<DMS> ();
1441  typedef typename decltype (importLIDs_h)::non_const_type HVNC;
1442  HVNC importLIDs_h_nc (importLIDs_d.label (), importLIDs.extent (0));
1443  Kokkos::deep_copy (importLIDs_h_nc, importLIDs_d);
1444  importLIDs_h = importLIDs_h_nc;
1445  }
1446  else { // host version of the DualView is up-to-date
1447  importLIDs_h = importLIDs.h_view; // importLIDs.template view<HMS> ();
1448  }
1449  }
1450  err << prefix << "importLIDs.extent(0) = " << numImports << " != 0, "
1451  << "but imports.extent(0) = 0. This doesn't make sense, because "
1452  << "for every incoming LID, CooMatrix packs at least the count of "
1453  << "triples associated with that LID, even if the count is zero. "
1454  << "importLIDs = [";
1455  for (std::size_t k = 0; k < numImports; ++k) {
1456  err << importLIDs.h_view[k];
1457  if (k + 1 < numImports) {
1458  err << ", ";
1459  }
1460  }
1461  err << "]. " << suffix << endl;
1462  return;
1463  }
1464 
1465  RCP<const Comm<int> > comm = this->getMap ().is_null () ?
1466  Teuchos::null : this->getMap ()->getComm ();
1467  if (comm.is_null () || comm->getSize () == 1) {
1468  if (numImports != static_cast<size_t> (0)) {
1469  std::ostream& err = this->markLocalErrorAndGetStream ();
1470  err << prefix << "The input communicator is either null or "
1471  "has only one process, but numImports = " << numImports << " != 0."
1472  << suffix << endl;
1473  return;
1474  }
1475  // Don't go into the rest of this method unless there are
1476  // actually processes other than the calling process. This is
1477  // because the pack and unpack functions only have nonstub
1478  // implementations if building with MPI.
1479  return;
1480  }
1481 
1482  // Make sure that the length of 'imports' fits in int.
1483  // This is ultimately an MPI restriction.
1484  if (static_cast<size_t> (imports.extent (0)) >
1485  static_cast<size_t> (std::numeric_limits<int>::max ())) {
1486  std::ostream& err = this->markLocalErrorAndGetStream ();
1487  err << prefix << "imports.extent(0) = "
1488  << imports.extent (0) << " does not fit in int." << endl;
1489  return;
1490  }
1491  const int inBufSize = static_cast<int> (imports.extent (0));
1492 
1493  // It's forbidden to sync a DualView<const T>, so if the input
1494  // DualView are not in sync on host, we must make copies.
1495 
1496  typename Kokkos::DualView<const LO*, device_type>::t_host importLIDs_h;
1497  typename Kokkos::DualView<const packet_type*, buffer_device_type>::t_host imports_h;
1498  typename Kokkos::DualView<const size_t*, buffer_device_type>::t_host numPacketsPerLID_h;
1499  {
1500  if (importLIDs.template need_sync<HMS> ()) {
1501  // Device version of the DualView is the most recently updated.
1502  auto importLIDs_d = importLIDs.template view<DMS> ();
1503  typedef typename decltype (importLIDs_h)::non_const_type HVNC;
1504  HVNC importLIDs_h_nc (importLIDs_d.label (),
1505  importLIDs.extent (0));
1506  Kokkos::deep_copy (importLIDs_h_nc, importLIDs_d);
1507  importLIDs_h = importLIDs_h_nc;
1508  }
1509  else { // host version of the DualView is up-to-date
1510  importLIDs_h = importLIDs.h_view; // importLIDs.template view<HMS> ();
1511  }
1512 
1513  if (imports.template need_sync<CHMS> ()) {
1514  // Device version of the DualView is the most recently updated.
1515  auto imports_d = imports.template view<CDMS> ();
1516  typedef typename decltype (imports_h)::non_const_type HVNC;
1517  HVNC imports_h_nc (imports_d.label (),
1518  imports.extent (0));
1519  Kokkos::deep_copy (imports_h_nc, imports_d);
1520  imports_h = imports_h_nc;
1521  }
1522  else { // host version of the DualView is up-to-date
1523  imports_h = imports.h_view; // imports.template view<HMS> ();
1524  }
1525 
1526  if (numPacketsPerLID.template need_sync<CHMS> ()) {
1527  // Device version of the DualView is the most recently updated.
1528  auto numPacketsPerLID_d = numPacketsPerLID.template view<CDMS> ();
1529  typedef typename decltype (numPacketsPerLID_h)::non_const_type HVNC;
1530  HVNC numPacketsPerLID_h_nc (numPacketsPerLID_d.label (),
1531  numPacketsPerLID.extent (0));
1532  Kokkos::deep_copy (numPacketsPerLID_h_nc, numPacketsPerLID_d);
1533  numPacketsPerLID_h = numPacketsPerLID_h_nc;
1534  }
1535  else { // host version of the DualView is up-to-date
1536  numPacketsPerLID_h = numPacketsPerLID.h_view; // numPacketsPerLID.template view<HMS> ();
1537  }
1538  }
1539 
1540  // FIXME (mfh 17,24 Jan 2017) packTriples wants three arrays, not a
1541  // single array of structs. For now, we just copy.
1542  std::vector<GO> gblRowInds;
1543  std::vector<GO> gblColInds;
1544  std::vector<SC> vals;
1545 
1546  const packet_type* inBuf = imports_h.data ();
1547  int inBufCurPos = 0;
1548  size_t numInvalidRowInds = 0;
1549  int errCode = 0;
1550  std::ostringstream errStrm; // for unpack* error output.
1551  for (size_t k = 0; k < numImports; ++k) {
1552  const LO lclRow = importLIDs_h(k);
1553  const GO gblRow = this->map_->getGlobalElement (lclRow);
1554  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1555  ++numInvalidRowInds;
1556  continue;
1557  }
1558 
1559  // Remember where we were, so we don't overrun the buffer
1560  // length. inBufCurPos is an in/out argument of unpackTriples*.
1561  const int origInBufCurPos = inBufCurPos;
1562 
1563  int numEnt = 0; // output argument of unpackTriplesCount
1564  errCode = unpackTriplesCount (inBuf, inBufSize, inBufCurPos,
1565  numEnt, *comm, &errStrm);
1566  if (errCode != 0 || numEnt < 0 || inBufCurPos < origInBufCurPos) {
1567  std::ostream& err = this->markLocalErrorAndGetStream ();
1568 
1569  err << prefix << "In unpack loop, k=" << k << ": ";
1570  if (errCode != 0) {
1571  err << " unpackTriplesCount returned errCode = " << errCode
1572  << " != 0." << endl;
1573  }
1574  if (numEnt < 0) {
1575  err << " unpackTriplesCount returned errCode = 0, but numEnt = "
1576  << numEnt << " < 0." << endl;
1577  }
1578  if (inBufCurPos < origInBufCurPos) {
1579  err << " After unpackTriplesCount, inBufCurPos = " << inBufCurPos
1580  << " < origInBufCurPos = " << origInBufCurPos << "." << endl;
1581  }
1582  err << " unpackTriplesCount report: " << errStrm.str () << endl;
1583  err << suffix << endl;
1584 
1585  // We only continue in a debug build, because the above error
1586  // messages could consume too much memory and cause an
1587  // out-of-memory error, without actually printing. Printing
1588  // everything is useful in a debug build, but not so much in a
1589  // release build.
1590 #ifdef HAVE_TPETRA_DEBUG
1591  // Clear out the current error stream, so we don't accumulate
1592  // over loop iterations.
1593  errStrm.str ("");
1594  continue;
1595 #else
1596  return;
1597 #endif // HAVE_TPETRA_DEBUG
1598  }
1599 
1600  // FIXME (mfh 17,24 Jan 2017) packTriples wants three arrays,
1601  // not a single array of structs. For now, we just copy.
1602  gblRowInds.resize (numEnt);
1603  gblColInds.resize (numEnt);
1604  vals.resize (numEnt);
1605 
1606  errCode = unpackTriples (inBuf, inBufSize, inBufCurPos,
1607  gblRowInds.data (), gblColInds.data (),
1608  vals.data (), numEnt, *comm, &errStrm);
1609  if (errCode != 0) {
1610  std::ostream& err = this->markLocalErrorAndGetStream ();
1611  err << prefix << "unpackTriples returned errCode = "
1612  << errCode << " != 0. It reports: " << errStrm.str ()
1613  << endl;
1614  // We only continue in a debug build, because the above error
1615  // messages could consume too much memory and cause an
1616  // out-of-memory error, without actually printing. Printing
1617  // everything is useful in a debug build, but not so much in a
1618  // release build.
1619 #ifdef HAVE_TPETRA_DEBUG
1620  // Clear out the current error stream, so we don't accumulate
1621  // over loop iterations.
1622  errStrm.str ("");
1623  continue;
1624 #else
1625  return;
1626 #endif // HAVE_TPETRA_DEBUG
1627  }
1628  this->sumIntoGlobalValues (gblRowInds.data (), gblColInds.data (),
1629  vals.data (), numEnt);
1630  }
1631 
1632  // If we found invalid row indices in exportLIDs, go back,
1633  // collect, and report them.
1634  if (numInvalidRowInds != 0) {
1635  // Mark the error now, before we possibly run out of memory.
1636  // The latter could raise an exception (e.g., std::bad_alloc),
1637  // but at least we would get the error state right.
1638  std::ostream& err = this->markLocalErrorAndGetStream ();
1639 
1640  std::vector<std::pair<LO, GO> > invalidRowInds;
1641  for (size_t k = 0; k < numImports; ++k) {
1642  const LO lclRow = importLIDs_h(k);
1643  const GO gblRow = this->map_->getGlobalElement (lclRow);
1644  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1645  invalidRowInds.push_back ({lclRow, gblRow});
1646  }
1647  }
1648 
1649  err << prefix << "We found " << numInvalidRowInds << " invalid row ind"
1650  << (numInvalidRowInds != static_cast<size_t> (1) ? "ices" : "ex")
1651  << " out of " << numImports << " in importLIDs. Here is the list "
1652  << " of invalid row indices: [";
1653  for (size_t k = 0; k < invalidRowInds.size (); ++k) {
1654  err << "(LID: " << invalidRowInds[k].first << ", GID: "
1655  << invalidRowInds[k].second << ")";
1656  if (k + 1 < invalidRowInds.size ()) {
1657  err << ", ";
1658  }
1659  }
1660  err << "].";
1661  return;
1662  }
1663  }
1664 };
1665 
1666 } // namespace Details
1667 } // namespace Tpetra
1668 
1669 #endif // TPETRA_DETAILS_COOMATRIX_HPP
Tpetra::Details::unpackTriples
int unpackTriples(const char[], const int, int &, OrdinalType[], OrdinalType[], ScalarType[], const int, const ::Teuchos::Comm< int > &, std::ostream *errStrm=NULL)
Unpack matrix entries ("triples" (i, j, A(i,j))) from the given input buffer.
Definition: Tpetra_Details_PackTriples.hpp:579
Tpetra::Details::CooMatrix::fillComplete
void fillComplete(const ::Teuchos::RCP< const ::Teuchos::Comm< int > > &comm)
Tell the matrix that you are done inserting entries locally, and that the matrix should build its Map...
Definition: Tpetra_Details_CooMatrix.hpp:717
Tpetra::Details::CooMatrix::sumIntoGlobalValue
void sumIntoGlobalValue(const GO gblRowInd, const GO gblColInd, const SC &val)
Insert one entry locally into the sparse matrix, if it does not exist there yet. If it does exist,...
Definition: Tpetra_Details_CooMatrix.hpp:630
Tpetra::Details::Impl::CooGraphEntry
Type of each (row index, column index) pair in the Tpetra::Details::CooMatrix (see below).
Definition: Tpetra_Details_CooMatrix.hpp:75
Tpetra::Classes::DistObject::local_ordinal_type
LocalOrdinal local_ordinal_type
The type of local indices.
Definition: Tpetra_DistObject_decl.hpp:363
Tpetra::Details::CooMatrix::errorMessages
std::string errorMessages() const
The current stream of error messages.
Definition: Tpetra_Details_CooMatrix.hpp:781
Tpetra::Classes::DistObject::map_
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
Definition: Tpetra_DistObject_decl.hpp:942
Tpetra::Details::Impl::CooMatrixImpl::sumIntoGlobalValue
void sumIntoGlobalValue(const GO gblRowInd, const GO gblColInd, const SC &val)
Insert one entry locally into the sparse matrix, if it does not exist there yet. If it does exist,...
Definition: Tpetra_Details_CooMatrix.hpp:127
Tpetra::Details::CooMatrix::buffer_device_type
::Tpetra::DistObject< char, LO, GO, NT >::buffer_device_type buffer_device_type
Kokkos::Device specialization for DistObject communication buffers.
Definition: Tpetra_Details_CooMatrix.hpp:1199
Tpetra::Details::CooMatrix::CooMatrix
CooMatrix()
Default constructor.
Definition: Tpetra_Details_CooMatrix.hpp:601
Tpetra::Details::Impl::CooMatrixImpl::forAllEntries
void forAllEntries(std::function< void(const GO, const GO, const SC &)> f) const
Execute the given function for all entries of the sparse matrix, sequentially (no thread parallelism)...
Definition: Tpetra_Details_CooMatrix.hpp:179
Tpetra::Details::CooMatrix::sumIntoGlobalValues
void sumIntoGlobalValues(const GO gblRowInds[], const GO gblColInds[], const SC vals[], const std::size_t numEnt)
Insert multiple entries locally into the sparse matrix.
Definition: Tpetra_Details_CooMatrix.hpp:646
Tpetra::Details::CooMatrix
Sparse matrix used only for file input / output.
Definition: Tpetra_Details_CooMatrix.hpp:574
Tpetra::Details::CooMatrix::packet_type
char packet_type
This class transfers data as bytes (MPI_BYTE).
Definition: Tpetra_Details_CooMatrix.hpp:577
Tpetra::Classes::DistObject::device_type
Node::device_type device_type
The Kokkos Device type.
Definition: Tpetra_DistObject_decl.hpp:370
Tpetra::Details::Impl::CooMatrixImpl::buildLocallyIndexedCrs
void buildLocallyIndexedCrs(std::vector< OffsetType > &rowOffsets, LO lclColInds[], SC vals[], std::function< LO(const GO)> gblToLcl) const
Build a locally indexed version of CRS storage.
Definition: Tpetra_Details_CooMatrix.hpp:480
Tpetra::Details::CooMatrix::useNewInterface
virtual bool useNewInterface()
Whether the subclass implements the "old" or "new" (Kokkos-friendly) interface (it implements the "ne...
Definition: Tpetra_Details_CooMatrix.hpp:1000
Details
Implementation details of Tpetra.
Tpetra::Details::gathervPrint
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Definition: Tpetra_Details_gathervPrint.cpp:52
Tpetra::Details::CooMatrix::fillComplete
void fillComplete()
Special version of fillComplete that assumes that the matrix already has a Map, and reuses its commun...
Definition: Tpetra_Details_CooMatrix.hpp:736
Tpetra::Details::Impl::CooMatrixImpl
Implementation detail of Tpetra::Details::CooMatrix (which see below).
Definition: Tpetra_Details_CooMatrix.hpp:97
Tpetra::Classes::DistObject::getMap
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Definition: Tpetra_DistObject_decl.hpp:510
Tpetra::Details::CooMatrix::unpackAndCombineNew
virtual void unpackAndCombineNew(const ::Kokkos::DualView< const local_ordinal_type *, device_type > &importLIDs, const ::Kokkos::DualView< const packet_type *, buffer_device_type > &imports, const ::Kokkos::DualView< const size_t *, buffer_device_type > &numPacketsPerLID, const size_t, ::Tpetra::Distributor &, const ::Tpetra::CombineMode)
While we do use the "new" Kokkos::DualView - based interface, we don't currently use device Views.
Definition: Tpetra_Details_CooMatrix.hpp:1405
Tpetra::Details::Impl::CooMatrixImpl::getMyGlobalRowIndices
GO getMyGlobalRowIndices(std::vector< GO > &rowInds) const
Get the global row indices on this process, sorted and made unique, and return the minimum global row...
Definition: Tpetra_Details_CooMatrix.hpp:407
Tpetra::Details::CooMatrix::scalar_type
SC scalar_type
Type of each entry (value) in the sparse matrix.
Definition: Tpetra_Details_CooMatrix.hpp:579
Tpetra::Details::CooMatrix::description
virtual std::string description() const
One-line descriptiion of this object; overrides Teuchos::Describable method.
Definition: Tpetra_Details_CooMatrix.hpp:823
Tpetra::Classes::DistObject
Base class for distributed Tpetra objects that support data redistribution.
Definition: Tpetra_DistObject_decl.hpp:349
Tpetra::Details::packTriplesCount
int packTriplesCount(const int, char[], const int, int &, const ::Teuchos::Comm< int > &, std::ostream *errStrm)
Pack the count (number) of matrix triples.
Definition: Tpetra_Details_PackTriples.cpp:201
Tpetra::Details::CooMatrix::~CooMatrix
virtual ~CooMatrix()
Destructor (virtual for memory safety of derived classes).
Definition: Tpetra_Details_CooMatrix.hpp:621
Tpetra::Details::CooMatrix::sumIntoGlobalValues
void sumIntoGlobalValues(std::initializer_list< GO > gblRowInds, std::initializer_list< GO > gblColInds, std::initializer_list< SC > vals, const std::size_t numEnt)
Initializer-list overload of the above method (which see).
Definition: Tpetra_Details_CooMatrix.hpp:656
Tpetra::Details::Impl::CooMatrixImpl::packet_type
char packet_type
Type for packing and unpacking data.
Definition: Tpetra_Details_CooMatrix.hpp:115
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:188
Tpetra::Details::Impl::CooMatrixImpl::CooMatrixImpl
CooMatrixImpl()=default
Default constructor.
Tpetra::Details::CooMatrix::localError
bool localError() const
Whether this object had an error on the calling process.
Definition: Tpetra_Details_CooMatrix.hpp:760
Tpetra::Details::Impl::CooMatrixImpl::packRow
void packRow(packet_type outBuf[], const int outBufSize, int &outBufCurPos, const ::Teuchos::Comm< int > &comm, std::vector< GO > &gblRowInds, std::vector< GO > &gblColInds, std::vector< SC > &vals, const GO gblRow) const
Pack the given row of the matrix.
Definition: Tpetra_Details_CooMatrix.hpp:338
Tpetra::Details::CooMatrix::copyAndPermuteNew
virtual void copyAndPermuteNew(const ::Tpetra::SrcDistObject &sourceObject, const size_t numSameIDs, const ::Kokkos::DualView< const LO *, device_type > &permuteToLIDs, const ::Kokkos::DualView< const LO *, device_type > &permuteFromLIDs)
While we do use the "new" Kokkos::DualView - based interface, we don't currently use device Views.
Definition: Tpetra_Details_CooMatrix.hpp:1005
Tpetra::Details::Impl::CooMatrixImpl::getLclNumEntries
std::size_t getLclNumEntries() const
Number of entries in the sparse matrix on the calling process.
Definition: Tpetra_Details_CooMatrix.hpp:170
Tpetra::Details::Impl::CooMatrixImpl::mergeIntoRow
void mergeIntoRow(const GO tgtGblRow, const CooMatrixImpl< SC, GO > &src, const GO srcGblRow)
Into global row tgtGblRow of *this, merge global row srcGblRow of src.
Definition: Tpetra_Details_CooMatrix.hpp:193
Tpetra::Details::unpackTriplesCount
int unpackTriplesCount(const char[], const int, int &, int &, const ::Teuchos::Comm< int > &, std::ostream *errStrm)
Unpack just the count of triples from the given input buffer.
Definition: Tpetra_Details_PackTriples.cpp:232
Tpetra::Details::CooMatrix::packAndPrepareNew
virtual void packAndPrepareNew(const ::Tpetra::SrcDistObject &sourceObject, const ::Kokkos::DualView< const local_ordinal_type *, device_type > &exportLIDs, ::Kokkos::DualView< packet_type *, buffer_device_type > &exports, const ::Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets, ::Tpetra::Distributor &)
While we do use the "new" Kokkos::DualView - based interface, we don't currently use device Views.
Definition: Tpetra_Details_CooMatrix.hpp:1204
Tpetra::Details::countPackTriplesCount
int countPackTriplesCount(const ::Teuchos::Comm< int > &, int &size, std::ostream *errStrm)
Compute the buffer size required by packTriples for packing the number of matrix entries ("triples").
Definition: Tpetra_Details_PackTriples.cpp:173
Tpetra::Details::Impl::CooMatrixImpl::countPackRow
int countPackRow(int &numPackets, const GO gblRow, const ::Teuchos::Comm< int > &comm, std::ostream *errStrm=NULL) const
Count the number of packets (bytes, in this case) needed to pack the given row of the matrix.
Definition: Tpetra_Details_CooMatrix.hpp:255
Tpetra::Details::Impl::CompareCooGraphEntries
Function comparing two CooGraphEntry structs, lexicographically, first by row index,...
Definition: Tpetra_Details_CooMatrix.hpp:85
Tpetra_Details_reallocDualViewIfNeeded.hpp
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Tpetra::Details::Impl::CooMatrixImpl::sumIntoGlobalValues
void sumIntoGlobalValues(const GO gblRowInds[], const GO gblColInds[], const SC vals[], const std::size_t numEnt)
Insert multiple entries locally into the sparse matrix.
Definition: Tpetra_Details_CooMatrix.hpp:150
Tpetra::Details::CooMatrix::describe
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a descriptiion of this object to the given output stream; overrides Teuchos::Describable method...
Definition: Tpetra_Details_CooMatrix.hpp:843
Tpetra::Classes::DistObject::packet_type
::Kokkos::Details::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
Definition: Tpetra_DistObject_decl.hpp:361
Tpetra::Details::CooMatrix::checkSizes
virtual bool checkSizes(const ::Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Definition: Tpetra_Details_CooMatrix.hpp:975
Tpetra::DistObject
::Tpetra::Classes::DistObject< Packet, LocalOrdinal, GlobalOrdinal, Node > DistObject
Alias for Tpetra::Classes::DistObject.
Definition: Tpetra_DistObject_fwd.hpp:75
Tpetra::Details::CooMatrix::constantNumberOfPackets
virtual size_t constantNumberOfPackets() const
By returning 0, tell DistObject that this class may not necessarily have a constant number of "packet...
Definition: Tpetra_Details_CooMatrix.hpp:967
Tpetra::Details::CooMatrix::map_type
::Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Type of the Map specialization to give to the constructor.
Definition: Tpetra_Details_CooMatrix.hpp:585
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Details::CooMatrix::CooMatrix
CooMatrix(const ::Teuchos::RCP< const map_type > &map)
Constructor that takes a Map.
Definition: Tpetra_Details_CooMatrix.hpp:614
Tpetra::Details::packTriples
int packTriples(const OrdinalType[], const OrdinalType[], const ScalarType[], const int, char[], const int, int &, const ::Teuchos::Comm< int > &, std::ostream *errStrm=NULL)
Pack matrix entries ("triples" (i, j, A(i,j))) into the given output buffer.
Definition: Tpetra_Details_PackTriples.hpp:463
Tpetra::Classes::DistObject::node_type
Node node_type
The Kokkos Node type.
Definition: Tpetra_DistObject_decl.hpp:367
Tpetra::Details::CooMatrix::getLclNumEntries
std::size_t getLclNumEntries() const
Number of entries in the sparse matrix on the calling process.
Definition: Tpetra_Details_CooMatrix.hpp:667
Tpetra::Details::reallocDualViewIfNeeded
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Definition: Tpetra_Details_reallocDualViewIfNeeded.hpp:83
Tpetra::Classes::DistObject::global_ordinal_type
GlobalOrdinal global_ordinal_type
The type of global indices.
Definition: Tpetra_DistObject_decl.hpp:365
Tpetra::CombineMode
CombineMode
Rule for combining data in an Import or Export.
Definition: Tpetra_CombineMode.hpp:94