Tpetra parallel linear algebra  Version of the Day
Tpetra_MultiVectorFiller.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 #ifndef __Tpetra_MultiVectorFiller_hpp
42 #define __Tpetra_MultiVectorFiller_hpp
43 
44 #include "Tpetra_MultiVector.hpp"
45 #include "Tpetra_Vector.hpp"
46 #include "Teuchos_CommHelpers.hpp"
47 #include <iterator>
48 #include <set>
49 
50 namespace Tpetra {
51 namespace Details {
52  // \fn sortAndMergeIn
53  // \brief Sort and merge newEntries into allEntries, and make unique.
54  //
55  // \param allEntries [in/out]: Array in which current entries have
56  // already been stored, and in which the new entries are to be
57  // stored. Passed by reference in case we need to resize it.
58  //
59  // \param currentEntries [in/out]: Current entries, which we assume
60  // have already been sorted and made unique. Aliases the
61  // beginning of allEntries.
62  //
63  // \param newEntries [in/out] New entries, which have not yet been
64  // sorted or made unique. This does <i>not</i> alias allEntries.
65  //
66  // Sort and make entries of newEntries unique. Resize allEntries if
67  // necessary to fit the unique entries of newEntries. Merge
68  // newEntries into allEntries and make the results unique. (This is
69  // cheaper than sorting the whole array.)
70  //
71  // \return A view of all the entries (current and new) in allEntries.
72  template<class T>
73  Teuchos::ArrayView<T>
74  sortAndMergeIn (Teuchos::Array<T>& allEntries,
75  Teuchos::ArrayView<T> currentEntries,
76  Teuchos::ArrayView<T> newEntries)
77  {
78  using Teuchos::ArrayView;
79  using Teuchos::as;
80  typedef typename ArrayView<T>::iterator iter_type;
81  typedef typename ArrayView<T>::size_type size_type;
82 
83  std::sort (newEntries.begin(), newEntries.end());
84  iter_type it = std::unique (newEntries.begin(), newEntries.end());
85  const size_type numNew = as<size_type> (it - newEntries.begin());
86  // View of the sorted, made-unique new entries to merge in.
87  ArrayView<T> newEntriesView = newEntries.view (0, numNew);
88 
89  const size_type numCur = currentEntries.size();
90  if (allEntries.size() < numCur + numNew) {
91  allEntries.resize (numCur + numNew);
92  }
93  ArrayView<T> allView = allEntries.view (0, numCur + numNew);
94  ArrayView<T> newView = allEntries.view (numCur, numNew); // target of copy
95 
96  std::copy (newEntries.begin(), newEntries.end(), newView.begin());
97  std::inplace_merge (allView.begin(), newView.begin(), allView.end());
98  iter_type it2 = std::unique (allView.begin(), allView.end());
99  const size_type numTotal = as<size_type> (it2 - allView.begin());
100 
101  return allEntries.view (0, numTotal);
102  }
103 
109  template<class MV>
111  public:
112  typedef typename MV::scalar_type scalar_type;
113  typedef typename MV::local_ordinal_type local_ordinal_type;
114  typedef typename MV::global_ordinal_type global_ordinal_type;
115  typedef typename MV::node_type node_type;
116 
117  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
118 
130  MultiVectorFillerData (const Teuchos::RCP<const map_type>& map) :
131  map_ (map),
132  numCols_ (0)
133  {}
134 
151  MultiVectorFillerData (const Teuchos::RCP<const map_type>& map,
152  const size_t numColumns) :
153  map_ (map),
154  numCols_ (numColumns),
155  sourceIndices_ (numColumns),
156  sourceValues_ (numColumns)
157  {}
158 
160  void
161  setNumColumns (const size_t newNumColumns)
162  {
163  const size_t oldNumColumns = getNumColumns();
164  if (newNumColumns >= oldNumColumns) {
165  for (size_t j = oldNumColumns; j < newNumColumns; ++j) {
166  sourceIndices_.push_back (Teuchos::Array<global_ordinal_type> ());
167  sourceValues_.push_back (Teuchos::Array<scalar_type> ());
168  }
169  }
170  else {
171  // This may not necessarily deallocate any data, but that's OK.
172  sourceIndices_.resize (newNumColumns);
173  sourceValues_.resize (newNumColumns);
174  }
175  numCols_ = oldNumColumns;
176  }
177 
178  void
179  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
180  const size_t column,
181  const Teuchos::ArrayView<const scalar_type>& values)
182  {
183  if (column >= getNumColumns()) {
184  for (size_t j = column; j < getNumColumns(); ++j) {
185  sourceIndices_.push_back (Teuchos::Array<global_ordinal_type> ());
186  sourceValues_.push_back (Teuchos::Array<scalar_type> ());
187  }
188  }
189  std::copy (rows.begin(), rows.end(), std::back_inserter (sourceIndices_[column]));
190  std::copy (values.begin(), values.end(), std::back_inserter (sourceValues_[column]));
191  }
192 
200  void
201  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
202  const Teuchos::ArrayView<const scalar_type>& values)
203  {
204  typedef global_ordinal_type GO;
205  typedef scalar_type ST;
206  typedef typename Teuchos::ArrayView<const GO>::const_iterator GoIter;
207  typedef typename Teuchos::ArrayView<const ST>::const_iterator StIter;
208 
209  const size_t numColumns = getNumColumns();
210  GoIter rowIter = rows.begin();
211  StIter valIter = values.begin();
212  for (size_t j = 0; j < numColumns; ++j) {
213  GoIter rowIterNext = rowIter + numColumns;
214  StIter valIterNext = valIter + numColumns;
215  std::copy (rowIter, rowIterNext, std::back_inserter (sourceIndices_[j]));
216  std::copy (valIter, valIterNext, std::back_inserter (sourceValues_[j]));
217  rowIter = rowIterNext;
218  valIter = valIterNext;
219  }
220  }
221 
246  template<class BinaryFunction>
247  void
248  locallyAssemble (MV& X, BinaryFunction& f)
249  {
250  using Teuchos::Array;
251  using Teuchos::ArrayRCP;
252  using Teuchos::ArrayView;
253  using Teuchos::RCP;
254  typedef local_ordinal_type LO;
255  typedef global_ordinal_type GO;
256  typedef scalar_type ST;
257  typedef node_type NT;
258 
259  RCP<const ::Tpetra::Map<LO, GO, NT> > map = X.getMap();
260  Array<LO> localIndices;
261  const size_t numColumns = getNumColumns();
262  for (size_t j = 0; j < numColumns; ++j) {
263  const typename Array<const GO>::size_type numIndices = sourceIndices_[j].size();
264  // Precompute all the local indices before saving to the
265  // vector. Hopefully this gives us a little bit of locality
266  // in the global->local conversion, at the expense of a little
267  // more storage.
268  if (localIndices.size() < numIndices) {
269  localIndices.resize (numIndices);
270  }
271  ArrayView<LO> localIndicesView = localIndices.view (0, numIndices);
272  ArrayView<const GO> globalIndicesView = sourceIndices_[j].view (0, numIndices);
273  for (typename ArrayView<const GO>::size_type i = 0; i < numIndices; ++i) {
274  localIndices[i] = map->getLocalElement (globalIndicesView[i]);
275  }
276 
277  ArrayRCP<ST> X_j = X.getDataNonConst (j);
278  ArrayView<const ST> localValues = sourceValues_[j].view (0, numIndices);
279  for (typename ArrayView<const GO>::size_type i = 0; i < numIndices; ++i) {
280  const LO localInd = localIndices[i];
281  X_j[localInd] = f (X_j[localInd], localValues[i]);
282  }
283  }
284  }
285 
287  void
289  {
290  std::plus<scalar_type> f;
291  locallyAssemble<std::plus<scalar_type> > (X, f);
292  }
293 
295  void clear() {
296  Teuchos::Array<Teuchos::Array<global_ordinal_type> > newSourceIndices;
297  Teuchos::Array<Teuchos::Array<scalar_type> > newSourceValues;
298  // The standard STL idiom for clearing the contents of a vector completely.
299  std::swap (sourceIndices_, newSourceIndices);
300  std::swap (sourceValues_, newSourceValues);
301  }
302 
304  Teuchos::Array<global_ordinal_type>
306  {
307  using Teuchos::Array;
308  using Teuchos::ArrayView;
309  using Teuchos::as;
310  typedef global_ordinal_type GO;
311  typedef typename Array<GO>::size_type size_type;
312 
313  Array<GO> allInds (0); // will resize below
314  const size_t numCols = getNumColumns();
315 
316  if (numCols == 1) {
317  // Special case for 1 column avoids copying indices twice.
318  // Pick the size of the array exactly the first time so there
319  // are at most two allocations (the constructor may choose to
320  // allocate).
321  const size_type numNew = sourceIndices_[0].size();
322  allInds.resize (allInds.size() + numNew);
323  std::copy (sourceIndices_[0].begin(), sourceIndices_[0].end(),
324  allInds.begin());
325  std::sort (allInds.begin(), allInds.end());
326  typename Array<GO>::iterator it =
327  std::unique (allInds.begin(), allInds.end());
328  const size_type numFinal = as<size_type> (it - allInds.begin());
329  allInds.resize (numFinal);
330  }
331  else {
332  // Carefully collect all the row indices one column at a time.
333  // This ensures that the total allocation size in this routine
334  // is independent of the number of columns. Also, only sort
335  // the current column's indices. Use merges to ensure sorted
336  // order in the collected final result.
337  ArrayView<GO> curIndsView = allInds.view (0, 0); // will grow
338  Array<GO> newInds;
339  for (size_t j = 0; j < numCols; ++j) {
340  const size_type numNew = sourceIndices_[j].size();
341  if (numNew > newInds.size()) {
342  newInds.resize (numNew);
343  }
344  ArrayView<GO> newIndsView = newInds.view (0, numNew);
345  std::copy (sourceIndices_[j].begin(), sourceIndices_[j].end(),
346  newIndsView.begin());
347  curIndsView = sortAndMergeIn<GO> (allInds, curIndsView, newIndsView);
348  }
349  }
350  return allInds;
351  }
352 
353  private:
354  Teuchos::RCP<const map_type> map_;
355  size_t numCols_;
356  Teuchos::Array<Teuchos::Array<global_ordinal_type> > sourceIndices_;
357  Teuchos::Array<Teuchos::Array<scalar_type> > sourceValues_;
358 
359  size_t getNumColumns() const { return numCols_; }
360  };
361 
368  template<class MV>
369  class MultiVectorFillerData2 : public Teuchos::Describable {
370  public:
371  typedef typename MV::scalar_type scalar_type;
372  typedef typename MV::local_ordinal_type local_ordinal_type;
373  typedef typename MV::global_ordinal_type global_ordinal_type;
374  typedef typename MV::node_type node_type;
375 
376  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
377 
388  MultiVectorFillerData2 (const Teuchos::RCP<const map_type>& map,
389  const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT,
390  const Teuchos::RCP<Teuchos::FancyOStream>& out=Teuchos::null) :
391  map_ (map),
392  numCols_ (0),
393  verbLevel_ (verbLevel),
394  out_ (out)
395  {}
396 
414  MultiVectorFillerData2 (const Teuchos::RCP<const map_type>& map,
415  const size_t numColumns,
416  const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT,
417  const Teuchos::RCP<Teuchos::FancyOStream>& out=Teuchos::null) :
418  map_ (map),
419  numCols_ (numColumns),
420  localVec_ (new MV (map, numColumns)),
421 #if 0
422  nonlocalIndices_ (numColumns),
423  nonlocalValues_ (numColumns),
424 #endif // 0
425  verbLevel_ (verbLevel),
426  out_ (out)
427  {}
428 
429  std::string
430  description() const
431  {
432  std::ostringstream oss;
433  oss << "Tpetra::MultiVectorFillerData2<"
434  << Teuchos::TypeNameTraits<MV>::name () << ">";
435  return oss.str();
436  }
437 
438  void
439  describe (Teuchos::FancyOStream& out,
440  const Teuchos::EVerbosityLevel verbLevel =
441  Teuchos::Describable::verbLevel_default) const
442  {
443  using std::endl;
444  using Teuchos::Array;
445  using Teuchos::ArrayRCP;
446  using Teuchos::ArrayView;
447  using Teuchos::RCP;
448  using Teuchos::VERB_DEFAULT;
449  using Teuchos::VERB_NONE;
450  using Teuchos::VERB_LOW;
451  using Teuchos::VERB_MEDIUM;
452  using Teuchos::VERB_HIGH;
453  using Teuchos::VERB_EXTREME;
454 
455  // Set default verbosity if applicable.
456  const Teuchos::EVerbosityLevel vl =
457  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
458 
459  RCP<const Teuchos::Comm<int> > comm = map_->getComm();
460  const int myImageID = comm->getRank();
461  const int numImages = comm->getSize();
462 
463  if (vl != VERB_NONE) {
464  // Don't set the tab level unless we're printing something.
465  Teuchos::OSTab tab (out);
466 
467  if (myImageID == 0) { // >= VERB_LOW prints description()
468  out << this->description() << endl;
469  }
470  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
471  if (myImageID == imageCtr) {
472  if (vl != VERB_LOW) {
473  // At verbosity > VERB_LOW, each process prints something.
474  out << "Process " << myImageID << ":" << endl;
475 
476  Teuchos::OSTab procTab (out);
477  // >= VERB_MEDIUM: print the local vector length.
478  out << "local length=" << localVec_->getLocalLength();
479  if (vl != VERB_MEDIUM) {
480  // >= VERB_HIGH: print isConstantStride() and getStride()
481  if (localVec_->isConstantStride()) {
482  out << ", constant stride=" << localVec_->getStride() << endl;
483  }
484  else {
485  out << ", not constant stride" << endl;
486  }
487  if (vl == VERB_EXTREME) {
488  // VERB_EXTREME: print all the values in the multivector.
489  out << "Local values:" << endl;
490  ArrayRCP<ArrayRCP<const scalar_type> > X = localVec_->get2dView();
491  for (size_t i = 0; i < localVec_->getLocalLength(); ++i) {
492  for (size_t j = 0; j < localVec_->getNumVectors(); ++j) {
493  out << X[j][i];
494  if (j < localVec_->getNumVectors() - 1) {
495  out << " ";
496  }
497  } // for each column
498  out << endl;
499  } // for each row
500 
501 #if 0
502  out << "Nonlocal indices and values:" << endl;
503  for (size_t j = 0; j < (size_t)nonlocalIndices_.size(); ++j) {
504  ArrayView<const global_ordinal_type> inds = nonlocalIndices_[j]();
505  ArrayView<const scalar_type> vals = nonlocalValues_[j]();
506  for (typename ArrayView<const global_ordinal_type>::size_type k = 0;
507  k < inds.size(); ++k) {
508  out << "X(" << inds[k] << "," << j << ") = " << vals[k] << endl;
509  }
510  }
511 #endif // 0
512  } // if vl == VERB_EXTREME
513  } // if (vl != VERB_MEDIUM)
514  else { // vl == VERB_LOW
515  out << endl;
516  }
517  } // if vl != VERB_LOW
518  } // if it is my process' turn to print
519  } // for each process in the communicator
520  } // if vl != VERB_NONE
521  }
522 
524  Teuchos::Array<global_ordinal_type>
525  getSourceIndices (const Teuchos::Comm<int>& comm,
526  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
527  const bool debug = false) const
528  {
529  using Teuchos::Array;
530  using Teuchos::ArrayView;
531  using Teuchos::RCP;
532  using Teuchos::rcp;
533  using Tpetra::global_size_t;
534  typedef global_ordinal_type GO;
535  const char prefix[] = "Tpetra::MultiVectorFiller::getSourceIndices: ";
536 
537  if (debug && ! out.is_null ()) {
538  std::ostringstream os;
539  os << "Proc " << comm.getRank () << ": getSourceIndices" << std::endl;
540  *out << os.str ();
541  }
542 
543  // Get the nonlocal row indices, sorted and made unique.
544  // It's fair to assume that these are not contiguous.
545  Array<GO> nonlocalIndices = getSortedUniqueNonlocalIndices (comm, out, debug);
546 
547  // Get the local row indices, not necessarily sorted or unique.
548  ArrayView<const GO> localIndices = getLocalIndices ();
549 
550  // Copy the local indices into the full indices array, and sort
551  // them there. We'll merge in the nonlocal indices below. This
552  // can be more efficient than just sorting all the indices, if
553  // there are a lot of nonlocal indices.
554  Array<GO> indices (localIndices.size () + nonlocalIndices.size ());
555  ArrayView<GO> localIndView = indices.view (0, localIndices.size ());
556  TEUCHOS_TEST_FOR_EXCEPTION
557  (localIndView.size () > indices.size (), std::logic_error,
558  prefix << "localIndView.size() = " << localIndView.size ()
559  << " > indices.size() = " << indices.size () << ".");
560  TEUCHOS_TEST_FOR_EXCEPTION
561  (localIndView.size () != localIndices.size (), std::logic_error,
562  prefix << "localIndView.size() = " << localIndView.size ()
563  << " != localIndices.size() = " << localIndices.size () << ".");
564 
565  std::copy (localIndices.begin (), localIndices.end (), localIndView.begin ());
566  std::sort (localIndView.begin (), localIndView.end ());
567 
568  if (debug && ! out.is_null ()) {
569  std::ostringstream os;
570  os << "Proc " << comm.getRank () << ": Right after copying and sorting" << std::endl;
571  *out << os.str ();
572  }
573 
574  // Merge the local and nonlocal indices.
575  if (nonlocalIndices.size () > 0) {
576  ArrayView<GO> nonlclIndView =
577  indices.view (localIndices.size (), nonlocalIndices.size ());
578 
579  // We need a view of the output array, because
580  // std::inplace_merge needs all its iterator inputs to have
581  // the same type. Debug mode builds are pickier than release
582  // mode builds, because the iterators in a debug mode build
583  // are of a different type that does run-time checking (they
584  // aren't just raw pointers).
585  ArrayView<GO> indView = indices.view (0, indices.size ());
586  if (debug && ! out.is_null ()) {
587  std::ostringstream os;
588  os << "Right before std::copy" << std::endl;
589  *out << os.str ();
590  }
591  std::copy (nonlocalIndices.begin (),
592  nonlocalIndices.end (),
593  nonlclIndView.begin ());
594  if (debug && ! out.is_null ()) {
595  std::ostringstream os;
596  os << "Proc " << comm.getRank () << ": Right before std::inplace_merge"
597  << std::endl;
598  *out << os.str ();
599  }
600  std::inplace_merge (indView.begin (),
601  indView.begin () + localIndices.size (),
602  indView.end ());
603  }
604 
605  if (debug && ! out.is_null ()) {
606  std::ostringstream os;
607  os << "Proc " << comm.getRank () << ": Done with getSourceIndices"
608  << std::endl;
609  *out << os.str ();
610  }
611  return indices;
612  }
613 
623  void
624  setNumColumns (const size_t newNumColumns)
625  {
626  using Teuchos::Array;
627  using Teuchos::Range1D;
628  using Teuchos::RCP;
629 
630  const size_t oldNumColumns = numCols_;
631  if (newNumColumns == oldNumColumns) {
632  return; // No side effects if no change.
633  }
634 
635  RCP<MV> newLclVec;
636  if (newNumColumns > oldNumColumns) {
637  newLclVec = rcp (new MV (map_, newNumColumns));
638  // Assign the contents of the old local multivector to the
639  // first oldNumColumns columns of the new local multivector,
640  // then get rid of the old local multivector.
641  RCP<MV> newLclVecView =
642  newLclVec->subViewNonConst (Range1D (0, oldNumColumns-1));
643  *newLclVecView = *localVec_;
644  }
645  else {
646  if (newNumColumns == 0) {
647  // Tpetra::MultiVector doesn't let you construct a
648  // multivector with zero columns.
649  newLclVec = Teuchos::null;
650  }
651  else {
652  newLclVec = localVec_->subViewNonConst (Range1D (0, newNumColumns-1));
653  }
654  }
655 
656  // Leave most side effects until the end, for exception safety.
657  localVec_ = newLclVec;
658  numCols_ = newNumColumns;
659  }
660 
666  void
667  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
668  const size_t columnIndex,
669  const Teuchos::ArrayView<const scalar_type>& values)
670  {
671  using Teuchos::ArrayView;
672  typedef local_ordinal_type LO;
673  typedef global_ordinal_type GO;
674  typedef scalar_type ST;
675  typedef decltype (rows.size ()) size_type;
676  const char prefix[] = "Tpetra::MultiVectorFiller::sumIntoGlobalValues: ";
677 
678  if (map_.is_null ()) {
679  return; // the calling process is not participating
680  }
681  const size_type numEnt = rows.size ();
682  TEUCHOS_TEST_FOR_EXCEPTION
683  (numEnt != values.size (), std::invalid_argument, prefix
684  << "rows.size() = " << numEnt << " != values.size() = "
685  << values.size () << ".");
686 
687  // FIXME (31 Aug 2015) Don't do this.
688  if (columnIndex >= getNumColumns()) {
689  // Automatically expand the number of columns. This
690  // implicitly ensures that localVec_ is not null.
691  setNumColumns (columnIndex + 1);
692  }
693 
694  // It would be a lot more efficient for users to get the Kokkos
695  // view outside of their fill loop, and fill into it directly.
696  auto X_j = localVec_->getVector (columnIndex);
697  auto X_j_2d = X_j->template getLocalView<typename MV::dual_view_type::t_host::memory_space> ();
698  auto X_j_1d = Kokkos::subview (X_j_2d, Kokkos::ALL (), 0);
699 
700  const map_type& theMap = *map_;
701  for (size_type k = 0; k < numEnt; ++k) {
702  const ST val = values[k];
703  const GO gblRowInd = rows[k];
704  const LO lclRowInd = theMap.getLocalElement (gblRowInd);
705  if (lclRowInd == Teuchos::OrdinalTraits<LO>::invalid ()) {
706  // The row doesn't belong to the calling process, so stuff
707  // its data in nonlocals_.
708 
709  auto& innerMap = nonlocals_[columnIndex];
710  auto innerIter = innerMap.find (gblRowInd);
711  if (innerIter == innerMap.end ()) {
712  innerMap.insert (std::make_pair (gblRowInd, values[k]));
713  } else {
714  innerIter->second += val;
715  }
716  }
717  else {
718  // FIXME (mfh 27 Feb 2012) Allow different combine modes.
719  // The current combine mode just adds to the current value
720  // at that location.
721  X_j_1d[lclRowInd] += val;
722  }
723  }
724  }
725 
735  void
736  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
737  const Teuchos::ArrayView<const scalar_type>& values)
738  {
739  using Teuchos::ArrayView;
740  typedef typename ArrayView<const global_ordinal_type>::size_type size_type;
741 
742  const size_t numCols = getNumColumns();
743  for (size_t j = 0; j < numCols; ++j) {
744  const size_type offset = numCols*j;
745  const size_type len = numCols;
746  this->sumIntoGlobalValues (rows.view (offset, len), j, values.view (offset, len));
747  }
748  }
749 
774  template<class BinaryFunction>
775  void
776  locallyAssemble (MV& X, BinaryFunction& f)
777  {
778  typedef scalar_type ST;
779  typedef local_ordinal_type LO;
780  typedef global_ordinal_type GO;
781  typedef typename MV::dual_view_type::t_host::memory_space host_memory_space;
782 
783  if (X.getMap ().is_null ()) {
784  return; // this process is not participating
785  }
786  const map_type& srcMap = * (X.getMap ());
787 
788 
789 
790  for (size_t j = 0; j < X.getNumVectors (); ++j) {
791  auto X_j = X.getVector (j);
792  auto X_j_2d = X_j->template getLocalView<host_memory_space> ();
793  auto X_j_1d = Kokkos::subview (X_j_2d, Kokkos::ALL (), 0);
794 
795  // First, assemble in the local components from localVec_.
796  if (! localVec_.is_null () && ! localVec_->getMap ().is_null ()
797  && j < localVec_->getNumVectors ()) {
798  auto lcl_j = localVec_->getVector (j);
799  auto lcl_j_2d = lcl_j->template getLocalView<host_memory_space> ();
800  auto lcl_j_1d = Kokkos::subview (lcl_j_2d, Kokkos::ALL (), 0);
801 
802  // Iterate over all rows of localVec_. i_lcl is a local
803  // index with respect to localVec_'s Map, which may differ
804  // from X's Map.
805  const map_type& lclMap = * (localVec_->getMap ());
806  const LO lclNumRows = static_cast<LO> (lcl_j->getLocalLength ());
807  for (LO i_lcl = 0; i_lcl < lclNumRows; ++i_lcl) {
808  const GO i_gbl = lclMap.getGlobalElement (i_lcl);
809  const LO i_X = srcMap.getLocalElement (i_gbl);
810  X_j_1d(i_X) = f (X_j_1d(i_X), lcl_j_1d(i_lcl));
811  }
812  }
813 
814  // Second, assemble in the nonlocal components.
815  auto outerIter = nonlocals_.find (j);
816  if (outerIter != nonlocals_.end ()) {
817  auto beg = outerIter->second.begin ();
818  auto end = outerIter->second.end ();
819  for (auto innerIter = beg; innerIter != end; ++innerIter) {
820  const GO gblRowInd = innerIter->first;
821  const LO lclRowInd = srcMap.getLocalElement (gblRowInd);
822 
823  if (lclRowInd != Teuchos::OrdinalTraits<LO>::invalid ()) {
824  const ST val = innerIter->second;
825  X_j_1d(lclRowInd) = f (X_j_1d(lclRowInd), val);
826  }
827  }
828  }
829  }
830  }
831 
837  void
839  {
840  std::plus<scalar_type> f;
841  locallyAssemble<std::plus<scalar_type> > (X, f);
842  }
843 
849  void clear ()
850  {
851  std::map<size_t, std::map<global_ordinal_type, scalar_type> > newNonlcls;
852  // The standard STL idiom for clearing the contents of a
853  // container completely. Setting the size to zero may not
854  // necessarily deallocate data.
855  std::swap (nonlocals_, newNonlcls);
856 
857  // Don't actually deallocate the multivector of local entries.
858  // Just fill it with zero. This is because the caller hasn't
859  // reset the number of columns.
860  if (! localVec_.is_null ()) {
861  localVec_->putScalar (Teuchos::ScalarTraits<scalar_type>::zero());
862  }
863  }
864 
865  private:
867  Teuchos::RCP<const map_type> map_;
868 
870  size_t numCols_;
871 
873  Teuchos::RCP<MV> localVec_;
874 
876  std::map<size_t, std::map<global_ordinal_type, scalar_type> > nonlocals_;
877 
879  Teuchos::EVerbosityLevel verbLevel_;
880 
882  Teuchos::RCP<Teuchos::FancyOStream> out_;
883 
885  size_t getNumColumns() const { return numCols_; }
886 
888  Teuchos::ArrayView<const global_ordinal_type>
889  getLocalIndices() const
890  {
891  return map_->getNodeElementList ();
892  }
893 
895  Teuchos::Array<global_ordinal_type>
896  getSortedUniqueNonlocalIndices (const Teuchos::Comm<int>& comm,
897  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
898  const bool debug = false) const
899  {
900  using Teuchos::Array;
901  using Teuchos::ArrayView;
902  using Teuchos::as;
903  typedef global_ordinal_type GO;
904 
905  if (debug && ! out.is_null ()) {
906  std::ostringstream os;
907  os << "Proc " << comm.getRank () << ": getSortedUniqueNonlocalIndices"
908  << std::endl;
909  *out << os.str ();
910  }
911 
912  // FIXME (mfh 30 Aug 2015) The std::set algorithm is harder to
913  // thread-parallelize, but it should be easier to make the new
914  // test pass with this.
915  std::set<GO> indsOut;
916  const size_t numCols = getNumColumns ();
917  for (size_t j = 0; j < numCols; ++j) {
918  auto outerIter = nonlocals_.find (j);
919  if (outerIter != nonlocals_.end ()) {
920  auto beg = outerIter->second.begin ();
921  auto end = outerIter->second.end ();
922  for (auto innerIter = beg; innerIter != end; ++innerIter) {
923  // *innerIter: (global row index, value) pair.
924  indsOut.insert (innerIter->first);
925  }
926  }
927  }
928 
929  if (debug && ! out.is_null ()) {
930  std::ostringstream os;
931  os << "Proc " << comm.getRank () << ": Made nonlocals set" << std::endl;
932  *out << os.str ();
933  }
934 
935  Array<GO> allNonlocals (indsOut.begin (), indsOut.end ());
936  if (debug && ! out.is_null ()) {
937  std::ostringstream os;
938  os << "Proc " << comm.getRank () << ": Done with "
939  "getSortedUniqueNonlocalIndices" << std::endl;
940  *out << os.str ();
941  }
942  return allNonlocals;
943  }
944  };
945 } // namespace Details
946 } // namespace Tpetra
947 
948 namespace Tpetra {
949 
974  template<class MV>
976  public:
977  typedef typename MV::scalar_type scalar_type;
978  typedef typename MV::local_ordinal_type local_ordinal_type;
979  typedef typename MV::global_ordinal_type global_ordinal_type;
980  typedef typename MV::node_type node_type;
981  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
982 
1012  MultiVectorFiller (const Teuchos::RCP<const map_type>& map,
1013  const size_t numCols);
1014 
1034  void
1035  globalAssemble (MV& X_out, const bool forceReuseMap = false);
1036 
1038  void clear () {
1039  data_.clear ();
1040  }
1041 
1042  void
1043  describe (Teuchos::FancyOStream& out,
1044  const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
1045  {
1046  data_.describe (out, verbLevel);
1047  }
1048 
1059  void
1060  sumIntoGlobalValues (Teuchos::ArrayView<const global_ordinal_type> rows,
1061  size_t column,
1062  Teuchos::ArrayView<const scalar_type> values)
1063  {
1064  data_.sumIntoGlobalValues (rows, column, values);
1065  }
1066 
1082  void
1083  sumIntoGlobalValues (Teuchos::ArrayView<const global_ordinal_type> rows,
1084  Teuchos::ArrayView<const scalar_type> values)
1085  {
1086  data_.sumIntoGlobalValues (rows, values);
1087  }
1088 
1089  private:
1091  Teuchos::RCP<const map_type> ctorMap_;
1092 
1097  Teuchos::RCP<const map_type> sourceMap_;
1098 
1104  Teuchos::RCP<const map_type> targetMap_;
1105 
1118  Teuchos::RCP<MV> sourceVec_;
1119 
1125 
1127  Teuchos::RCP<export_type> exporter_;
1128 
1134  void locallyAssemble (MV& X_in) {
1135  data_.locallyAssemble (X_in);
1136  }
1137 
1139  Teuchos::Array<global_ordinal_type>
1140  getSourceIndices (const Teuchos::Comm<int>& comm,
1141  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
1142  const bool debug = false) const
1143  {
1144  return data_.getSourceIndices (comm, out, debug);
1145  }
1146 
1155  Teuchos::RCP<const map_type>
1156  computeSourceMap (const global_ordinal_type indexBase,
1157  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1158  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
1159  const bool debug = false);
1160  };
1161 
1162  template<class MV>
1163  MultiVectorFiller<MV>::MultiVectorFiller (const Teuchos::RCP<const typename MultiVectorFiller<MV>::map_type>& map, const size_t numCols)
1164  : ctorMap_ (map),
1165  sourceMap_ (Teuchos::null),
1166  targetMap_ (Teuchos::null),
1167  data_ (map, numCols),
1168  exporter_ (Teuchos::null)
1169  {}
1170 
1171  template<class MV>
1172  Teuchos::RCP<const typename MultiVectorFiller<MV>::map_type>
1174  computeSourceMap (const global_ordinal_type indexBase,
1175  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1176  const Teuchos::RCP<Teuchos::FancyOStream>& out,
1177  const bool debug)
1178  {
1179  using Teuchos::Array;
1180  using Teuchos::ArrayView;
1181  using Teuchos::rcp;
1182  typedef global_ordinal_type GO;
1183 
1184  if (debug && ! out.is_null ()) {
1185  out->pushTab ();
1186  std::ostringstream os;
1187  const int myRank = comm->getRank ();
1188  os << "Proc " << myRank << ": computeSourceMap" << std::endl;
1189  *out << os.str ();
1190  }
1191 
1192  Array<GO> indices = getSourceIndices (*comm, out, debug);
1193 
1194  if (debug && ! out.is_null ()) {
1195  std::ostringstream os;
1196  const int myRank = comm->getRank ();
1197  os << "Proc " << myRank << ": computeSourceMap: About to create Map"
1198  << std::endl;
1199  *out << os.str ();
1200  out->popTab ();
1201  }
1202 
1203  // Passing "invalid" for the numGlobalElements argument of the Map
1204  // constructor tells the Map to compute the global number of
1205  // elements itself.
1206  typedef Tpetra::global_size_t GST;
1207  const GST INV = Teuchos::OrdinalTraits<GST>::invalid ();
1208  return rcp (new map_type (INV, indices, indexBase, comm));
1209  }
1210 
1211  template<class MV>
1212  void
1214  globalAssemble (MV& X_out, const bool forceReuseMap)
1215  {
1216  using Teuchos::ArrayView;
1217  using Teuchos::Array;
1218  using Teuchos::Range1D;
1219  using Teuchos::RCP;
1220  using Teuchos::rcp;
1221  RCP<Teuchos::FancyOStream> outPtr;
1222  const bool debug = false;
1223 
1224  if (debug) {
1225  outPtr = Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr));
1226  outPtr->pushTab ();
1227  }
1228 
1229  const size_t numVecs = X_out.getNumVectors ();
1230  if (numVecs == 0) {
1231  // Nothing to do! Of course, this does not check for whether
1232  // X_out has the right number of rows. That's OK, though.
1233  // Compare to the definition of the BLAS' _AXPY for an input
1234  // vector containing NaNs, but multiplied by alpha=0.
1235  return;
1236  }
1237  //
1238  // Get the target Map of the Export. If X_out's Map is the same
1239  // as the target Map from last time, then we may be able to
1240  // recycle the Export from last time, if the source Map is also
1241  // the same.
1242  //
1243  RCP<const map_type> targetMap;
1244  bool assumeSameTargetMap = false;
1245  if (targetMap_.is_null()) {
1246  targetMap_ = X_out.getMap();
1247  targetMap = targetMap_;
1248  assumeSameTargetMap = false;
1249  }
1250  else {
1251  if (forceReuseMap) {
1252  targetMap = targetMap_;
1253  assumeSameTargetMap = true;
1254  }
1255  else {
1256  // If X_out's Map is the same as targetMap_, we may be able to
1257  // reuse the Export. Constructing the Export may be more
1258  // expensive than calling isSameAs() (which requires just a
1259  // few reductions and reading through the lists of owned
1260  // global indices), so it's worth checking.
1261  if (targetMap_->isSameAs (*(X_out.getMap()))) {
1262  assumeSameTargetMap = true;
1263  targetMap = targetMap_;
1264  }
1265  }
1266  }
1267 
1268  if (debug && ! outPtr.is_null ()) {
1269  std::ostringstream os;
1270  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1271  const int myRank = comm.getRank ();
1272  os << "Proc " << myRank << ": Right before getting source Map" << std::endl;
1273  *outPtr << os.str ();
1274  }
1275 
1276  //
1277  // Get the source Map of the Export. If the source Map of the
1278  // Export is the same as last time, then we may be able to recycle
1279  // the Export from last time, if the target Map is also the same.
1280  //
1281  RCP<const map_type> sourceMap;
1282  bool computedSourceMap = false;
1283  {
1284  if (forceReuseMap && ! sourceMap_.is_null()) {
1285  sourceMap = sourceMap_;
1286  }
1287  else {
1288  sourceMap = computeSourceMap (ctorMap_->getIndexBase (),
1289  ctorMap_->getComm (),
1290  outPtr, debug);
1291  computedSourceMap = true;
1292  }
1293  }
1294  if (computedSourceMap) {
1295  sourceMap_ = sourceMap;
1296  }
1297 
1298  if (debug && ! outPtr.is_null ()) {
1299  std::ostringstream os;
1300  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1301  const int myRank = comm.getRank ();
1302  os << "Proc " << myRank << ": Right before computing Export" << std::endl;
1303  *outPtr << os.str ();
1304  }
1305 
1306  //
1307  // Now that we have the source and target Maps of the Export, we
1308  // can check whether we can recycle the Export from last time.
1309  //
1310  const bool mustComputeExport =
1311  (exporter_.is_null() || (assumeSameTargetMap && ! computedSourceMap));
1312  if (mustComputeExport) {
1313  exporter_ = rcp (new export_type (sourceMap_, targetMap_));
1314  }
1315 
1316  if (debug && ! outPtr.is_null ()) {
1317  std::ostringstream os;
1318  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1319  const int myRank = comm.getRank ();
1320  os << "Proc " << myRank << ": Right after computing Export" << std::endl;
1321  *outPtr << os.str ();
1322  }
1323 
1324  // Source multivector for the Export.
1325  RCP<MV> X_in;
1326  const bool mustReallocateVec = sourceVec_.is_null() ||
1327  sourceVec_->getNumVectors() < numVecs || ! assumeSameTargetMap;
1328 
1329  if (mustReallocateVec) {
1330  // Reallocating nonlocalVec_ ensures that it has the right Map.
1331  sourceVec_ = rcp (new MV (sourceMap_, numVecs));
1332  X_in = sourceVec_;
1333  } else {
1334  if (sourceVec_->getNumVectors() == numVecs) {
1335  X_in = sourceVec_;
1336  } else { // sourceVec_ has more vectors than needed.
1337  X_in = sourceVec_->subViewNonConst (Range1D (0, numVecs-1));
1338  }
1339  }
1340 
1341  if (debug && ! outPtr.is_null ()) {
1342  std::ostringstream os;
1343  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1344  const int myRank = comm.getRank ();
1345  os << "Proc " << myRank << ": Right before locallyAssemble" << std::endl;
1346  *outPtr << os.str ();
1347  }
1348 
1349  // "Locally assemble" the data into X_in by summing together
1350  // entries with the same indices.
1351  locallyAssemble (*X_in);
1352 
1353  if (debug && ! outPtr.is_null ()) {
1354  std::ostringstream os;
1355  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1356  const int myRank = comm.getRank ();
1357  os << "Proc " << myRank << ": Right after locallyAssemble" << std::endl;
1358  *outPtr << os.str ();
1359  }
1360 
1361  // Do the Export.
1362  const Tpetra::CombineMode combineMode = Tpetra::ADD;
1363  X_out.doExport (*X_in, *exporter_, combineMode);
1364 
1365  if (debug && ! outPtr.is_null ()) {
1366  std::ostringstream os;
1367  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1368  const int myRank = comm.getRank ();
1369  os << "Proc " << myRank << ": Right after Export" << std::endl;
1370  *outPtr << os.str ();
1371  }
1372 
1373  // Clean out the locally owned data for next time.
1374  X_in->putScalar (Teuchos::ScalarTraits<scalar_type>::zero ());
1375 
1376  // FIXME (mfh 27 Aug 2015) Clean out the remote data for next
1377  // time. See Bug 6398.
1378 
1379  if (debug && ! outPtr.is_null ()) {
1380  std::ostringstream os;
1381  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1382  const int myRank = comm.getRank ();
1383  os << "Proc " << myRank << ": Done with globalAssemble" << std::endl;
1384  *outPtr << os.str ();
1385  outPtr->pushTab ();
1386  }
1387  }
1388 
1389  namespace Test {
1390 
1396  template<class MV>
1398  public:
1399  typedef typename MV::scalar_type scalar_type;
1400  typedef typename MV::local_ordinal_type local_ordinal_type;
1401  typedef typename MV::global_ordinal_type global_ordinal_type;
1402  typedef typename MV::node_type node_type;
1403  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
1404 
1413  static void
1414  testSameMap (const Teuchos::RCP<const map_type>& targetMap,
1415  const global_ordinal_type eltSize, // Must be odd
1416  const size_t numCols,
1417  const Teuchos::RCP<Teuchos::FancyOStream>& outStream=Teuchos::null,
1418  const Teuchos::EVerbosityLevel verbosityLevel=Teuchos::VERB_DEFAULT)
1419  {
1420  using Teuchos::Array;
1421  using Teuchos::ArrayRCP;
1422  using Teuchos::ArrayView;
1423  using Teuchos::as;
1424  using Teuchos::Comm;
1425  using Teuchos::FancyOStream;
1426  using Teuchos::getFancyOStream;
1427  using Teuchos::oblackholestream;
1428  using Teuchos::ptr;
1429  using Teuchos::RCP;
1430  using Teuchos::rcp;
1431  using Teuchos::rcpFromRef;
1432  using Teuchos::REDUCE_SUM;
1433  using Teuchos::reduceAll;
1434  using std::cerr;
1435  using std::endl;
1436 
1437  typedef local_ordinal_type LO;
1438  typedef global_ordinal_type GO;
1439  typedef scalar_type ST;
1440  typedef Teuchos::ScalarTraits<ST> STS;
1441 
1442  TEUCHOS_TEST_FOR_EXCEPTION(eltSize % 2 == 0, std::invalid_argument,
1443  "Element size (eltSize) argument must be odd.");
1444  TEUCHOS_TEST_FOR_EXCEPTION(numCols == 0, std::invalid_argument,
1445  "Number of columns (numCols) argument must be nonzero.");
1446  // Default behavior is to print nothing out.
1447  RCP<FancyOStream> out = outStream.is_null() ?
1448  getFancyOStream (rcp (new oblackholestream)) : outStream;
1449  const Teuchos::EVerbosityLevel verbLevel =
1450  (verbosityLevel == Teuchos::VERB_DEFAULT) ?
1451  Teuchos::VERB_NONE : verbosityLevel;
1452 
1453  //RCP<MV> X = rcp (new MV (targetMap, numCols));
1454 
1455  const GO indexBase = targetMap->getIndexBase();
1456  Array<GO> rows (eltSize);
1457  Array<ST> values (eltSize);
1458  std::fill (values.begin(), values.end(), STS::one());
1459 
1460  // Make this a pointer so we can free its contents, in case
1461  // those contents depend on the input to globalAssemble().
1462  RCP<MultiVectorFiller<MV> > filler =
1463  rcp (new MultiVectorFiller<MV> (targetMap, numCols));
1464 
1465  TEUCHOS_TEST_FOR_EXCEPTION(! targetMap->isContiguous(),
1466  std::invalid_argument, "MultiVectorFiller test currently only works "
1467  "for contiguous Maps.");
1468 
1469  const GO minGlobalIndex = targetMap->getMinGlobalIndex();
1470  const GO maxGlobalIndex = targetMap->getMaxGlobalIndex();
1471  const GO minAllGlobalIndex = targetMap->getMinAllGlobalIndex();
1472  const GO maxAllGlobalIndex = targetMap->getMaxAllGlobalIndex();
1473  for (size_t j = 0; j < numCols; ++j) {
1474  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1475  // Overlap over processes, without running out of bounds.
1476  const GO start = std::max (i - eltSize/2, minAllGlobalIndex);
1477  const GO end = std::min (i + eltSize/2, maxAllGlobalIndex);
1478  const GO len = end - start + 1;
1479 
1480  TEUCHOS_TEST_FOR_EXCEPTION(len > eltSize, std::logic_error,
1481  "At start,end = " << start << "," << end << ", len = " << len
1482  << " > eltSize = " << eltSize << ".");
1483 
1484  for (GO k = 0; k < len; ++k) {
1485  rows[k] = start + k;
1486  }
1487  if (verbLevel == Teuchos::VERB_EXTREME) {
1488  *out << "Inserting: "
1489  << Teuchos::toString (rows.view(0,len)) << ", "
1490  << Teuchos::toString (values.view(0, len)) << std::endl;
1491  }
1492  filler->sumIntoGlobalValues (rows.view(0, len), j, values.view(0, len));
1493  }
1494  }
1495 
1496  if (verbLevel == Teuchos::VERB_EXTREME) {
1497  *out << "Filler:" << std::endl;
1498  filler->describe (*out, verbLevel);
1499  *out << std::endl;
1500  }
1501 
1502  MV X_out (targetMap, numCols);
1503  filler->globalAssemble (X_out);
1504  filler = Teuchos::null;
1505 
1506  if (verbLevel == Teuchos::VERB_EXTREME) {
1507  *out << "X_out:" << std::endl;
1508  X_out.describe (*out, verbLevel);
1509  *out << std::endl;
1510  }
1511 
1512  // Create multivector for comparison.
1513  MV X_expected (targetMap, numCols);
1514  const scalar_type two = STS::one() + STS::one();
1515  for (size_t j = 0; j < numCols; ++j) {
1516  ArrayRCP<ST> X_j = X_expected.getDataNonConst (j);
1517 
1518  // Each entry of the column should have the value eltSize,
1519  // except for the first and last few entries in the whole
1520  // column (globally, not locally).
1521  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1522  const LO localIndex = targetMap->getLocalElement (i);
1523  TEUCHOS_TEST_FOR_EXCEPTION(i == Teuchos::OrdinalTraits<LO>::invalid(),
1524  std::logic_error, "Global index " << i << " is not in the "
1525  "multivector's Map.");
1526 
1527  if (i <= minAllGlobalIndex + eltSize/2) {
1528  X_j[localIndex] = two + as<ST>(i) - as<ST>(indexBase);
1529  }
1530  else if (i >= maxAllGlobalIndex - eltSize/2) {
1531  X_j[localIndex] = two + as<ST>(maxAllGlobalIndex) - as<ST>(i);
1532  }
1533  else {
1534  X_j[localIndex] = as<ST>(eltSize);
1535  }
1536  } // for each global index which my process owns
1537  } // for each column of the multivector
1538 
1539  if (verbLevel == Teuchos::VERB_EXTREME) {
1540  *out << "X_expected:" << std::endl;
1541  X_expected.describe (*out, verbLevel);
1542  *out << std::endl;
1543  }
1544 
1545  Array<GO> errorLocations;
1546  for (size_t j = 0; j < numCols; ++j) {
1547  ArrayRCP<const ST> X_out_j = X_out.getData (j);
1548  ArrayRCP<const ST> X_expected_j = X_expected.getData (j);
1549 
1550  // Each entry of the column should have the value eltSize,
1551  // except for the first and last few entries in the whole
1552  // column (globally, not locally).
1553  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1554  const LO localIndex = targetMap->getLocalElement (i);
1555  TEUCHOS_TEST_FOR_EXCEPTION(i == Teuchos::OrdinalTraits<LO>::invalid(),
1556  std::logic_error, "Global index " << i << " is not in the "
1557  "multivector's Map.");
1558 
1559  // The floating-point additions should be exact in this
1560  // case, except for very large values of eltSize.
1561  if (X_out_j[localIndex] != X_expected_j[localIndex]) {
1562  errorLocations.push_back (i);
1563  }
1564  } // for each global index which my process owns
1565 
1566  const typename Array<GO>::size_type localNumErrors = errorLocations.size();
1567  typename Array<GO>::size_type globalNumErrors = 0;
1568  RCP<const Comm<int> > comm = targetMap->getComm();
1569  reduceAll (*comm, REDUCE_SUM, localNumErrors, ptr (&globalNumErrors));
1570 
1571  if (globalNumErrors != 0) {
1572  std::ostringstream os;
1573  os << "Proc " << comm->getRank() << ": " << localNumErrors
1574  << " incorrect value" << (localNumErrors != 1 ? "s" : "")
1575  << ". Error locations: [ ";
1576  std::copy (errorLocations.begin(), errorLocations.end(),
1577  std::ostream_iterator<GO> (os, " "));
1578  os << "].";
1579  // Iterate through all processes in the communicator,
1580  // printing out each process' local errors.
1581  for (int p = 0; p < comm->getSize(); ++p) {
1582  if (p == comm->getRank()) {
1583  cerr << os.str() << endl;
1584  }
1585  // Barriers to let output finish.
1586  comm->barrier();
1587  comm->barrier();
1588  comm->barrier();
1589  }
1590  TEUCHOS_TEST_FOR_EXCEPTION(globalNumErrors != 0, std::logic_error,
1591  "Over all procs: " << globalNumErrors << " total error"
1592  << (globalNumErrors != 1 ? "s" : "") << ".");
1593  } // if there were any errors in column j
1594  } // for each column j
1595  }
1596  };
1597 
1599  template<class ScalarType,
1600  class LocalOrdinalType,
1601  class GlobalOrdinalType,
1602  class NodeType>
1603  void
1604  testMultiVectorFiller (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1605  const size_t unknownsPerNode,
1606  const GlobalOrdinalType unknownsPerElt,
1607  const size_t numCols,
1608  const Teuchos::RCP<Teuchos::FancyOStream>& outStream,
1609  const Teuchos::EVerbosityLevel verbLevel)
1610  {
1612  using Teuchos::FancyOStream;
1613  using Teuchos::getFancyOStream;
1614  using Teuchos::oblackholestream;
1615  using Teuchos::ParameterList;
1616  using Teuchos::RCP;
1617  using Teuchos::rcp;
1618  using std::cerr;
1619  using std::endl;
1620 
1621  typedef ScalarType ST;
1622  typedef LocalOrdinalType LO;
1623  typedef GlobalOrdinalType GO;
1624  typedef NodeType NT;
1625  typedef ::Tpetra::Map<LO, GO, NT> map_type;
1627  typedef Tpetra::global_size_t GST;
1628 
1629  RCP<FancyOStream> out = outStream.is_null() ?
1630  getFancyOStream (rcp (new oblackholestream)) : outStream;
1631  const GST INV = Teuchos::OrdinalTraits<GST>::invalid ();
1632  const GO indexBase = 0;
1633  RCP<const map_type> targetMap =
1634  rcp (new map_type (INV, unknownsPerNode, indexBase, comm));
1635 
1636  std::ostringstream os;
1637  try {
1638  MultiVectorFillerTester<MV>::testSameMap (targetMap, unknownsPerElt, numCols, out, verbLevel);
1639  } catch (std::exception& e) {
1640  os << e.what();
1641  }
1642 
1643  for (int p = 0; p < comm->getSize(); ++p) {
1644  if (p == comm->getRank()) {
1645  *out << "On process " << comm->getRank() << ": " << os.str() << endl;
1646  }
1647  comm->barrier();
1648  comm->barrier();
1649  comm->barrier();
1650  }
1651  }
1652 
1658  void
1659  testSortAndMergeIn ();
1660 
1661  } // namespace Test
1662 } // namespace Tpetra
1663 
1664 
1665 #endif // __Tpetra_MultiVectorFiller_hpp
Tpetra::Details::MultiVectorFillerData2::sumIntoGlobalValues
void sumIntoGlobalValues(const Teuchos::ArrayView< const global_ordinal_type > &rows, const size_t columnIndex, const Teuchos::ArrayView< const scalar_type > &values)
Set entry (rows[i],columnIndex) to values[i], for all i.
Definition: Tpetra_MultiVectorFiller.hpp:667
Tpetra::MultiVectorFiller::clear
void clear()
Clear contents, in preparation for another fill cycle.
Definition: Tpetra_MultiVectorFiller.hpp:1038
Tpetra::Details::MultiVectorFillerData2::MultiVectorFillerData2
MultiVectorFillerData2(const Teuchos::RCP< const map_type > &map, const size_t numColumns, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT, const Teuchos::RCP< Teuchos::FancyOStream > &out=Teuchos::null)
Constructor.
Definition: Tpetra_MultiVectorFiller.hpp:414
Tpetra::Classes::Map::getGlobalElement
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Definition: Tpetra_Map_def.hpp:1114
Tpetra::Details::MultiVectorFillerData::sumIntoGlobalValues
void sumIntoGlobalValues(const Teuchos::ArrayView< const global_ordinal_type > &rows, const Teuchos::ArrayView< const scalar_type > &values)
Definition: Tpetra_MultiVectorFiller.hpp:201
Tpetra::Details::MultiVectorFillerData::MultiVectorFillerData
MultiVectorFillerData(const Teuchos::RCP< const map_type > &map, const size_t numColumns)
Constructor.
Definition: Tpetra_MultiVectorFiller.hpp:151
Tpetra::Details::MultiVectorFillerData2::sumIntoGlobalValues
void sumIntoGlobalValues(const Teuchos::ArrayView< const global_ordinal_type > &rows, const Teuchos::ArrayView< const scalar_type > &values)
Set entry (rows[i],j) to values[i], for all i and j.
Definition: Tpetra_MultiVectorFiller.hpp:736
Tpetra::Test::MultiVectorFillerTester::testSameMap
static void testSameMap(const Teuchos::RCP< const map_type > &targetMap, const global_ordinal_type eltSize, const size_t numCols, const Teuchos::RCP< Teuchos::FancyOStream > &outStream=Teuchos::null, const Teuchos::EVerbosityLevel verbosityLevel=Teuchos::VERB_DEFAULT)
Test global assembly when constructor Map = target Map.
Definition: Tpetra_MultiVectorFiller.hpp:1414
Tpetra::Details::MultiVectorFillerData2::setNumColumns
void setNumColumns(const size_t newNumColumns)
Set the number of columns in the output multivector.
Definition: Tpetra_MultiVectorFiller.hpp:624
Details
Implementation details of Tpetra.
Tpetra::MultiVectorFiller
Adds nonlocal sum-into functionality to Tpetra::MultiVector.
Definition: Tpetra_MultiVectorFiller.hpp:975
Tpetra::Details::MultiVectorFillerData::locallyAssemble
void locallyAssemble(MV &X, BinaryFunction &f)
Locally assemble into X.
Definition: Tpetra_MultiVectorFiller.hpp:248
Tpetra::createContigMapWithNode
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...
Tpetra::Details::MultiVectorFillerData2::clear
void clear()
Clear the contents of the multivector.
Definition: Tpetra_MultiVectorFiller.hpp:849
Tpetra::Details::MultiVectorFillerData::locallyAssemble
void locallyAssemble(MV &X)
Overload of locallyAssemble() for the usual ADD combine mode.
Definition: Tpetra_MultiVectorFiller.hpp:288
Tpetra::ADD
Sum new values into existing values.
Definition: Tpetra_CombineMode.hpp:95
Tpetra::Classes::Map::getLocalElement
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Definition: Tpetra_Map_def.hpp:1091
Tpetra::Details::MultiVectorFillerData2::getSourceIndices
Teuchos::Array< global_ordinal_type > getSourceIndices(const Teuchos::Comm< int > &comm, const Teuchos::RCP< Teuchos::FancyOStream > &out=Teuchos::null, const bool debug=false) const
All source indices (local and nonlocal) of the source Map, sorted and unique.
Definition: Tpetra_MultiVectorFiller.hpp:525
Tpetra::Details::MultiVectorFillerData::clear
void clear()
Clear the contents of the vector, making it implicitly a vector of zeros.
Definition: Tpetra_MultiVectorFiller.hpp:295
Tpetra::MultiVectorFiller::globalAssemble
void globalAssemble(MV &X_out, const bool forceReuseMap=false)
Assemble all the data (local and nonlocal) into X_out.
Definition: Tpetra_MultiVectorFiller.hpp:1214
Tpetra::MultiVectorFiller::sumIntoGlobalValues
void sumIntoGlobalValues(Teuchos::ArrayView< const global_ordinal_type > rows, Teuchos::ArrayView< const scalar_type > values)
Sum data into the multivector.
Definition: Tpetra_MultiVectorFiller.hpp:1083
Tpetra::MultiVectorFiller::sumIntoGlobalValues
void sumIntoGlobalValues(Teuchos::ArrayView< const global_ordinal_type > rows, size_t column, Teuchos::ArrayView< const scalar_type > values)
Sum data into the multivector.
Definition: Tpetra_MultiVectorFiller.hpp:1060
Tpetra::Details::MultiVectorFillerData2::MultiVectorFillerData2
MultiVectorFillerData2(const Teuchos::RCP< const map_type > &map, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT, const Teuchos::RCP< Teuchos::FancyOStream > &out=Teuchos::null)
Default constructor (sets number of columns to zero).
Definition: Tpetra_MultiVectorFiller.hpp:388
Tpetra::Details::MultiVectorFillerData2
Second implementation of fill and local assembly for MultiVectorFiller.
Definition: Tpetra_MultiVectorFiller.hpp:369
Tpetra::Details::MultiVectorFillerData::getSourceIndices
Teuchos::Array< global_ordinal_type > getSourceIndices() const
All source indices (local and nonlocal) of the source Map, sorted and unique.
Definition: Tpetra_MultiVectorFiller.hpp:305
Tpetra::Details::MultiVectorFillerData
Implementation of fill and local assembly for MultiVectorFiller.
Definition: Tpetra_MultiVectorFiller.hpp:110
Tpetra::Classes::MultiVector
One or more distributed dense vectors.
Definition: Tpetra_MultiVector_decl.hpp:389
Tpetra::MultiVectorFiller::MultiVectorFiller
MultiVectorFiller(const Teuchos::RCP< const map_type > &map, const size_t numCols)
Constructor.
Definition: Tpetra_MultiVectorFiller.hpp:1163
Tpetra::Details::MultiVectorFillerData2::locallyAssemble
void locallyAssemble(MV &X)
Do the "local" (not MPI) part of assembly.
Definition: Tpetra_MultiVectorFiller.hpp:838
Tpetra::Classes::Map
A parallel distribution of indices over processes.
Definition: Tpetra_Map_decl.hpp:247
Tpetra::Details::MultiVectorFillerData::MultiVectorFillerData
MultiVectorFillerData(const Teuchos::RCP< const map_type > &map)
Default constructor (sets number of columns to zero).
Definition: Tpetra_MultiVectorFiller.hpp:130
Tpetra::Details::MultiVectorFillerData2::locallyAssemble
void locallyAssemble(MV &X, BinaryFunction &f)
Locally assemble into X, with user-specified combine mode.
Definition: Tpetra_MultiVectorFiller.hpp:776
Tpetra::global_size_t
size_t global_size_t
Global size_t object.
Definition: Tpetra_ConfigDefs.hpp:109
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::Classes::Export
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Definition: Tpetra_Export_decl.hpp:124
Tpetra::Details::MultiVectorFillerData::setNumColumns
void setNumColumns(const size_t newNumColumns)
Set the number of columns in the output multivector.
Definition: Tpetra_MultiVectorFiller.hpp:161
Tpetra::Test::MultiVectorFillerTester
Tests for MultiVectorFiller.
Definition: Tpetra_MultiVectorFiller.hpp:1397
Tpetra::CombineMode
CombineMode
Rule for combining data in an Import or Export.
Definition: Tpetra_CombineMode.hpp:94