Tpetra parallel linear algebra  Version of the Day
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
46 
47 #ifndef TPETRA_MAP_DEF_HPP
48 #define TPETRA_MAP_DEF_HPP
49 
50 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
51 #include "Tpetra_Details_FixedHashTable.hpp"
52 #include "Tpetra_Details_gathervPrint.hpp"
54 #include "Tpetra_Core.hpp"
55 #include "Tpetra_Util.hpp"
56 #include "Teuchos_as.hpp"
57 #include "Teuchos_TypeNameTraits.hpp"
58 #include "Teuchos_CommHelpers.hpp"
59 #include "Tpetra_Details_mpiIsInitialized.hpp"
60 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
62 #include <stdexcept>
63 #include <typeinfo>
64 
65 namespace Tpetra {
66 namespace Classes {
67 
68  template <class LocalOrdinal, class GlobalOrdinal, class Node>
// Default constructor: an "empty" Map over a one-process serial
// communicator.  It owns zero global indices, so every GID-range
// member is set to the "invalid" ordinal sentinel, and the Map is
// marked neither uniform, contiguous, nor distributed.
// NOTE(review): the qualifier line "Map<LocalOrdinal,GlobalOrdinal,
// Node>::" appears to have been elided by the documentation
// extractor (original line 69 is missing) — confirm against the
// repository source.
70  Map () :
71  comm_ (new Teuchos::SerialComm<int> ()),
72  indexBase_ (0),
73  numGlobalElements_ (0),
74  numLocalElements_ (0),
// All GID bounds start "invalid" because there are no local or
// global indices to bound.
75  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
76  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
77  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
78  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
79  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
80  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
81  uniform_ (false), // trivially
82  contiguous_ (false),
83  distributed_ (false), // no communicator yet
// Directory is constructed empty here; it is initialized on demand
// (see the commented-out setupDirectory() calls in the other
// constructors).
84  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
85  {
87  }
88 
89  template <class LocalOrdinal, class GlobalOrdinal, class Node>
// "Contiguous uniform" constructor: given only the global number of
// elements, distribute GIDs [indexBase, indexBase+numGlobalElements)
// across the communicator's processes as evenly as possible
// (GloballyDistributed), or replicate all of them on every process
// (LocallyReplicated).  This is a collective: all processes must
// call it with the same numGlobalElements and indexBase.
//
// Parameters:
//   numGlobalElements - total GID count; must be valid (not GSTI)
//   indexBase         - smallest global index
//   comm              - communicator over which the Map lives
//   lOrG              - GloballyDistributed or LocallyReplicated
//   node              - not referenced in this body (legacy Kokkos
//                       node parameter, presumably kept for
//                       backward compatibility — confirm upstream)
// NOTE(review): original line 90 ("Map<LocalOrdinal,GlobalOrdinal,
// Node>::") appears elided by the documentation extractor.
91  Map (global_size_t numGlobalElements,
92  GlobalOrdinal indexBase,
93  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
94  LocalGlobal lOrG,
95  const Teuchos::RCP<Node> &node) :
96  comm_ (comm),
97  uniform_ (true),
98  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
99  {
100  using Teuchos::as;
101  using Teuchos::broadcast;
102  using Teuchos::outArg;
103  using Teuchos::reduceAll;
104  using Teuchos::REDUCE_MIN;
105  using Teuchos::REDUCE_MAX;
106  using Teuchos::typeName;
107  typedef GlobalOrdinal GO;
108  typedef global_size_t GST;
109  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
110 
112 
113 #ifdef HAVE_TPETRA_DEBUG
114  // In debug mode only, check whether numGlobalElements and
115  // indexBase are the same over all processes in the communicator.
116  {
117  GST proc0NumGlobalElements = numGlobalElements;
118  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
119  GST minNumGlobalElements = numGlobalElements;
120  GST maxNumGlobalElements = numGlobalElements;
121  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements, outArg (minNumGlobalElements));
122  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements, outArg (maxNumGlobalElements));
123  TEUCHOS_TEST_FOR_EXCEPTION(
124  minNumGlobalElements != maxNumGlobalElements || numGlobalElements != minNumGlobalElements,
125  std::invalid_argument,
126  "Tpetra::Map constructor: All processes must provide the same number "
127  "of global elements. Process 0 set numGlobalElements = "
128  << proc0NumGlobalElements << ". The calling process "
129  << comm->getRank () << " set numGlobalElements = " << numGlobalElements
130  << ". The min and max values over all processes are "
131  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
132 
133  GO proc0IndexBase = indexBase;
134  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
135  GO minIndexBase = indexBase;
136  GO maxIndexBase = indexBase;
137  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
138  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
139  TEUCHOS_TEST_FOR_EXCEPTION(
140  minIndexBase != maxIndexBase || indexBase != minIndexBase,
141  std::invalid_argument,
142  "Tpetra::Map constructor: "
143  "All processes must provide the same indexBase argument. "
144  "Process 0 set indexBase = " << proc0IndexBase << ". The calling "
145  "process " << comm->getRank () << " set indexBase = " << indexBase
146  << ". The min and max values over all processes are "
147  << minIndexBase << " resp. " << maxIndexBase << ".");
148  }
149 #endif // HAVE_TPETRA_DEBUG
150 
151  // Distribute the elements across the processes in the given
152  // communicator so that global IDs (GIDs) are
153  //
154  // - Nonoverlapping (only one process owns each GID)
155  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
156  // adjacent GIDs differ by more than one)
157  // - As evenly distributed as possible (the numbers of GIDs on two
158  // different processes do not differ by more than one)
159 
160  // All processes have the same numGlobalElements, but we still
161  // need to check that it is valid. numGlobalElements must be
162  // positive and not the "invalid" value (GSTI).
163  //
164  // This comparison looks funny, but it avoids compiler warnings
165  // for comparing unsigned integers (numGlobalElements_in is a
166  // GST, which is unsigned) while still working if we
167  // later decide to make GST signed.
168  TEUCHOS_TEST_FOR_EXCEPTION(
169  (numGlobalElements < 1 && numGlobalElements != 0),
170  std::invalid_argument,
171  "Tpetra::Map constructor: numGlobalElements (= "
172  << numGlobalElements << ") must be nonnegative.");
173 
174  TEUCHOS_TEST_FOR_EXCEPTION(
175  numGlobalElements == GSTI, std::invalid_argument,
176  "Tpetra::Map constructor: You provided numGlobalElements = Teuchos::"
177  "OrdinalTraits<Tpetra::global_size_t>::invalid(). This version of the "
178  "constructor requires a valid value of numGlobalElements. You "
179  "probably mistook this constructor for the \"contiguous nonuniform\" "
180  "constructor, which can compute the global number of elements for you "
181  "if you set numGlobalElements to that value.");
182 
183  size_t numLocalElements = 0; // will set below
184  if (lOrG == GloballyDistributed) {
185  // Compute numLocalElements:
186  //
187  // If numGlobalElements == numProcs * B + remainder,
188  // then Proc r gets B+1 elements if r < remainder,
189  // and B elements if r >= remainder.
190  //
191  // This strategy is valid for any value of numGlobalElements and
192  // numProcs, including the following border cases:
193  // - numProcs == 1
194  // - numLocalElements < numProcs
195  //
196  // In the former case, remainder == 0 && numGlobalElements ==
197  // numLocalElements. In the latter case, remainder ==
198  // numGlobalElements && numLocalElements is either 0 or 1.
199  const GST numProcs = static_cast<GST> (comm_->getSize ());
200  const GST myRank = static_cast<GST> (comm_->getRank ());
201  const GST quotient = numGlobalElements / numProcs;
202  const GST remainder = numGlobalElements - quotient * numProcs;
203 
204  GO startIndex;
205  if (myRank < remainder) {
206  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
207  // myRank was originally an int, so it should never overflow
208  // reasonable GO types.
209  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
210  } else {
211  numLocalElements = as<size_t> (quotient);
212  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
213  as<GO> (remainder);
214  }
215 
216  minMyGID_ = indexBase + startIndex;
217  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
218  minAllGID_ = indexBase;
219  maxAllGID_ = indexBase + numGlobalElements - 1;
220  distributed_ = (numProcs > 1);
221  }
222  else { // lOrG == LocallyReplicated
223  numLocalElements = as<size_t> (numGlobalElements);
224  minMyGID_ = indexBase;
225  maxMyGID_ = indexBase + numGlobalElements - 1;
226  distributed_ = false;
227  }
228 
// NOTE(review): minAllGID_ / maxAllGID_ are assigned here
// unconditionally, which repeats the assignment already done in the
// GloballyDistributed branch above (lines 218-219).  Harmless, but
// redundant.
229  minAllGID_ = indexBase;
230  maxAllGID_ = indexBase + numGlobalElements - 1;
231  indexBase_ = indexBase;
232  numGlobalElements_ = numGlobalElements;
233  numLocalElements_ = numLocalElements;
// A contiguous Map's whole local range is, by construction, its
// initial contiguous run.
234  firstContiguousGID_ = minMyGID_;
235  lastContiguousGID_ = maxMyGID_;
236  contiguous_ = true;
237 
238  // Create the Directory on demand in getRemoteIndexList().
239  //setupDirectory ();
240  }
241 
242  template <class LocalOrdinal, class GlobalOrdinal, class Node>
// "Contiguous nonuniform" constructor: each process supplies its own
// numLocalElements; GIDs are assigned contiguously in rank order via
// an inclusive sum-scan.  numGlobalElements may be the "invalid"
// sentinel (GSTI), in which case this constructor computes the global
// total (last process of the scan broadcasts the final sum).
// Collective over comm.
//
// Parameters:
//   numGlobalElements - total GID count, or GSTI to have it computed
//   numLocalElements  - number of GIDs owned by the calling process
//   indexBase         - smallest global index
//   comm              - communicator over which the Map lives
//   node              - not referenced in this body (legacy
//                       parameter, presumably for backward
//                       compatibility — confirm upstream)
// NOTE(review): original line 243 ("Map<...>::" qualifier) appears
// elided by the documentation extractor.
244  Map (global_size_t numGlobalElements,
245  size_t numLocalElements,
246  GlobalOrdinal indexBase,
247  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
248  const Teuchos::RCP<Node> &node) :
249  comm_ (comm),
250  uniform_ (false),
251  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
252  {
253  using Teuchos::as;
254  using Teuchos::broadcast;
255  using Teuchos::outArg;
256  using Teuchos::reduceAll;
257  using Teuchos::REDUCE_MIN;
258  using Teuchos::REDUCE_MAX;
259  using Teuchos::REDUCE_SUM;
260  using Teuchos::scan;
261  typedef GlobalOrdinal GO;
262  typedef global_size_t GST;
263  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
264 
266 
267 #ifdef HAVE_TPETRA_DEBUG
268  // Global sum of numLocalElements over all processes.
269  // Keep this for later debug checks.
270  const GST debugGlobalSum =
271  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
272  indexBase, comm);
273 #endif // HAVE_TPETRA_DEBUG
274 
275  // Distribute the elements across the nodes so that they are
276  // - non-overlapping
277  // - contiguous
278 
279  // This differs from the first Map constructor (that only takes a
280  // global number of elements) in that the user has specified the
281  // number of local elements, so that the elements are not
282  // (necessarily) evenly distributed over the processes.
283 
284  // Compute my local offset. This is an inclusive scan, so to get
285  // the final offset, we subtract off the input.
286  GO scanResult = 0;
287  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
288  const GO myOffset = scanResult - numLocalElements;
289 
290  if (numGlobalElements != GSTI) {
291  numGlobalElements_ = numGlobalElements; // Use the user's value.
292  } else {
293  // Inclusive scan means that the last process has the final sum.
294  // Rather than doing a reduceAll to get the sum of
295  // numLocalElements, we can just have the last process broadcast
296  // its result. That saves us a round of log(numProcs) messages.
297  const int numProcs = comm->getSize ();
298  GST globalSum = scanResult;
299  if (numProcs > 1) {
300  broadcast (*comm, numProcs - 1, outArg (globalSum));
301  }
302  numGlobalElements_ = globalSum;
303 
304 #ifdef HAVE_TPETRA_DEBUG
305  // No need for an all-reduce here; both come from collectives.
306  TEUCHOS_TEST_FOR_EXCEPTION(
307  globalSum != debugGlobalSum, std::logic_error,
308  "Tpetra::Map constructor (contiguous nonuniform): "
309  "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum
310  << ". Please report this bug to the Tpetra developers.");
311 #endif // HAVE_TPETRA_DEBUG
312  }
313  numLocalElements_ = numLocalElements;
314  indexBase_ = indexBase;
// Empty Maps (globally or locally) get an inverted [max, lowest]
// range so that "is GID in range" style tests always fail.
315  minAllGID_ = (numGlobalElements_ == 0) ?
316  std::numeric_limits<GO>::max () :
317  indexBase;
318  maxAllGID_ = (numGlobalElements_ == 0) ?
319  std::numeric_limits<GO>::lowest () :
320  indexBase + static_cast<GO> (numGlobalElements_) - static_cast<GO> (1);
321  minMyGID_ = (numLocalElements_ == 0) ?
322  std::numeric_limits<GO>::max () :
323  indexBase + static_cast<GO> (myOffset);
324  maxMyGID_ = (numLocalElements_ == 0) ?
325  std::numeric_limits<GO>::lowest () :
326  indexBase + myOffset + static_cast<GO> (numLocalElements) - static_cast<GO> (1);
327  firstContiguousGID_ = minMyGID_;
328  lastContiguousGID_ = maxMyGID_;
329  contiguous_ = true;
// checkIsDist() decides "distributed" (declared elsewhere in this
// class; not visible in this chunk).
330  distributed_ = checkIsDist ();
331 
332  // Create the Directory on demand in getRemoteIndexList().
333  //setupDirectory ();
334  }
335 
336  template <class LocalOrdinal, class GlobalOrdinal, class Node>
// Debug-only validation shared by the nonuniform constructors:
// verifies (via collectives) that all processes passed the same
// numGlobalElements and indexBase, and that the sum of
// numLocalElements over all processes matches numGlobalElements
// (unless numGlobalElements is the GSTI "compute it for me"
// sentinel).  Returns the global sum of numLocalElements when
// HAVE_TPETRA_DEBUG is defined, and 0 otherwise.  Throws
// std::invalid_argument on any mismatch.  Collective over comm.
// NOTE(review): the return-type and class-qualifier lines (original
// lines 337-338) appear elided by the documentation extractor.
339  initialNonuniformDebugCheck (const global_size_t numGlobalElements,
340  const size_t numLocalElements,
341  const GlobalOrdinal indexBase,
342  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) const
343  {
344 #ifdef HAVE_TPETRA_DEBUG
345  using Teuchos::broadcast;
346  using Teuchos::outArg;
347  using Teuchos::ptr;
348  using Teuchos::REDUCE_MAX;
349  using Teuchos::REDUCE_MIN;
350  using Teuchos::REDUCE_SUM;
351  using Teuchos::reduceAll;
352  typedef GlobalOrdinal GO;
353  typedef global_size_t GST;
354  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
355 
356  // The user has specified the distribution of indices over the
357  // processes. The distribution is not necessarily contiguous or
358  // equally shared over the processes.
359  //
360  // We assume that the number of local elements can be stored in a
361  // size_t. The instance member numLocalElements_ is a size_t, so
362  // this variable and that should have the same type.
363 
364  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
365  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
366  outArg (debugGlobalSum));
367  // In debug mode only, check whether numGlobalElements and
368  // indexBase are the same over all processes in the communicator.
369  {
370  GST proc0NumGlobalElements = numGlobalElements;
// NOTE(review): this broadcast uses the member comm_, while the
// reductions below use the comm argument — presumably these are the
// same communicator at every call site; confirm against callers.
371  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
372  GST minNumGlobalElements = numGlobalElements;
373  GST maxNumGlobalElements = numGlobalElements;
374  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
375  outArg (minNumGlobalElements));
376  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
377  outArg (maxNumGlobalElements));
378  TEUCHOS_TEST_FOR_EXCEPTION(
379  minNumGlobalElements != maxNumGlobalElements ||
380  numGlobalElements != minNumGlobalElements,
381  std::invalid_argument,
382  "Tpetra::Map constructor: All processes must provide the same number "
383  "of global elements. This is true even if that argument is Teuchos::"
384  "OrdinalTraits<global_size_t>::invalid() to signal that the Map should "
385  "compute the global number of elements. Process 0 set numGlobalElements"
386  " = " << proc0NumGlobalElements << ". The calling process "
387  << comm->getRank () << " set numGlobalElements = " << numGlobalElements
388  << ". The min and max values over all processes are "
389  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
390 
391  GO proc0IndexBase = indexBase;
392  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
393  GO minIndexBase = indexBase;
394  GO maxIndexBase = indexBase;
395  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
396  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
397  TEUCHOS_TEST_FOR_EXCEPTION(
398  minIndexBase != maxIndexBase || indexBase != minIndexBase,
399  std::invalid_argument,
400  "Tpetra::Map constructor: "
401  "All processes must provide the same indexBase argument. "
402  "Process 0 set indexBase = " << proc0IndexBase << ". The calling "
403  "process " << comm->getRank () << " set indexBase = " << indexBase
404  << ". The min and max values over all processes are "
405  << minIndexBase << " resp. " << maxIndexBase << ".");
406 
407  // Make sure that the sum of numLocalElements over all processes
408  // equals numGlobalElements.
409  TEUCHOS_TEST_FOR_EXCEPTION
410  (numGlobalElements != GSTI && debugGlobalSum != numGlobalElements,
411  std::invalid_argument, "Tpetra::Map constructor: The sum of each "
412  "process' number of indices over all processes, " << debugGlobalSum
413  << " != numGlobalElements = " << numGlobalElements << ". If you "
414  "would like this constructor to compute numGlobalElements for you, "
415  "you may set numGlobalElements = "
416  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() on input. "
417  "Please note that this is NOT necessarily -1.");
418  }
419 
420  return debugGlobalSum;
421 #else
// Release builds skip all checks and return a dummy value.
422  return static_cast<global_size_t> (0);
423 #endif // HAVE_TPETRA_DEBUG
424  }
425 
426  template <class LocalOrdinal, class GlobalOrdinal, class Node>
427  void
428  Map<LocalOrdinal,GlobalOrdinal,Node>::
// Shared implementation of the "noncontiguous" (arbitrary GID list)
// constructors.  Given a non-owned host view of this process' GIDs,
// it: (1) computes/validates the global element count; (2) detects
// the initial contiguous run of GIDs starting at entry 0; (3) builds
// the LID->GID array (lgMap_) and the GID->LID hash table (glMap_,
// which stores only the noncontiguous tail); (4) computes local and
// global min/max GIDs and the distributed_ flag via all-reduces.
// Collective over comm.  Throws std::runtime_error if Kokkos is not
// initialized, and std::invalid_argument if the global minimum GID
// is below indexBase.
429  initWithNonownedHostIndexList (const global_size_t numGlobalElements,
430  const Kokkos::View<const GlobalOrdinal*,
431  Kokkos::LayoutLeft,
432  Kokkos::HostSpace,
433  Kokkos::MemoryUnmanaged>& entryList_host,
434  const GlobalOrdinal indexBase,
435  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
436  {
437  using Kokkos::LayoutLeft;
438  using Kokkos::subview;
439  using Kokkos::View;
440  using Teuchos::as;
441  using Teuchos::broadcast;
442  using Teuchos::outArg;
443  using Teuchos::ptr;
444  using Teuchos::REDUCE_MAX;
445  using Teuchos::REDUCE_MIN;
446  using Teuchos::REDUCE_SUM;
447  using Teuchos::reduceAll;
448  typedef LocalOrdinal LO;
449  typedef GlobalOrdinal GO;
450  typedef global_size_t GST;
451  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
452 
453  // Make sure that Kokkos has been initialized (Github Issue #513).
454  TEUCHOS_TEST_FOR_EXCEPTION
455  (! Kokkos::is_initialized (), std::runtime_error,
456  "Tpetra::Map constructor: The Kokkos execution space "
457  << Teuchos::TypeNameTraits<execution_space>::name ()
458  << " has not been initialized. "
459  "Please initialize it before creating a Map.")
460 
461  // The user has specified the distribution of indices over the
462  // processes, via the input array of global indices on each
463  // process. The distribution is not necessarily contiguous or
464  // equally shared over the processes.
465 
466  // The length of the input array on this process is the number of
467  // local indices to associate with this process, even though the
468  // input array contains global indices. We assume that the number
469  // of local indices on a process can be stored in a size_t;
470  // numLocalElements_ is a size_t, so this variable and that should
471  // have the same type.
472  const size_t numLocalElements = static_cast<size_t> (entryList_host.size ());
473 
// Debug-mode consistency checks (no-op in release builds).
474  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
475  indexBase, comm);
476 
477  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
478  // reduction is redundant, since the directory Map will have to do
479  // the same thing. Thus, we could do the scan and broadcast for
480  // the directory Map here, and give the computed offsets to the
481  // directory Map's constructor. However, a reduction costs less
482  // than a scan and broadcast, so this still saves time if users of
483  // this Map don't ever need the Directory (i.e., if they never
484  // call getRemoteIndexList on this Map).
485  if (numGlobalElements != GSTI) {
486  numGlobalElements_ = numGlobalElements; // Use the user's value.
487  }
488  else { // The user wants us to compute the sum.
489  reduceAll<int, GST> (*comm, REDUCE_SUM,
490  static_cast<GST> (numLocalElements),
491  outArg (numGlobalElements_));
492  }
493 
494  // mfh 20 Feb 2013: We've never quite done the right thing for
495  // duplicate GIDs here. Duplicate GIDs have always been counted
496  // distinctly in numLocalElements_, and thus should get a
497  // different LID. However, we've always used std::map or a hash
498  // table for the GID -> LID lookup table, so distinct GIDs always
499  // map to the same LID. Furthermore, the order of the input GID
500  // list matters, so it's not desirable to sort for determining
501  // uniqueness.
502  //
503  // I've chosen for now to write this code as if the input GID list
504  // contains no duplicates. If this is not desired, we could use
505  // the lookup table itself to determine uniqueness: If we haven't
506  // seen the GID before, it gets a new LID and it's added to the
507  // LID -> GID and GID -> LID tables. If we have seen the GID
508  // before, it doesn't get added to either table. I would
509  // implement this, but it would cost more to do the double lookups
510  // in the table (one to check, and one to insert).
511  //
512  // More importantly, since we build the GID -> LID table in (a
513  // thread-) parallel (way), the order in which duplicate GIDs may
514  // get inserted is not defined. This would make the assignment of
515  // LID to GID nondeterministic.
516 
517  numLocalElements_ = numLocalElements;
518  indexBase_ = indexBase;
519 
// Provisional values; overwritten in both branches below.
520  minMyGID_ = indexBase_;
521  maxMyGID_ = indexBase_;
522 
523  // NOTE (mfh 27 May 2015): While finding the initial contiguous
524  // GID range requires looking at all the GIDs in the range,
525  // dismissing an interval of GIDs only requires looking at the
526  // first and last GIDs. Thus, we could do binary search backwards
527  // from the end in order to catch the common case of a contiguous
528  // interval followed by noncontiguous entries. On the other hand,
529  // we could just expose this case explicitly as yet another Map
530  // constructor, and avoid the trouble of detecting it.
531  if (numLocalElements_ > 0) {
532  // Find contiguous GID range, with the restriction that the
533  // beginning of the range starts with the first entry. While
534  // doing so, fill in the LID -> GID table.
535  View<GO*, LayoutLeft, device_type> lgMap ("lgMap", numLocalElements_);
536  auto lgMap_host = Kokkos::create_mirror_view (lgMap);
537 
538  // The input array entryList_host is already on host, so we
539  // don't need to take a host view of it.
540  // auto entryList_host = Kokkos::create_mirror_view (entryList);
541  // Kokkos::deep_copy (entryList_host, entryList);
542 
543  firstContiguousGID_ = entryList_host[0];
// Start one past the first GID; the loop below extends this as long
// as each successive entry continues the run, and the decrement
// after the loop restores the true last contiguous GID.
544  lastContiguousGID_ = firstContiguousGID_+1;
545 
546  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
547  // anyway, so we have to look at them all. The logical way to
548  // find the first noncontiguous entry would thus be to "reduce,"
549  // where the local reduction result is whether entryList[i] + 1
550  // == entryList[i+1].
551 
552  lgMap_host[0] = firstContiguousGID_;
553  size_t i = 1;
554  for ( ; i < numLocalElements_; ++i) {
555  const GO curGid = entryList_host[i];
556  const LO curLid = as<LO> (i);
557 
558  if (lastContiguousGID_ != curGid) break;
559 
560  // Add the entry to the LID->GID table only after we know that
561  // the current GID is in the initial contiguous sequence, so
562  // that we don't repeat adding it in the first iteration of
563  // the loop below over the remaining noncontiguous GIDs.
564  lgMap_host[curLid] = curGid;
565  ++lastContiguousGID_;
566  }
567  --lastContiguousGID_;
568 
569  // [firstContiguousGID_, lastContigousGID_] is the initial
570  // sequence of contiguous GIDs. We can start the min and max
571  // GID using this range.
572  minMyGID_ = firstContiguousGID_;
573  maxMyGID_ = lastContiguousGID_;
574 
575  // Compute the GID -> LID lookup table, _not_ including the
576  // initial sequence of contiguous GIDs.
577  {
// i now indexes the first noncontiguous entry (or is one past the
// end if the whole list was contiguous).
578  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
579  auto nonContigGids_host = subview (entryList_host, ncRange);
580  TEUCHOS_TEST_FOR_EXCEPTION
581  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
582  static_cast<size_t> (entryList_host.extent (0) - i),
583  std::logic_error, "Tpetra::Map noncontiguous constructor: "
584  "nonContigGids_host.extent(0) = "
585  << nonContigGids_host.extent (0)
586  << " != entryList_host.extent(0) - i = "
587  << (entryList_host.extent (0) - i) << " = "
588  << entryList_host.extent (0) << " - " << i
589  << ". Please report this bug to the Tpetra developers.");
590 
591  // FixedHashTable's constructor expects an owned device View,
592  // so we must deep-copy the subview of the input indices.
593  View<GO*, LayoutLeft, device_type>
594  nonContigGids ("nonContigGids", nonContigGids_host.size ());
595  Kokkos::deep_copy (nonContigGids, nonContigGids_host);
596 
597  glMap_ = global_to_local_table_type (nonContigGids,
598  firstContiguousGID_,
599  lastContiguousGID_,
600  static_cast<LO> (i));
601  }
602 
603  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
604  // table above, we have to look at all the (noncontiguous) input
605  // indices anyway. Thus, why not have the constructor compute
606  // and return the min and max?
607 
// Continue from the first noncontiguous entry: finish the LID->GID
// array and fold each GID into the local min/max.
608  for ( ; i < numLocalElements_; ++i) {
609  const GO curGid = entryList_host[i];
610  const LO curLid = as<LO> (i);
611  lgMap_host[curLid] = curGid; // LID -> GID table
612 
613  // While iterating through entryList, we compute its
614  // (process-local) min and max elements.
615  if (curGid < minMyGID_) {
616  minMyGID_ = curGid;
617  }
618  if (curGid > maxMyGID_) {
619  maxMyGID_ = curGid;
620  }
621  }
622 
623  // We filled lgMap on host above; now sync back to device.
624  Kokkos::deep_copy (lgMap, lgMap_host);
625 
626  // "Commit" the local-to-global lookup table we filled in above.
627  lgMap_ = lgMap;
628  // We've already created this, so use it.
629  lgMapHost_ = lgMap_host;
630  }
631  else {
// No local elements: use an inverted [max, lowest] local range so
// range-membership tests fail, and so the signed min/max all-reduce
// below ignores this process.
632  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
633  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
634  // This insures tests for GIDs in the range
635  // [firstContiguousGID_, lastContiguousGID_] fail for processes
636  // with no local elements.
637  firstContiguousGID_ = indexBase_+1;
638  lastContiguousGID_ = indexBase_;
639  // glMap_ was default constructed, so it's already empty.
640  }
641 
642  // Compute the min and max of all processes' GIDs. If
643  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
644  // are both indexBase_. This is wrong, but fixing it would
645  // require either a fancy sparse all-reduce, or a custom reduction
646  // operator that ignores invalid values ("invalid" means
647  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
648  //
649  // Also, while we're at it, use the same all-reduce to figure out
650  // if the Map is distributed. "Distributed" means that there is
651  // at least one process with a number of local elements less than
652  // the number of global elements.
653  //
654  // We're computing the min and max of all processes' GIDs using a
655  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
656  // and y are signed). (This lets us combine the min and max into
657  // a single all-reduce.) If each process sets localDist=1 if its
658  // number of local elements is strictly less than the number of
659  // global elements, and localDist=0 otherwise, then a MAX
660  // all-reduce on localDist tells us if the Map is distributed (1
661  // if yes, 0 if no). Thus, we can append localDist onto the end
662  // of the data and get the global result from the all-reduce.
663  if (std::numeric_limits<GO>::is_signed) {
664  // Does my process NOT own all the elements?
665  const GO localDist =
666  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
667 
668  GO minMaxInput[3];
669  minMaxInput[0] = -minMyGID_;
670  minMaxInput[1] = maxMyGID_;
671  minMaxInput[2] = localDist;
672 
673  GO minMaxOutput[3];
674  minMaxOutput[0] = 0;
675  minMaxOutput[1] = 0;
676  minMaxOutput[2] = 0;
677  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
678  minAllGID_ = -minMaxOutput[0];
679  maxAllGID_ = minMaxOutput[1];
680  const GO globalDist = minMaxOutput[2];
681  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
682  }
683  else { // unsigned; use two reductions
684  // This is always correct, no matter the signedness of GO.
685  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
686  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
687  distributed_ = checkIsDist ();
688  }
689 
690  contiguous_ = false; // "Contiguous" is conservative.
691 
692  TEUCHOS_TEST_FOR_EXCEPTION(
693  minAllGID_ < indexBase_,
694  std::invalid_argument,
695  "Tpetra::Map constructor (noncontiguous): "
696  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
697  "less than the given indexBase = " << indexBase_ << ".");
698 
699  // Create the Directory on demand in getRemoteIndexList().
700  //setupDirectory ();
701  }
702 
703  template <class LocalOrdinal, class GlobalOrdinal, class Node>
// Noncontiguous constructor taking a raw pointer + length for the
// list of GIDs owned by the calling process.  Wraps the pointer in a
// non-owning host Kokkos::View and delegates all real work to
// initWithNonownedHostIndexList.  Collective over comm.
// NOTE(review): original line 704 ("Map<...>::" qualifier) appears
// elided by the documentation extractor.
705  Map (const global_size_t numGlobalElements,
706  const GlobalOrdinal indexList[],
707  const LocalOrdinal indexListSize,
708  const GlobalOrdinal indexBase,
709  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
710  comm_ (comm),
711  uniform_ (false),
712  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
713  {
715 
716  // Not quite sure if I trust all code to behave correctly if the
717  // pointer is nonnull but the array length is nonzero, so I'll
718  // make sure the raw pointer is null if the length is zero.
719  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
720  Kokkos::View<const GlobalOrdinal*,
721  Kokkos::LayoutLeft,
722  Kokkos::HostSpace,
723  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
724  initWithNonownedHostIndexList (numGlobalElements, inds, indexBase, comm);
725  }
726 
727  template <class LocalOrdinal, class GlobalOrdinal, class Node>
// Noncontiguous constructor taking a Teuchos::ArrayView of the GIDs
// owned by the calling process.  Wraps the view's raw pointer in a
// non-owning host Kokkos::View and delegates all real work to
// initWithNonownedHostIndexList.  Collective over comm.  The node
// argument is not referenced in this body (legacy parameter,
// presumably for backward compatibility — confirm upstream).
// NOTE(review): original line 728 ("Map<...>::" qualifier) appears
// elided by the documentation extractor.
729  Map (const global_size_t numGlobalElements,
730  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
731  const GlobalOrdinal indexBase,
732  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
733  const Teuchos::RCP<Node>& node) :
734  comm_ (comm),
735  uniform_ (false),
736  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
737  {
739 
740  const size_t numLclInds = static_cast<size_t> (entryList.size ());
741 
742  // Not quite sure if I trust both ArrayView and View to behave
743  // correctly if the pointer is nonnull but the array length is
744  // nonzero, so I'll make sure it's null if the length is zero.
745  const GlobalOrdinal* const indsRaw =
746  numLclInds == 0 ? NULL : entryList.getRawPtr ();
747  Kokkos::View<const GlobalOrdinal*,
748  Kokkos::LayoutLeft,
749  Kokkos::HostSpace,
750  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
751  initWithNonownedHostIndexList (numGlobalElements, inds, indexBase, comm);
752  }
753 
// Noncontiguous Map constructor taking the global index list as a
// (possibly device-resident) Kokkos::View. This is the workhorse
// version: it computes the global element count (when the caller passes
// the "invalid" sentinel), detects an initial contiguous run of GIDs,
// fills the LID->GID array (lgMap_) and the GID->LID lookup table
// (glMap_), computes local and global min/max GIDs, and decides whether
// the Map is distributed.
754  template <class LocalOrdinal, class GlobalOrdinal, class Node>
756  Map (const global_size_t numGlobalElements,
757  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
758  const GlobalOrdinal indexBase,
759  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
760  comm_ (comm),
761  uniform_ (false),
762  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
763  {
764  using Kokkos::LayoutLeft;
765  using Kokkos::subview;
766  using Kokkos::View;
767  using Teuchos::arcp;
768  using Teuchos::ArrayView;
769  using Teuchos::as;
770  using Teuchos::broadcast;
771  using Teuchos::outArg;
772  using Teuchos::ptr;
773  using Teuchos::REDUCE_MAX;
774  using Teuchos::REDUCE_MIN;
775  using Teuchos::REDUCE_SUM;
776  using Teuchos::reduceAll;
777  using Teuchos::typeName;
778  typedef LocalOrdinal LO;
779  typedef GlobalOrdinal GO;
780  typedef global_size_t GST;
// Sentinel meaning "compute the global element count for me."
781  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
782 
784 
785  // The user has specified the distribution of indices over the
786  // processes, via the input array of global indices on each
787  // process. The distribution is not necessarily contiguous or
788  // equally shared over the processes.
789 
790  // The length of the input array on this process is the number of
791  // local indices to associate with this process, even though the
792  // input array contains global indices. We assume that the number
793  // of local indices on a process can be stored in a size_t;
794  // numLocalElements_ is a size_t, so this variable and that should
795  // have the same type.
796  const size_t numLocalElements = static_cast<size_t> (entryList.size ());
797 
798  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
799  indexBase, comm);
800 
801  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
802  // reduction is redundant, since the directory Map will have to do
803  // the same thing. Thus, we could do the scan and broadcast for
804  // the directory Map here, and give the computed offsets to the
805  // directory Map's constructor. However, a reduction costs less
806  // than a scan and broadcast, so this still saves time if users of
807  // this Map don't ever need the Directory (i.e., if they never
808  // call getRemoteIndexList on this Map).
809  if (numGlobalElements != GSTI) {
810  numGlobalElements_ = numGlobalElements; // Use the user's value.
811  }
812  else { // The user wants us to compute the sum.
813  reduceAll<int, GST> (*comm, REDUCE_SUM,
814  static_cast<GST> (numLocalElements),
815  outArg (numGlobalElements_));
816  }
817 
818  // mfh 20 Feb 2013: We've never quite done the right thing for
819  // duplicate GIDs here. Duplicate GIDs have always been counted
820  // distinctly in numLocalElements_, and thus should get a
821  // different LID. However, we've always used std::map or a hash
822  // table for the GID -> LID lookup table, so distinct GIDs always
823  // map to the same LID. Furthermore, the order of the input GID
824  // list matters, so it's not desirable to sort for determining
825  // uniqueness.
826  //
827  // I've chosen for now to write this code as if the input GID list
828  // contains no duplicates. If this is not desired, we could use
829  // the lookup table itself to determine uniqueness: If we haven't
830  // seen the GID before, it gets a new LID and it's added to the
831  // LID -> GID and GID -> LID tables. If we have seen the GID
832  // before, it doesn't get added to either table. I would
833  // implement this, but it would cost more to do the double lookups
834  // in the table (one to check, and one to insert).
835  //
836  // More importantly, since we build the GID -> LID table in (a
837  // thread-) parallel (way), the order in which duplicate GIDs may
838  // get inserted is not defined. This would make the assignment of
839  // LID to GID nondeterministic.
840 
841  numLocalElements_ = numLocalElements;
842  indexBase_ = indexBase;
843 
844  minMyGID_ = indexBase_;
845  maxMyGID_ = indexBase_;
846 
847  // NOTE (mfh 27 May 2015): While finding the initial contiguous
848  // GID range requires looking at all the GIDs in the range,
849  // dismissing an interval of GIDs only requires looking at the
850  // first and last GIDs. Thus, we could do binary search backwards
851  // from the end in order to catch the common case of a contiguous
852  // interval followed by noncontiguous entries. On the other hand,
853  // we could just expose this case explicitly as yet another Map
854  // constructor, and avoid the trouble of detecting it.
855  if (numLocalElements_ > 0) {
856  // Find contiguous GID range, with the restriction that the
857  // beginning of the range starts with the first entry. While
858  // doing so, fill in the LID -> GID table.
859  View<GO*, LayoutLeft, device_type> lgMap ("lgMap", numLocalElements_);
860  auto lgMap_host = Kokkos::create_mirror_view (lgMap);
861 
862  // Creating the mirror view is trivial, and the deep_copy is a
863  // no-op, if entryList is on host already.
864  auto entryList_host = Kokkos::create_mirror_view (entryList);
865  Kokkos::deep_copy (entryList_host, entryList);
866 
867  firstContiguousGID_ = entryList_host[0];
868  lastContiguousGID_ = firstContiguousGID_+1;
869 
870  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
871  // anyway, so we have to look at them all. The logical way to
872  // find the first noncontiguous entry would thus be to "reduce,"
873  // where the local reduction result is whether entryList[i] + 1
874  // == entryList[i+1].
875 
876  lgMap_host[0] = firstContiguousGID_;
877  size_t i = 1;
878  for ( ; i < numLocalElements_; ++i) {
879  const GO curGid = entryList_host[i];
880  const LO curLid = as<LO> (i);
881 
882  if (lastContiguousGID_ != curGid) break;
883 
884  // Add the entry to the LID->GID table only after we know that
885  // the current GID is in the initial contiguous sequence, so
886  // that we don't repeat adding it in the first iteration of
887  // the loop below over the remaining noncontiguous GIDs.
888  lgMap_host[curLid] = curGid;
889  ++lastContiguousGID_;
890  }
// The loop above leaves lastContiguousGID_ one past the end; step back.
891  --lastContiguousGID_;
892 
893  // [firstContiguousGID_, lastContiguousGID_] is the initial
894  // sequence of contiguous GIDs. We can start the min and max
895  // GID using this range.
896  minMyGID_ = firstContiguousGID_;
897  maxMyGID_ = lastContiguousGID_;
898 
899  // Compute the GID -> LID lookup table, _not_ including the
900  // initial sequence of contiguous GIDs.
901  {
902  const std::pair<size_t, size_t> ncRange (i, entryList.extent (0));
903  auto nonContigGids = subview (entryList, ncRange);
904  TEUCHOS_TEST_FOR_EXCEPTION
905  (static_cast<size_t> (nonContigGids.extent (0)) !=
906  static_cast<size_t> (entryList.extent (0) - i),
907  std::logic_error, "Tpetra::Map noncontiguous constructor: "
908  "nonContigGids.extent(0) = "
909  << nonContigGids.extent (0)
910  << " != entryList.extent(0) - i = "
911  << (entryList.extent (0) - i) << " = "
912  << entryList.extent (0) << " - " << i
913  << ". Please report this bug to the Tpetra developers.");
914 
915  glMap_ = global_to_local_table_type (nonContigGids,
916  firstContiguousGID_,
917  lastContiguousGID_,
918  static_cast<LO> (i));
919  }
920 
921  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
922  // table above, we have to look at all the (noncontiguous) input
923  // indices anyway. Thus, why not have the constructor compute
924  // and return the min and max?
925 
926  for ( ; i < numLocalElements_; ++i) {
927  const GO curGid = entryList_host[i];
928  const LO curLid = static_cast<LO> (i);
929  lgMap_host[curLid] = curGid; // LID -> GID table
930 
931  // While iterating through entryList, we compute its
932  // (process-local) min and max elements.
933  if (curGid < minMyGID_) {
934  minMyGID_ = curGid;
935  }
936  if (curGid > maxMyGID_) {
937  maxMyGID_ = curGid;
938  }
939  }
940 
941  // We filled lgMap on host above; now sync back to device.
942  Kokkos::deep_copy (lgMap, lgMap_host);
943 
944  // "Commit" the local-to-global lookup table we filled in above.
945  lgMap_ = lgMap;
946  // We've already created this, so use it.
947  lgMapHost_ = lgMap_host;
948  }
949  else {
950  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
951  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
952  // This ensures tests for GIDs in the range
953  // [firstContiguousGID_, lastContiguousGID_] fail for processes
954  // with no local elements.
955  firstContiguousGID_ = indexBase_+1;
956  lastContiguousGID_ = indexBase_;
957  // glMap_ was default constructed, so it's already empty.
958  }
959 
960  // Compute the min and max of all processes' GIDs. If
961  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
962  // are both indexBase_. This is wrong, but fixing it would
963  // require either a fancy sparse all-reduce, or a custom reduction
964  // operator that ignores invalid values ("invalid" means
965  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
966  //
967  // Also, while we're at it, use the same all-reduce to figure out
968  // if the Map is distributed. "Distributed" means that there is
969  // at least one process with a number of local elements less than
970  // the number of global elements.
971  //
972  // We're computing the min and max of all processes' GIDs using a
973  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
974  // and y are signed). (This lets us combine the min and max into
975  // a single all-reduce.) If each process sets localDist=1 if its
976  // number of local elements is strictly less than the number of
977  // global elements, and localDist=0 otherwise, then a MAX
978  // all-reduce on localDist tells us if the Map is distributed (1
979  // if yes, 0 if no). Thus, we can append localDist onto the end
980  // of the data and get the global result from the all-reduce.
981  if (std::numeric_limits<GO>::is_signed) {
982  // Does my process NOT own all the elements?
983  const GO localDist =
984  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
985 
986  GO minMaxInput[3];
987  minMaxInput[0] = -minMyGID_;
988  minMaxInput[1] = maxMyGID_;
989  minMaxInput[2] = localDist;
990 
991  GO minMaxOutput[3];
992  minMaxOutput[0] = 0;
993  minMaxOutput[1] = 0;
994  minMaxOutput[2] = 0;
995  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
996  minAllGID_ = -minMaxOutput[0];
997  maxAllGID_ = minMaxOutput[1];
998  const GO globalDist = minMaxOutput[2];
999  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1000  }
1001  else { // unsigned; use two reductions
1002  // This is always correct, no matter the signedness of GO.
1003  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1004  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1005  distributed_ = checkIsDist ();
1006  }
1007 
1008  contiguous_ = false; // "Contiguous" is conservative.
1009 
1010  TEUCHOS_TEST_FOR_EXCEPTION(
1011  minAllGID_ < indexBase_,
1012  std::invalid_argument,
1013  "Tpetra::Map constructor (noncontiguous): "
1014  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1015  "less than the given indexBase = " << indexBase_ << ".");
1016 
1017  // Create the Directory on demand in getRemoteIndexList().
1018  //setupDirectory ();
1019  }
1020 
1021 
1022  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1024  {
1025  if (! Kokkos::is_initialized ()) {
1026  std::ostringstream os;
1027  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1028  "Kokkos::finalize() has been called. This is user error! There are "
1029  "two likely causes: " << std::endl <<
1030  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1031  << std::endl <<
1032  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1033  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1034  "or Tpetra::finalize()." << std::endl << std::endl <<
1035  "Don't do either of these! Please refer to GitHib Issue #2372."
1036  << std::endl;
1037  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1038  this->getComm ().getRawPtr ());
1039  }
1040  else {
1041  using ::Tpetra::Details::mpiIsInitialized;
1042  using ::Tpetra::Details::mpiIsFinalized;
1043  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1044 
1045  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1046  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1047  mpiIsInitialized () && mpiIsFinalized ()) {
1048  // Tpetra itself does not require MPI, even if building with
1049  // MPI. It is legal to create Tpetra objects that do not use
1050  // MPI, even in an MPI program. However, calling Tpetra stuff
1051  // after MPI_Finalize() has been called is a bad idea, since
1052  // some Tpetra defaults may use MPI if available.
1053  std::ostringstream os;
1054  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1055  "MPI_Finalize() has been called. This is user error! There are "
1056  "two likely causes: " << std::endl <<
1057  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1058  << std::endl <<
1059  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1060  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1061  "Tpetra::finalize()." << std::endl << std::endl <<
1062  "Don't do either of these! Please refer to GitHib Issue #2372."
1063  << std::endl;
1064  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1065  }
1066  }
1067  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1068  // because Tpetra does not yet require Tpetra::initialize /
1069  // Tpetra::finalize.
1070  }
1071 
1072 
// Returns true if and only if every global index in this Map is owned by
// exactly one process. Collective the first time it is called (it builds
// the Directory on demand) and delegates the actual check to it.
1073  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1074  bool
1076  {
1077  TEUCHOS_TEST_FOR_EXCEPTION(
1078  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1079  "getComm() returns null. Please report this bug to the Tpetra "
1080  "developers.");
1081 
1082  // This is a collective operation, if it hasn't been called before.
1083  setupDirectory ();
1084  return directory_->isOneToOne (*this);
1085  }
1086 
1087 
// Maps a global index to the calling process's local index, or
// OrdinalTraits<LocalOrdinal>::invalid() if this process does not own
// the given global index. Three cases, cheapest first: fully contiguous
// Map (pure arithmetic), the initial contiguous GID run of a
// noncontiguous Map (arithmetic), and finally the GID->LID hash table.
1088  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1089  LocalOrdinal
1091  getLocalElement (GlobalOrdinal globalIndex) const
1092  {
1093  if (isContiguous ()) {
1094  if (globalIndex < getMinGlobalIndex () ||
1095  globalIndex > getMaxGlobalIndex ()) {
1096  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1097  }
1098  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1099  }
1100  else if (globalIndex >= firstContiguousGID_ &&
1101  globalIndex <= lastContiguousGID_) {
1102  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1103  }
1104  else {
1105  // If the given global index is not in the table, this returns
1106  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1107  return glMap_.get (globalIndex);
1108  }
1109  }
1110 
// Maps a local index owned by the calling process to its global index,
// or OrdinalTraits<GlobalOrdinal>::invalid() if localIndex is out of
// range. Contiguous Maps answer arithmetically; noncontiguous Maps read
// the host copy of the LID->GID array.
1111  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1112  GlobalOrdinal
1114  getGlobalElement (LocalOrdinal localIndex) const
1115  {
1116  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1117  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1118  }
1119  if (isContiguous ()) {
1120  return getMinGlobalIndex () + localIndex;
1121  }
1122  else {
1123  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1124  // involvement. As a result, it is thread safe.
1125  //
1126  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1127  return lgMapHost_[localIndex];
1128  }
1129  }
1130 
// Returns true iff the given local index is valid on the calling
// process, i.e., lies within [getMinLocalIndex(), getMaxLocalIndex()].
1131  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1132  bool
1134  isNodeLocalElement (LocalOrdinal localIndex) const
1135  {
1136  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1137  return false;
1138  } else {
1139  return true;
1140  }
1141  }
1142 
// Returns true iff the calling process owns the given global index,
// expressed as "getLocalElement() did not return the invalid sentinel."
1143  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1144  bool
1146  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1147  return this->getLocalElement (globalIndex) !=
1148  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1149  }
1150 
// Trivial accessor: reports whether this Map distributes its indices
// uniformly across processes (flag set by the constructors).
1151  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1153  return uniform_;
1154  }
1155 
// Trivial accessor: reports whether this Map's global indices are
// contiguous (flag set by the constructors; conservative for the
// noncontiguous constructors).
1156  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1158  return contiguous_;
1159  }
1160 
1161 
// Builds and returns the device-friendly LocalMap "view" of this Map:
// a value-type bundle of the GID->LID table, LID->GID array, index
// base, local/global min-max info, and contiguity flag, suitable for
// use inside Kokkos kernels.
1162  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1166  {
1167  return local_map_type (glMap_, lgMap_, getIndexBase (),
1168  getMinGlobalIndex (), getMaxGlobalIndex (),
1169  firstContiguousGID_, lastContiguousGID_,
1170  getNodeNumElements (), isContiguous ());
1171  }
1172 
// Collective test for Map compatibility: same global element count and
// the same number of local elements on every process. Tries several
// cheap, globally consistent shortcuts first (pointer equality,
// communicator size, global count, contiguous+uniform, shared lgMap_
// storage) before paying for the MIN all-reduce over the per-process
// local-count comparison.
1173  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1174  bool
1177  {
1178  using Teuchos::outArg;
1179  using Teuchos::REDUCE_MIN;
1180  using Teuchos::reduceAll;
1181  //
1182  // Tests that avoid the Boolean all-reduce below by using
1183  // globally consistent quantities.
1184  //
1185  if (this == &map) {
1186  // Pointer equality on one process always implies pointer
1187  // equality on all processes, since Map is immutable.
1188  return true;
1189  }
1190  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1191  // The two communicators have different numbers of processes.
1192  // It's not correct to call isCompatible() in that case. This
1193  // may result in the all-reduce hanging below.
1194  return false;
1195  }
1196  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1197  // Two Maps are definitely NOT compatible if they have different
1198  // global numbers of indices.
1199  return false;
1200  }
1201  else if (isContiguous () && isUniform () &&
1202  map.isContiguous () && map.isUniform ()) {
1203  // Contiguous uniform Maps with the same number of processes in
1204  // their communicators, and with the same global numbers of
1205  // indices, are always compatible.
1206  return true;
1207  }
1208  else if (! isContiguous () && ! map.isContiguous () &&
1209  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1210  lgMap_.data () == map.lgMap_.data ()) {
1211  // Noncontiguous Maps whose global index lists are nonempty and
1212  // have the same pointer must be the same (and therefore
1213  // contiguous).
1214  //
1215  // Nonempty is important. For example, consider a communicator
1216  // with two processes, and two Maps that share this
1217  // communicator, with zero global indices on the first process,
1218  // and different nonzero numbers of global indices on the second
1219  // process. In that case, on the first process, the pointers
1220  // would both be NULL.
1221  return true;
1222  }
1223 
1224  TEUCHOS_TEST_FOR_EXCEPTION(
1225  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1226  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1227  "checked that this condition is true above, but it's false here. "
1228  "Please report this bug to the Tpetra developers.");
1229 
1230  // Do both Maps have the same number of indices on each process?
1231  const int locallyCompat =
1232  (getNodeNumElements () == map.getNodeNumElements ()) ? 1 : 0;
1233 
// MIN all-reduce: result is 1 only when every process reported 1.
1234  int globallyCompat = 0;
1235  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1236  return (globallyCompat == 1);
1237  }
1238 
// Process-local sameness test: do this Map and the given Map assign the
// same global indices, in the same order, on the calling process? Not
// collective. Cheap checks (pointer equality, local count, local
// min/max GIDs) come first; then the contiguous/noncontiguous case
// analysis avoids materializing getNodeElementList() on a contiguous
// Map, since that would allocate storage that persists for the Map's
// lifetime.
1239  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1240  bool
1243  {
1244  using Teuchos::ArrayView;
1245  typedef GlobalOrdinal GO;
1246  typedef typename ArrayView<const GO>::size_type size_type;
1247 
1248  // If both Maps are contiguous, we can compare their GID ranges
1249  // easily by looking at the min and max GID on this process.
1250  // Otherwise, we'll compare their GID lists. If only one Map is
1251  // contiguous, then we only have to call getNodeElementList() on
1252  // the noncontiguous Map. (It's best to avoid calling it on a
1253  // contiguous Map, since it results in unnecessary storage that
1254  // persists for the lifetime of the Map.)
1255 
1256  if (this == &map) {
1257  // Pointer equality on one process always implies pointer
1258  // equality on all processes, since Map is immutable.
1259  return true;
1260  }
1261  else if (getNodeNumElements () != map.getNodeNumElements ()) {
1262  return false;
1263  }
1264  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1265  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1266  return false;
1267  }
1268  else {
1269  if (isContiguous ()) {
1270  if (map.isContiguous ()) {
1271  return true; // min and max match, so the ranges match.
1272  }
1273  else { // *this is contiguous, but map is not contiguous
1274  TEUCHOS_TEST_FOR_EXCEPTION(
1275  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1276  "Tpetra::Map::locallySameAs: BUG");
1277  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1278  const GO minLhsGid = this->getMinGlobalIndex ();
1279  const size_type numRhsElts = rhsElts.size ();
1280  for (size_type k = 0; k < numRhsElts; ++k) {
1281  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1282  if (curLhsGid != rhsElts[k]) {
1283  return false; // stop on first mismatch
1284  }
1285  }
1286  return true;
1287  }
1288  }
1289  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1290  TEUCHOS_TEST_FOR_EXCEPTION(
1291  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1292  "Tpetra::Map::locallySameAs: BUG");
1293  ArrayView<const GO> lhsElts = this->getNodeElementList ();
1294  const GO minRhsGid = map.getMinGlobalIndex ();
1295  const size_type numLhsElts = lhsElts.size ();
1296  for (size_type k = 0; k < numLhsElts; ++k) {
1297  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1298  if (curRhsGid != lhsElts[k]) {
1299  return false; // stop on first mismatch
1300  }
1301  }
1302  return true;
1303  }
1304  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1305  // Pointers to LID->GID "map" (actually just an array) are the
1306  // same, and the number of GIDs are the same.
1307  return this->getNodeNumElements () == map.getNodeNumElements ();
1308  }
1309  else { // we actually have to compare the GIDs
1310  if (this->getNodeNumElements () != map.getNodeNumElements ()) {
1311  return false; // We already checked above, but check just in case
1312  }
1313  else {
1314  ArrayView<const GO> lhsElts = getNodeElementList ();
1315  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1316 
1317  // std::equal requires that the latter range is as large as
1318  // the former. We know the ranges have equal length, because
1319  // they have the same number of local entries.
1320  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1321  }
1322  }
1323  }
1324  }
1325 
// Process-local "fitted" test: returns true iff, on the calling
// process, every local index i of the given Map maps to the same global
// index as local index i of this Map (i.e., map's LID->GID assignment
// is a prefix of ours). Uses interval checks as a cheap pre-filter,
// then a parallel_reduce over all of map's local indices counting
// mismatches on device.
1326  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1327  bool
1330  {
1331  if (this == &map)
1332  return true;
1333 
1334  // We are going to check if lmap1 is fitted into lmap2
1335  auto lmap1 = map.getLocalMap();
1336  auto lmap2 = this->getLocalMap();
1337 
1338  auto numLocalElements1 = lmap1.getNodeNumElements();
1339  auto numLocalElements2 = lmap2.getNodeNumElements();
1340 
1341  if (numLocalElements1 > numLocalElements2) {
1342  // There are more indices in the first map on this process than in second map.
1343  return false;
1344  }
1345 
1346  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1347  // When both Maps are contiguous, just check the interval inclusion.
1348  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1349  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1350  }
1351 
1352  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1353  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1354  // The second map does not include the first map bounds, and thus some of
1355  // the first map global indices are not in the second map.
1356  return false;
1357  }
1358 
1359  typedef Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space> range_type;
1360 
1361  // Check all elements.
// numDiff counts local indices whose GIDs differ between the two local Maps.
1362  LocalOrdinal numDiff = 0;
1363  Kokkos::parallel_reduce("isLocallyFitted", range_type(0, numLocalElements1),
1364  KOKKOS_LAMBDA(const LocalOrdinal i, LocalOrdinal& diff) {
1365  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1366  }, numDiff);
1367 
1368  return (numDiff == 0);
1369  }
1370 
// Collective global sameness test: true iff both Maps assign exactly
// the same global indices to the same processes. Runs a ladder of
// cheap, globally consistent checks (pointer equality, communicator
// size, global count, global min/max/index base, distributed-ness,
// contiguous+uniform), then a local communicator-congruence check, and
// finally a MIN all-reduce over the per-process locallySameAs() result.
1371  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1372  bool
1375  {
1376  using Teuchos::outArg;
1377  using Teuchos::REDUCE_MIN;
1378  using Teuchos::reduceAll;
1379  //
1380  // Tests that avoid the Boolean all-reduce below by using
1381  // globally consistent quantities.
1382  //
1383  if (this == &map) {
1384  // Pointer equality on one process always implies pointer
1385  // equality on all processes, since Map is immutable.
1386  return true;
1387  }
1388  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1389  // The two communicators have different numbers of processes.
1390  // It's not correct to call isSameAs() in that case. This
1391  // may result in the all-reduce hanging below.
1392  return false;
1393  }
1394  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1395  // Two Maps are definitely NOT the same if they have different
1396  // global numbers of indices.
1397  return false;
1398  }
1399  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1400  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1401  getIndexBase () != map.getIndexBase ()) {
1402  // If the global min or max global index doesn't match, or if
1403  // the index base doesn't match, then the Maps aren't the same.
1404  return false;
1405  }
1406  else if (isDistributed () != map.isDistributed ()) {
1407  // One Map is distributed and the other is not, which means that
1408  // the Maps aren't the same.
1409  return false;
1410  }
1411  else if (isContiguous () && isUniform () &&
1412  map.isContiguous () && map.isUniform ()) {
1413  // Contiguous uniform Maps with the same number of processes in
1414  // their communicators, with the same global numbers of indices,
1415  // and with matching index bases and ranges, must be the same.
1416  return true;
1417  }
1418 
1419  // The two communicators must have the same number of processes,
1420  // with process ranks occurring in the same order. This uses
1421  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1422  // "Operations that access communicators are local and their
1423  // execution does not require interprocess communication."
1424  // However, just to be sure, I'll put this call after the above
1425  // tests that don't communicate.
1426  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1427  return false;
1428  }
1429 
1430  // If we get this far, we need to check local properties and then
1431  // communicate local sameness across all processes.
1432  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1433 
1434  // Return true if and only if all processes report local sameness.
1435  int isSame_gbl = 0;
1436  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1437  return isSame_gbl == 1;
1438  }
1439 
1440  namespace { // (anonymous)
1441  template <class LO, class GO, class DT>
1442  class FillLgMap {
1443  public:
1444  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1445  const GO startGid) :
1446  lgMap_ (lgMap), startGid_ (startGid)
1447  {
1448  Kokkos::RangePolicy<LO, typename DT::execution_space>
1449  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1450  Kokkos::parallel_for (range, *this);
1451  }
1452 
1453  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1454  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1455  }
1456 
1457  private:
1458  const Kokkos::View<GO*, DT> lgMap_;
1459  const GO startGid_;
1460  };
1461 
1462  } // namespace (anonymous)
1463 
1464 
// Returns the device View of this process's global indices (LID->GID
// array), creating and caching it lazily: a contiguous Map defers
// building lgMap_ until someone asks for it, at which point this fills
// it with the contiguous sequence starting at minMyGID_ and mirrors it
// to host.
1465  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1466  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1468  {
1469  typedef LocalOrdinal LO;
1470  typedef GlobalOrdinal GO;
1471  typedef device_type DT;
1472 
1473  typedef decltype (lgMap_) const_lg_view_type;
1474  typedef typename const_lg_view_type::non_const_type lg_view_type;
1475 
1476  // If the local-to-global mapping doesn't exist yet, and if we
1477  // have local entries, then create and fill the local-to-global
1478  // mapping.
1479  const bool needToCreateLocalToGlobalMapping =
1480  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1481 
1482  if (needToCreateLocalToGlobalMapping) {
1483 #ifdef HAVE_TEUCHOS_DEBUG
1484  // The local-to-global mapping should have been set up already
1485  // for a noncontiguous map.
1486  TEUCHOS_TEST_FOR_EXCEPTION( ! isContiguous(), std::logic_error,
1487  "Tpetra::Map::getNodeElementList: The local-to-global mapping (lgMap_) "
1488  "should have been set up already for a noncontiguous Map. Please report"
1489  " this bug to the Tpetra team.");
1490 #endif // HAVE_TEUCHOS_DEBUG
1491 
1492  const LO numElts = static_cast<LO> (getNodeNumElements ());
1493 
// FillLgMap's constructor launches the parallel fill with the
// contiguous sequence starting at minMyGID_.
1494  lg_view_type lgMap ("lgMap", numElts);
1495  FillLgMap<LO, GO, DT> fillIt (lgMap, minMyGID_);
1496 
1497  auto lgMapHost = Kokkos::create_mirror_view (lgMap);
1498  Kokkos::deep_copy (lgMapHost, lgMap);
1499 
1500  // "Commit" the local-to-global lookup table we filled in above.
1501  lgMap_ = lgMap;
1502  lgMapHost_ = lgMapHost;
1503 
1504  // lgMapHost_ may be a UVM View, so insert a fence to ensure
1505  // coherent host access. We only need to do this once, because
1506  // lgMapHost_ is immutable after initialization.
1507  execution_space::fence ();
1508  }
1509 
1510  return lgMap_;
1511  }
1512 
// Returns this process's global indices as a Teuchos::ArrayView over
// the cached host copy of the LID->GID array. Triggers the lazy build
// via getMyGlobalIndices() first, then wraps lgMapHost_'s raw pointer
// without transferring ownership.
1513  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1514  Teuchos::ArrayView<const GlobalOrdinal>
1516  {
1517  typedef GlobalOrdinal GO; // convenient abbreviation
1518 
1519  // If the local-to-global mapping doesn't exist yet, and if we
1520  // have local entries, then create and fill the local-to-global
1521  // mapping.
1522  (void) this->getMyGlobalIndices ();
1523 
1524  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1525  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1526  // The third argument forces ArrayView not to try to track memory
1527  // in a debug build. We have to use it because the memory does
1528  // not belong to a Teuchos memory management class.
1529  return Teuchos::ArrayView<const GO> (lgMapHostRawPtr,
1530  lgMapHost_.extent (0),
1531  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1532  }
1533 
// Trivial accessor: reports whether this Map is globally distributed
// (flag computed in the constructors).
1534  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1536  return distributed_;
1537  }
1538 
// Returns a one-line human-readable summary of this Map: template
// parameter type names, optional object label, global element count,
// process count, and the uniform/contiguous/distributed flags.
1539  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1541  using Teuchos::TypeNameTraits;
1542  std::ostringstream os;
1543 
1544  os << "Tpetra::Map: {"
1545  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1546  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1547  << ", NodeType: " << TypeNameTraits<Node>::name ();
1548  if (this->getObjectLabel () != "") {
1549  os << ", Label: \"" << this->getObjectLabel () << "\"";
1550  }
1551  os << ", Global number of entries: " << getGlobalNumElements ()
1552  << ", Number of processes: " << getComm ()->getSize ()
1553  << ", Uniform: " << (isUniform () ? "true" : "false")
1554  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1555  << ", Distributed: " << (isDistributed () ? "true" : "false")
1556  << "}";
1557  return os.str ();
1558  }
1559 
1564  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1565  std::string
// NOTE(review): the class-qualifier line (original line 1566, presumably
// Map<LocalOrdinal,GlobalOrdinal,Node>::) is missing from this copy.
// Build this process' contribution to describe()'s verbose output as a
// string.  Returns the empty string below VERB_HIGH; lists every owned
// global index at VERB_EXTREME.  Purely local (no communication).
1567  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1568  {
1569  typedef LocalOrdinal LO;
1570  using std::endl;
1571 
1572  // This preserves current behavior of Map.
1573  if (vl < Teuchos::VERB_HIGH) {
1574  return std::string ();
1575  }
// Buffer everything in an ostringstream wrapped in a FancyOStream so that
// OSTab-based indentation works while still producing a plain string.
1576  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1577  Teuchos::RCP<Teuchos::FancyOStream> outp =
1578  Teuchos::getFancyOStream (outStringP);
1579  Teuchos::FancyOStream& out = *outp;
1580 
1581  auto comm = this->getComm ();
1582  const int myRank = comm->getRank ();
1583  const int numProcs = comm->getSize ();
1584  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1585  Teuchos::OSTab tab1 (out);
1586 
1587  const LO numEnt = static_cast<LO> (this->getNodeNumElements ());
1588  out << "My number of entries: " << numEnt << endl
1589  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1590  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1591 
1592  if (vl == Teuchos::VERB_EXTREME) {
1593  out << "My global indices: [";
1594  const LO minLclInd = this->getMinLocalIndex ();
1595  for (LO k = 0; k < numEnt; ++k) {
// NOTE(review): adding a *local* index offset to a *global* index looks
// suspect; this is a no-op only when getMinLocalIndex() == 0 — confirm
// the intent (getGlobalElement (minLclInd + k) may have been meant).
1596  out << minLclInd + this->getGlobalElement (k);
1597  if (k + 1 < numEnt) {
1598  out << ", ";
1599  }
1600  }
1601  out << "]" << endl;
1602  }
1603 
1604  out.flush (); // make sure the ostringstream got everything
1605  return outStringP->str ();
1606  }
1607 
// Print a description of this Map to the given FancyOStream at the given
// verbosity.  Collective over the Map's communicator when verbosity is
// VERB_HIGH or above (the gathervPrint call at the end); at lower
// verbosities only Process 0 writes.  A null comm means this Map may not
// legally participate in collectives, so the method silently returns.
1608  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1609  void
1610  Map<LocalOrdinal,GlobalOrdinal,Node>::
1611  describe (Teuchos::FancyOStream &out,
1612  const Teuchos::EVerbosityLevel verbLevel) const
1613  {
1614  using Teuchos::TypeNameTraits;
1615  using Teuchos::VERB_DEFAULT;
1616  using Teuchos::VERB_NONE;
1617  using Teuchos::VERB_LOW;
1618  using Teuchos::VERB_HIGH;
1619  using std::endl;
1620  typedef LocalOrdinal LO;
1621  typedef GlobalOrdinal GO;
// VERB_DEFAULT is interpreted as VERB_LOW.
1622  const Teuchos::EVerbosityLevel vl =
1623  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1624 
1625  if (vl == VERB_NONE) {
1626  return; // don't print anything
1627  }
1628  // If this Map's Comm is null, then the Map does not participate
1629  // in collective operations with the other processes. In that
1630  // case, it is not even legal to call this method. The reasonable
1631  // thing to do in that case is nothing.
1632  auto comm = this->getComm ();
1633  if (comm.is_null ()) {
1634  return;
1635  }
1636  const int myRank = comm->getRank ();
1637  const int numProcs = comm->getSize ();
1638 
1639  // Only Process 0 should touch the output stream, but this method
1640  // in general may need to do communication. Thus, we may need to
1641  // preserve the current tab level across multiple "if (myRank ==
1642  // 0) { ... }" inner scopes. This is why we sometimes create
1643  // OSTab instances by pointer, instead of by value. We only need
1644  // to create them by pointer if the tab level must persist through
1645  // multiple inner scopes.
1646  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1647 
1648  if (myRank == 0) {
1649  // At every verbosity level but VERB_NONE, Process 0 prints.
1650  // By convention, describe() always begins with a tab before
1651  // printing.
1652  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1653  out << "\"Tpetra::Map\":" << endl;
1654  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1655  {
1656  out << "Template parameters:" << endl;
1657  Teuchos::OSTab tab2 (out);
1658  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1659  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1660  << "Node: " << TypeNameTraits<Node>::name () << endl;
1661  }
1662  const std::string label = this->getObjectLabel ();
1663  if (label != "") {
1664  out << "Label: \"" << label << "\"" << endl;
1665  }
1666  out << "Global number of entries: " << getGlobalNumElements () << endl
1667  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1668  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1669  << "Index base: " << getIndexBase () << endl
1670  << "Number of processes: " << numProcs << endl
1671  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1672  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1673  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1674  }
1675 
1676  // This is collective over the Map's communicator.
1677  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1678  const std::string lclStr = this->localDescribeToString (vl);
1679  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1680  }
1681  }
1682 
// Return a copy of this Map whose communicator is replaced by newComm,
// which must be a subset communicator of this Map's comm.  Processes not
// in newComm (null newComm) get Teuchos::null.  A one-process newComm is
// handled by copying fields directly; otherwise the Map is rebuilt via
// the noncontiguous constructor (O(1) all-reduces over newComm).
1683  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1684  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
// NOTE(review): the class-qualifier line (original line 1685, presumably
// Map<LocalOrdinal,GlobalOrdinal,Node>::) is missing from this copy.
1686  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1687  {
1688  using Teuchos::RCP;
1689  using Teuchos::rcp;
1690  typedef global_size_t GST;
1691  typedef LocalOrdinal LO;
1692  typedef GlobalOrdinal GO;
1693  typedef Map<LO, GO, Node> map_type;
1694 
1695  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1696  // the Map by calling its ordinary public constructor, using the
1697  // original Map's data. This only involves O(1) all-reduces over
1698  // the new communicator, which in the common case only includes a
1699  // small number of processes.
1700 
1701  // Create the Map to return.
1702  if (newComm.is_null () || newComm->getSize () < 1) {
1703  return Teuchos::null; // my process does not participate in the new Map
1704  }
1705  else if (newComm->getSize () == 1) {
1706  // The case where the new communicator has only one process is
1707  // easy. We don't have to communicate to get all the
1708  // information we need. Use the default comm to create the new
1709  // Map, then fill in all the fields directly.
1710  RCP<map_type> newMap (new map_type ());
1711 
1712  newMap->comm_ = newComm;
1713  // mfh 07 Oct 2016: Preserve original behavior, even though the
1714  // original index base may no longer be the globally min global
1715  // index. See #616 for why this doesn't matter so much anymore.
1716  newMap->indexBase_ = this->indexBase_;
// With one process, the global count equals this process' local count,
// and the global min/max GIDs equal this process' min/max GIDs.
1717  newMap->numGlobalElements_ = this->numLocalElements_;
1718  newMap->numLocalElements_ = this->numLocalElements_;
1719  newMap->minMyGID_ = this->minMyGID_;
1720  newMap->maxMyGID_ = this->maxMyGID_;
1721  newMap->minAllGID_ = this->minMyGID_;
1722  newMap->maxAllGID_ = this->maxMyGID_;
1723  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1724  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1725  // Since the new communicator has only one process, neither
1726  // uniformity nor contiguity have changed.
1727  newMap->uniform_ = this->uniform_;
1728  newMap->contiguous_ = this->contiguous_;
1729  // The new communicator only has one process, so the new Map is
1730  // not distributed.
1731  newMap->distributed_ = false;
1732  newMap->lgMap_ = this->lgMap_;
1733  newMap->lgMapHost_ = this->lgMapHost_;
1734  newMap->glMap_ = this->glMap_;
1735  // It's OK not to initialize the new Map's Directory.
1736  // This is initialized lazily, on first call to getRemoteIndexList.
1737 
1738  return newMap;
1739  }
1740  else { // newComm->getSize() != 1
1741  // Even if the original Map is contiguous, the new Map might not
1742  // be, especially if the excluded processes have ranks != 0 or
1743  // newComm->getSize()-1. The common case for this method is to
1744  // exclude many (possibly even all but one) processes, so it
1745  // likely doesn't pay to do the global communication (over the
1746  // original communicator) to figure out whether we can optimize
1747  // the result Map. Thus, we just set up the result Map as
1748  // noncontiguous.
1749  //
1750  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1751  // the global-to-local table, etc. Optimize this code path to
1752  // avoid unnecessary local work.
1753 
1754  // Make Map (re)compute the global number of elements.
1755  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1756  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1757  // to use the noncontiguous Map constructor, since the new Map
1758  // might not be contiguous. Even if the old Map was contiguous,
1759  // some process in the "middle" might have been excluded. If we
1760  // want to avoid local work, we either have to do the setup by
1761  // hand, or write a new Map constructor.
1762 #if 1
1763  // The disabled code here throws the following exception in
1764  // Map's replaceCommWithSubset test:
1765  //
1766  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ())
1767  // 10:
1768  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1769  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1770 
1771  auto lgMap = this->getMyGlobalIndices ();
1772  typedef typename std::decay<decltype (lgMap.extent (0)) >::type size_type;
1773  const size_type lclNumInds =
1774  static_cast<size_type> (this->getNodeNumElements ());
1775  using Teuchos::TypeNameTraits;
// Sanity check: the local-to-global view must cover exactly the local
// element count; a mismatch indicates an internal Tpetra bug.
1776  TEUCHOS_TEST_FOR_EXCEPTION
1777  (lgMap.extent (0) != lclNumInds, std::logic_error,
1778  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1779  "has length " << lgMap.extent (0) << " (of type " <<
1780  TypeNameTraits<size_type>::name () << ") != this->getNodeNumElements()"
1781  " = " << this->getNodeNumElements () << ". The latter, upon being "
1782  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
1783  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
1784  "developers.");
1785 #else
1786  Teuchos::ArrayView<const GO> lgMap = this->getNodeElementList ();
1787 #endif // 1
1788 
1789  const GO indexBase = this->getIndexBase ();
1790  return rcp (new map_type (RECOMPUTE, lgMap, indexBase, newComm));
1791  }
1792  }
1793 
1794  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1795  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
// NOTE(review): the signature lines (original 1796-1797) are missing from
// this copy; given the comm-splitting on numLocalElements_ below, this is
// presumably Map<...>::removeEmptyProcesses () const — confirm against
// Tpetra_Map_decl.hpp.  Splits off processes that own zero elements and
// returns an equivalent Map over the remaining processes (null on the
// processes that were removed).  Collective over the original comm.
1798  {
1799  using Teuchos::Comm;
1800  using Teuchos::null;
1801  using Teuchos::outArg;
1802  using Teuchos::RCP;
1803  using Teuchos::rcp;
1804  using Teuchos::REDUCE_MIN;
1805  using Teuchos::reduceAll;
1806 
1807  // Create the new communicator. split() returns a valid
1808  // communicator on all processes. On processes where color == 0,
1809  // ignore the result. Passing key == 0 tells MPI to order the
1810  // processes in the new communicator by their rank in the old
1811  // communicator.
1812  const int color = (numLocalElements_ == 0) ? 0 : 1;
1813  // MPI_Comm_split must be called collectively over the original
1814  // communicator. We can't just call it on processes with color
1815  // one, even though we will ignore its result on processes with
1816  // color zero.
1817  RCP<const Comm<int> > newComm = comm_->split (color, 0);
1818  if (color == 0) {
1819  newComm = null;
1820  }
1821 
1822  // Create the Map to return.
1823  if (newComm.is_null ()) {
1824  return null; // my process does not participate in the new Map
1825  } else {
1826  // The default constructor that's useful for clone() above is
1827  // also useful here.
1828  RCP<Map> map = rcp (new Map ());
1829 
1830  map->comm_ = newComm;
1831  map->indexBase_ = indexBase_;
// Element counts and GID bounds are unchanged: we removed only
// processes that contributed no elements.
1832  map->numGlobalElements_ = numGlobalElements_;
1833  map->numLocalElements_ = numLocalElements_;
1834  map->minMyGID_ = minMyGID_;
1835  map->maxMyGID_ = maxMyGID_;
1836  map->minAllGID_ = minAllGID_;
1837  map->maxAllGID_ = maxAllGID_;
1838  map->firstContiguousGID_= firstContiguousGID_;
1839  map->lastContiguousGID_ = lastContiguousGID_;
1840 
1841  // Uniformity and contiguity have not changed. The directory
1842  // has changed, but we've taken care of that above.
1843  map->uniform_ = uniform_;
1844  map->contiguous_ = contiguous_;
1845 
1846  // If the original Map was NOT distributed, then the new Map
1847  // cannot be distributed.
1848  //
1849  // If the number of processes in the new communicator is 1, then
1850  // the new Map is not distributed.
1851  //
1852  // Otherwise, we have to check the new Map using an all-reduce
1853  // (over the new communicator). For example, the original Map
1854  // may have had some processes with zero elements, and all other
1855  // processes with the same number of elements as in the whole
1856  // Map. That Map is technically distributed, because of the
1857  // processes with zero elements. Removing those processes would
1858  // make the new Map locally replicated.
1859  if (! distributed_ || newComm->getSize () == 1) {
1860  map->distributed_ = false;
1861  } else {
1862  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
1863  int allProcsOwnAllGids = 0;
1864  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
1865  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
1866  }
1867 
// Local-to-global and global-to-local tables are unchanged on the
// surviving processes; share them rather than rebuilding.
1868  map->lgMap_ = lgMap_;
1869  map->lgMapHost_ = lgMapHost_;
1870  map->glMap_ = glMap_;
1871 
1872  // Map's default constructor creates an uninitialized Directory.
1873  // The Directory will be initialized on demand in
1874  // getRemoteIndexList().
1875  //
1876  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
1877  // directory more efficiently than just recreating it. If
1878  // directory recreation proves a bottleneck, we can always
1879  // revisit this. On the other hand, Directory creation is only
1880  // collective over the new, presumably much smaller
1881  // communicator, so it may not be worth the effort to optimize.
1882 
1883  return map;
1884  }
1885  }
1885  }
1886 
1887  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1888  void
// NOTE(review): the qualified signature line (original line 1889,
// presumably Map<...>::setupDirectory () const {) is missing from this
// copy.  Lazily initializes the Map's Directory; Directory::initialize
// is collective, so this must be called collectively.
1890  {
1891  TEUCHOS_TEST_FOR_EXCEPTION(
1892  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
1893  "The Directory is null. "
1894  "Please report this bug to the Tpetra developers.");
1895 
1896  // Only create the Directory if it hasn't been created yet.
1897  // This is a collective operation.
1898  if (! directory_->initialized ()) {
1899  directory_->initialize (*this);
1900  }
1901  }
1902 
1903  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1904  LookupStatus
1905  Map<LocalOrdinal,GlobalOrdinal,Node>::
1906  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
1907  const Teuchos::ArrayView<int>& PIDs,
1908  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
1909  {
1910  using Tpetra::Details::OrdinalTraits;
1911  typedef Teuchos::ArrayView<int>::size_type size_type;
1912 
1913  // Empty Maps (i.e., containing no indices on any processes in the
1914  // Map's communicator) are perfectly valid. In that case, if the
1915  // input GID list is nonempty, we fill the output arrays with
1916  // invalid values, and return IDNotPresent to notify the caller.
1917  // It's perfectly valid to give getRemoteIndexList GIDs that the
1918  // Map doesn't own. SubmapImport test 2 needs this functionality.
1919  if (getGlobalNumElements () == 0) {
1920  if (GIDs.size () == 0) {
1921  return AllIDsPresent; // trivially
1922  } else {
1923  for (size_type k = 0; k < PIDs.size (); ++k) {
1924  PIDs[k] = OrdinalTraits<int>::invalid ();
1925  }
1926  for (size_type k = 0; k < LIDs.size (); ++k) {
1927  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
1928  }
1929  return IDNotPresent;
1930  }
1931  }
1932 
1933  // getRemoteIndexList must be called collectively, and Directory
1934  // initialization is collective too, so it's OK to initialize the
1935  // Directory on demand.
1936  setupDirectory ();
1937  return directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
1938  }
1939 
// Two-argument overload: look up only the owning process ranks (no local
// indices) of the given global indices.  Collective.
1940  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1941  LookupStatus
// NOTE(review): the class-qualifier line (original 1942, presumably
// Map<LocalOrdinal,GlobalOrdinal,Node>::) is missing from this copy.
1943  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
1944  const Teuchos::ArrayView<int> & PIDs) const
1945  {
1946  if (getGlobalNumElements () == 0) {
1947  if (GIDs.size () == 0) {
1948  return AllIDsPresent; // trivially
1949  } else {
1950  // The Map contains no indices, so all output PIDs are invalid.
1951  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
1952  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
1953  }
1954  return IDNotPresent;
1955  }
1956  }
1957 
1958  // getRemoteIndexList must be called collectively, and Directory
1959  // initialization is collective too, so it's OK to initialize the
1960  // Directory on demand.
1961  setupDirectory ();
1962  return directory_->getDirectoryEntries (*this, GIDs, PIDs);
1963  }
1964 
1965  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1966  Teuchos::RCP<const Teuchos::Comm<int> >
// Accessor for this Map's communicator.
// NOTE(review): the qualified signature line (original 1967, presumably
// Map<...>::getComm () const {) is missing from this copy.
1968  return comm_;
1969  }
1970 
1971  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1972  Teuchos::RCP<Node>
// NOTE(review): the qualified signature line (original 1973, presumably
// Map<...>::getNode () const {) is missing from this copy.  Returns a
// freshly allocated Node on each call.
1974  // Node instances don't do anything any more, but sometimes it
1975  // helps for them to be nonnull.
1976  return Teuchos::rcp (new Node);
1977  }
1978 
1979  template <class LocalOrdinal,class GlobalOrdinal, class Node>
// NOTE(review): the signature line (original 1980, presumably
// bool Map<...>::checkIsDist () const {) is missing from this copy.
// Collective: returns true iff at least one process does NOT own all of
// the global elements, i.e., the Map is genuinely distributed.  A Map on
// a one-process comm is never distributed.
1981  using Teuchos::as;
1982  using Teuchos::outArg;
1983  using Teuchos::REDUCE_MIN;
1984  using Teuchos::reduceAll;
1985 
1986  bool global = false;
1987  if (comm_->getSize () > 1) {
1988  // The communicator has more than one process, but that doesn't
1989  // necessarily mean the Map is distributed.
1990  int localRep = 0;
1991  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
1992  // The number of local elements on this process equals the
1993  // number of global elements.
1994  //
1995  // NOTE (mfh 22 Nov 2011) Does this still work if there were
1996  // duplicates in the global ID list on input (the third Map
1997  // constructor), so that the number of local elements (which
1998  // are not duplicated) on this process could be less than the
1999  // number of global elements, even if this process owns all
2000  // the elements?
2001  localRep = 1;
2002  }
2003  int allLocalRep;
2004  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2005  if (allLocalRep != 1) {
2006  // At least one process does not own all the elements.
2007  // This makes the Map a distributed Map.
2008  global = true;
2009  }
2010  }
2011  // If the communicator has only one process, then the Map is not
2012  // distributed.
2013  return global;
2014  }
2015 
2016 } // namespace Classes
2017 } // namespace Tpetra
2018 
2019 template <class LocalOrdinal, class GlobalOrdinal>
2020 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2021 Tpetra::createLocalMap (const size_t numElements,
2022  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2023 {
2024  typedef LocalOrdinal LO;
2025  typedef GlobalOrdinal GO;
2026  typedef typename ::Tpetra::Map<LO, GO>::node_type NT;
2027  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2028 }
2029 
2030 template <class LocalOrdinal, class GlobalOrdinal>
2031 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2032 Tpetra::createUniformContigMap (const global_size_t numElements,
2033  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2034 {
2035  typedef LocalOrdinal LO;
2036  typedef GlobalOrdinal GO;
2037  typedef typename ::Tpetra::Map<LO, GO>::node_type NT;
2038  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2039 }
2040 
// Nonmember constructor: uniform, contiguous, globally distributed Map
// with numElements total entries and index base 0.
2041 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2042 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2043 Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2044  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2045  const Teuchos::RCP<Node>& node)
2046 {
2047  using Teuchos::rcp;
// NOTE(review): the map_type typedef (original line 2048, presumably
// typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> map_type;) is
// missing from this copy.
2049  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2050 
2051  if (node.is_null ()) {
2052  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2053  }
2054  else {
2055  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed, node));
2056  }
2057 }
2058 
// Nonmember constructor: locally replicated Map with numElements entries
// on every process and index base 0.
2059 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2060 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2061 Tpetra::createLocalMapWithNode (const size_t numElements,
2062  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2063  const Teuchos::RCP<Node>& node)
2064 {
2065  using Tpetra::global_size_t;
2066  using Teuchos::rcp;
// NOTE(review): the map_type typedef (original line 2067, presumably
// typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> map_type;) is
// missing from this copy.
2068  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2069  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2070 
2071  if (node.is_null ()) {
2072  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2073  }
2074  else {
2075  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated, node));
2076  }
2077 }
2078 
// Nonmember constructor: contiguous Map with the given global and
// per-process local element counts, index base 0.  The node argument is
// unused.
2079 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2080 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
// NOTE(review): the signature's first line (original 2081, presumably
// Tpetra::createContigMapWithNode (const global_size_t numElements,) is
// missing from this copy.
2082  const size_t localNumElements,
2083  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2084  const Teuchos::RCP<Node>& /* node */)
2085 {
2086  using Teuchos::rcp;
// NOTE(review): the map_type typedef (original line 2087) is missing
// from this copy.
2088  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2089 
2090  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2091 }
2092 
// Nonmember constructor: contiguous Map using the default Node type;
// forwards to createContigMapWithNode.
2093 template <class LocalOrdinal, class GlobalOrdinal>
2094 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
// NOTE(review): the signature's first line (original 2095, presumably
// Tpetra::createContigMap (const global_size_t numElements,) is missing
// from this copy.
2096  const size_t localNumElements,
2097  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2098 {
2099  typedef LocalOrdinal LO;
2100  typedef GlobalOrdinal GO;
// NOTE(review): the NT typedef (original line 2101, presumably the Map's
// default node_type) is missing from this copy.
2102 
2103  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2104 }
2105 
2106 
// Nonmember constructor: noncontiguous Map from an arbitrary per-process
// list of global indices, using the default Node type.
2108 template <class LocalOrdinal, class GlobalOrdinal>
2109 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2110 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2111  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2112 {
2113  typedef LocalOrdinal LO;
2114  typedef GlobalOrdinal GO;
// NOTE(review): the NT typedef (original line 2114, presumably the Map's
// default node_type) is missing from this copy.
2115 
2116  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2117 }
2118 
2119 
// Nonmember constructor: noncontiguous Map from an arbitrary per-process
// list of global indices.  The global element count is recomputed (INV
// sentinel) and the node argument is unused.
2120 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2121 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2122 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2123  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2124  const Teuchos::RCP<Node>& /* node */)
2125 {
2126  using Teuchos::rcp;
// NOTE(review): the map_type typedef (original line 2127) is missing
// from this copy.
2128  using GST = Tpetra::global_size_t;
2129  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2130  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2131  // shouldn't be zero, given that the index base is supposed to equal
2132  // the globally min global index?
2133  const GlobalOrdinal indexBase = 0;
2134 
2135  return rcp (new map_type (INV, elementList, indexBase, comm));
2136 }
2137 
2138 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2139 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2140 Tpetra::createWeightedContigMapWithNode (const int myWeight,
2141  const Tpetra::global_size_t numElements,
2142  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2143  const Teuchos::RCP<Node>& /* node */)
2144 {
2145  Teuchos::RCP< Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > map;
2146  int sumOfWeights, elemsLeft, localNumElements;
2147  const int numImages = comm->getSize();
2148  const int myImageID = comm->getRank();
2149  Teuchos::reduceAll<int>(*comm,Teuchos::REDUCE_SUM,myWeight,Teuchos::outArg(sumOfWeights));
2150  const double myShare = ((double)myWeight) / ((double)sumOfWeights);
2151  localNumElements = (int)std::floor( myShare * ((double)numElements) );
2152  // std::cout << "numElements: " << numElements << " myWeight: " << myWeight << " sumOfWeights: " << sumOfWeights << " myShare: " << myShare << std::endl;
2153  Teuchos::reduceAll<int>(*comm,Teuchos::REDUCE_SUM,localNumElements,Teuchos::outArg(elemsLeft));
2154  elemsLeft = numElements - elemsLeft;
2155  // std::cout << "(before) localNumElements: " << localNumElements << " elemsLeft: " << elemsLeft << std::endl;
2156  // i think this is true. just test it for now.
2157  TEUCHOS_TEST_FOR_EXCEPT(elemsLeft < -numImages || numImages < elemsLeft);
2158  if (elemsLeft < 0) {
2159  // last elemsLeft nodes lose an element
2160  if (myImageID >= numImages-elemsLeft) --localNumElements;
2161  }
2162  else if (elemsLeft > 0) {
2163  // first elemsLeft nodes gain an element
2164  if (myImageID < elemsLeft) ++localNumElements;
2165  }
2166  // std::cout << "(after) localNumElements: " << localNumElements << std::endl;
2167  return createContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(numElements,localNumElements,comm);
2168 }
2169 
2170 
// Return a one-to-one version of M: every global index is owned by
// exactly one process.  M itself is returned when it is already
// one-to-one by construction (contiguous and distributed).
2171 template<class LO, class GO, class NT>
2172 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2173 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2174 {
2175  using Teuchos::Array;
2176  using Teuchos::ArrayView;
2177  using Teuchos::as;
2178  using Teuchos::rcp;
2179  typedef Tpetra::Map<LO, GO, NT> map_type;
2180  typedef global_size_t GST;
2181  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2182  const int myRank = M->getComm ()->getRank ();
2183 
2184  // Bypasses for special cases where either M is known to be
2185  // one-to-one, or the one-to-one version of M is easy to compute.
2186  // This is why we take M as an RCP, not as a const reference -- so
2187  // that we can return M itself if it is 1-to-1.
2188  if (! M->isDistributed ()) {
2189  // For a locally replicated Map, we assume that users want to push
2190  // all the GIDs to Process 0.
2191 
2192  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2193  // you think it should return, in this special case of a locally
2194  // replicated contiguous Map.
2195  const GST numGlobalEntries = M->getGlobalNumElements ();
2196  if (M->isContiguous ()) {
2197  const size_t numLocalEntries =
2198  (myRank == 0) ? as<size_t> (numGlobalEntries) : static_cast<size_t> (0);
2199  return rcp (new map_type (numGlobalEntries, numLocalEntries,
2200  M->getIndexBase (), M->getComm ()));
2201  }
2202  else {
2203  ArrayView<const GO> myGids =
2204  (myRank == 0) ? M->getNodeElementList () : Teuchos::null;
2205  return rcp (new map_type (GINV, myGids (), M->getIndexBase (),
2206  M->getComm ()));
2207  }
2208  }
2209  else if (M->isContiguous ()) {
2210  // Contiguous, distributed Maps are one-to-one by construction.
2211  // (Locally replicated Maps can be contiguous.)
2212  return M;
2213  }
2214  else {
// NOTE(review): the declaration of `directory` (original line 2215,
// presumably a Tpetra Directory constructed over M) is missing from
// this copy.  The Directory arbitrates a unique owner per global index.
2216  const size_t numMyElems = M->getNodeNumElements ();
2217  ArrayView<const GO> myElems = M->getNodeElementList ();
2218  Array<int> owner_procs_vec (numMyElems);
2219 
2220  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2221 
// Keep only the global indices that the Directory assigned to me.
2222  Array<GO> myOwned_vec (numMyElems);
2223  size_t numMyOwnedElems = 0;
2224  for (size_t i = 0; i < numMyElems; ++i) {
2225  const GO GID = myElems[i];
2226  const int owner = owner_procs_vec[i];
2227 
2228  if (myRank == owner) {
2229  myOwned_vec[numMyOwnedElems++] = GID;
2230  }
2231  }
2232  myOwned_vec.resize (numMyOwnedElems);
2233 
2234  return rcp (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2235  M->getComm ()));
2236  }
2237 }
2238 
// Return a one-to-one version of M, using the given TieBreak object to
// decide which process owns each global index that multiple processes
// share.
2239 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2240 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
// NOTE(review): the signature lines (original 2241-2242, presumably
// Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<...> >& M,
// const Tpetra::Details::TieBreak<...>& tie_break)) are missing from
// this copy.
2243 {
2244  using Teuchos::Array;
2245  using Teuchos::ArrayView;
2246  using Teuchos::rcp;
2247  typedef LocalOrdinal LO;
2248  typedef GlobalOrdinal GO;
2249  typedef Tpetra::Map<LO,GO,Node> map_type;
2250  int myID = M->getComm()->getRank();
2251 
2252  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2253  // Maps (which are 1-to-1 by construction).
2254 
2255  //Based off Epetra's one to one.
2256 
// NOTE(review): the declaration of `directory` (original line 2257,
// presumably a Tpetra Directory) is missing from this copy; it is
// initialized below with the tie_break to arbitrate ownership.
2258  directory.initialize (*M, tie_break);
2259  size_t numMyElems = M->getNodeNumElements ();
2260  ArrayView<const GO> myElems = M->getNodeElementList ();
2261  Array<int> owner_procs_vec (numMyElems);
2262 
2263  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2264 
// Keep only the global indices the tie-broken Directory assigned to me.
2265  Array<GO> myOwned_vec (numMyElems);
2266  size_t numMyOwnedElems = 0;
2267  for (size_t i = 0; i < numMyElems; ++i) {
2268  GO GID = myElems[i];
2269  int owner = owner_procs_vec[i];
2270 
2271  if (myID == owner) {
2272  myOwned_vec[numMyOwnedElems++] = GID;
2273  }
2274  }
2275  myOwned_vec.resize (numMyOwnedElems);
2276 
2277  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2278  // valid for the new Map. Why can't we reuse it?
2279  const global_size_t GINV =
2280  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2281  return rcp (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2282  M->getComm ()));
2283 }
2284 
2285 //
2286 // Explicit instantiation macro
2287 //
2288 // Must be expanded from within the Tpetra namespace!
2289 //
2290 
// Explicit-instantiation macro: instantiates Map<LO,GO,NODE> and all of
// its nonmember creation functions for the given ordinal/node triple.
// Must be expanded from within the Tpetra namespace (see header comment).
2292 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2293  \
2294  namespace Classes { template class Map< LO , GO , NODE >; } \
2295  \
2296  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2297  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2298  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2299  const Teuchos::RCP< NODE >& node); \
2300  \
2301  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2302  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2303  const size_t localNumElements, \
2304  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2305  const Teuchos::RCP< NODE > &node); \
2306  \
2307  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2308  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2309  const Teuchos::RCP<const Teuchos::Comm<int> > &comm, \
2310  const Teuchos::RCP<NODE> &node); \
2311  \
2312  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2313  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2314  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2315  const Teuchos::RCP< NODE > &node); \
2316  \
2317  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2318  createWeightedContigMapWithNode<LO,GO,NODE> (const int thisNodeWeight, \
2319  const global_size_t numElements, \
2320  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2321  const Teuchos::RCP< NODE >& node); \
2322  \
2323  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2324  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2325  \
2326  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2327  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2328  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2329 
2330 
// Explicit-instantiation macro for the default-Node convenience
// overloads (createLocalMap, createContigMap, createNonContigMap,
// createUniformContigMap).  Must be expanded within the Tpetra namespace.
2332 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2333  template Teuchos::RCP< const Map<LO,GO> > \
2334  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2335  \
2336  template Teuchos::RCP< const Map<LO,GO> > \
2337  createContigMap<LO,GO>( global_size_t, size_t, \
2338  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2339  \
2340  template Teuchos::RCP< const Map<LO,GO> > \
2341  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2342  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2343  \
2344  template Teuchos::RCP< const Map<LO,GO> > \
2345  createUniformContigMap<LO,GO>( const global_size_t, \
2346  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2347 
2348 #endif // TPETRA_MAP_DEF_HPP
Tpetra::Classes::Directory::getDirectoryEntries
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
Tpetra::IDNotPresent
Definition: Tpetra_ConfigDefs.hpp:126
Tpetra::Classes::Directory::initialize
void initialize(const map_type &map)
Initialize the Directory with its Map.
Tpetra::Details::congruent
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:65
Tpetra::Classes::Map::isDistributed
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Definition: Tpetra_Map_def.hpp:1535
Tpetra::Classes::Map::getLocalMap
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Definition: Tpetra_Map_def.hpp:1165
Tpetra::Classes::Map::node_type
Node node_type
The type of the Kokkos Node.
Definition: Tpetra_Map_decl.hpp:257
Tpetra::Classes::Map::getGlobalNumElements
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Definition: Tpetra_Map_decl.hpp:569
Tpetra::Classes::Map< LO, GO, NT >::device_type
NT ::device_type device_type
The Kokkos device type over which to allocate Views and perform work.
Definition: Tpetra_Map_decl.hpp:270
Tpetra::Classes::Map::getMinGlobalIndex
GlobalOrdinal getMinGlobalIndex() const
The minimum global index owned by the calling process.
Definition: Tpetra_Map_decl.hpp:623
Tpetra_Details_initializeKokkos.hpp
Declaration of Tpetra::Details::initializeKokkos.
Tpetra::Classes::Map::getMaxGlobalIndex
GlobalOrdinal getMaxGlobalIndex() const
The maximum global index owned by the calling process.
Definition: Tpetra_Map_decl.hpp:632
Tpetra::createLocalMapWithNode
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Tpetra_Details_printOnce.hpp
Declaration of Tpetra::Details::printOnce.
Details
Implementation details of Tpetra.
Tpetra::Details::gathervPrint
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
Definition: Tpetra_Details_gathervPrint.cpp:52
Tpetra::Classes::Map::isUniform
bool isUniform() const
Whether the range of global indices is uniform.
Definition: Tpetra_Map_def.hpp:1152
Tpetra::createLocalMap
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Tpetra::createNonContigMap
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Tpetra::createContigMapWithNode
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specified, possibly nondefault Kokkos Node type.
Tpetra::createWeightedContigMapWithNode
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createWeightedContigMapWithNode(const int thisNodeWeight, const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
Tpetra::AllIDsPresent
Definition: Tpetra_ConfigDefs.hpp:125
Tpetra::LocalGlobal
LocalGlobal
Enum for local versus global allocation of Map entries.
Definition: Tpetra_ConfigDefs.hpp:118
Tpetra::createNonContigMapWithNode
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node type.
Tpetra::createUniformContigMap
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Tpetra::Details::FixedHashTable< GO, LO, device_type >
Tpetra::Classes::Map::Map
Map()
Default constructor (that does nothing).
Definition: Tpetra_Map_def.hpp:70
Tpetra_Core.hpp
Functions for initializing and finalizing Tpetra.
Tpetra::createUniformContigMapWithNode
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
Tpetra::Details::Classes::LocalMap
"Local" part of Map suitable for Kokkos kernels.
Definition: Tpetra_Details_LocalMap.hpp:72
Tpetra::LookupStatus
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Definition: Tpetra_ConfigDefs.hpp:124
Tpetra::createOneToOne
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
Tpetra_Util.hpp
Stand-alone utility functions and macros.
Tpetra::Classes::Map::getMinAllGlobalIndex
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
Definition: Tpetra_Map_decl.hpp:641
Tpetra::Classes::Map::getNodeElementList
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Definition: Tpetra_Map_def.hpp:1515
Tpetra::Details::Classes::TieBreak
Interface for breaking ties in ownership.
Definition: Tpetra_TieBreak.hpp:69
Tpetra::createContigMap
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the default Kokkos Node.
Tpetra::Classes::Map
A parallel distribution of indices over processes.
Definition: Tpetra_Map_decl.hpp:247
Tpetra::Map
Classes::Map< LocalOrdinal, GlobalOrdinal, Node > Map
Alias for Tpetra::Classes::Map.
Definition: Tpetra_Map_fwd.hpp:71
Tpetra::Classes::Map::getIndexBase
GlobalOrdinal getIndexBase() const
The index base for this Map.
Definition: Tpetra_Map_decl.hpp:587
Tpetra::Classes::Map::getComm
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Definition: Tpetra_Map_def.hpp:1967
Tpetra::Classes::Map::getNodeNumElements
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
Definition: Tpetra_Map_decl.hpp:578
Tpetra_Details_extractMpiCommFromTeuchos.hpp
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Tpetra::Classes::Map::getMaxAllGlobalIndex
GlobalOrdinal getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
Definition: Tpetra_Map_decl.hpp:650
Tpetra::global_size_t
size_t global_size_t
Global size_t object.
Definition: Tpetra_ConfigDefs.hpp:109
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::Classes::Directory
Implement mapping from global ID to process ID and local ID.
Definition: Tpetra_Directory_decl.hpp:134
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Classes::Map::isContiguous
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Definition: Tpetra_Map_def.hpp:1157
Tpetra::Details::initializeKokkos
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
Definition: Tpetra_Details_initializeKokkos.cpp:53