Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSGRAPH_DEF_HPP
43 #define TPETRA_CRSGRAPH_DEF_HPP
44 
52 
56 #include "Tpetra_Details_gathervPrint.hpp"
57 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
58 #include "Tpetra_Details_makeColMap.hpp"
61 #include "Tpetra_Distributor.hpp"
62 #include "Teuchos_SerialDenseMatrix.hpp"
63 #include "Tpetra_Vector.hpp"
64 #include "Tpetra_Import_Util.hpp"
65 #include "Tpetra_Import_Util2.hpp"
66 #include "Tpetra_Details_packCrsGraph.hpp"
67 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
69 #include <algorithm>
70 #include <limits>
71 #include <sstream>
72 #include <string>
73 #include <utility>
74 #ifdef HAVE_TPETRA_DEBUG
75 # include <map>
76 # include <vector>
77 #endif // HAVE_TPETRA_DEBUG
78 
79 namespace Tpetra {
80  namespace Details {
81  namespace Impl {
82 
83  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
84  class ConvertColumnIndicesFromGlobalToLocal {
85  public:
86  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
87  const ::Kokkos::View<const GO*, DT>& gblColInds,
88  const ::Kokkos::View<const OffsetType*, DT>& ptr,
89  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
90  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
91  lclColInds_ (lclColInds),
92  gblColInds_ (gblColInds),
93  ptr_ (ptr),
94  lclColMap_ (lclColMap),
95  numRowEnt_ (numRowEnt)
96  {}
97 
98  KOKKOS_FUNCTION void
99  operator () (const LO& lclRow, OffsetType& curNumBad) const
100  {
101  const OffsetType offset = ptr_(lclRow);
102  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
103  // of entries in a row to LO, as long as the row doesn't have
104  // too many duplicate entries.
105  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
106  for (LO j = 0; j < numEnt; ++j) {
107  const GO gid = gblColInds_(offset + j);
108  const LO lid = lclColMap_.getLocalElement (gid);
109  lclColInds_(offset + j) = lid;
110  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
111  ++curNumBad;
112  }
113  }
114  }
115 
116  static OffsetType
117  run (const ::Kokkos::View<LO*, DT>& lclColInds,
118  const ::Kokkos::View<const GO*, DT>& gblColInds,
119  const ::Kokkos::View<const OffsetType*, DT>& ptr,
120  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
121  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
122  {
123  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
124  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
125 
126  const LO lclNumRows = ptr.extent (0) == 0 ?
127  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
128  OffsetType numBad = 0;
129  // Count of "bad" column indices is a reduction over rows.
130  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
131  functor_type (lclColInds, gblColInds, ptr,
132  lclColMap, numRowEnt),
133  numBad);
134  return numBad;
135  }
136 
137  private:
138  ::Kokkos::View<LO*, DT> lclColInds_;
139  ::Kokkos::View<const GO*, DT> gblColInds_;
140  ::Kokkos::View<const OffsetType*, DT> ptr_;
142  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
143  };
144 
145  } // namespace Impl
146 
161  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
162  OffsetType
163  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
164  const Kokkos::View<const GO*, DT>& gblColInds,
165  const Kokkos::View<const OffsetType*, DT>& ptr,
166  const LocalMap<LO, GO, DT>& lclColMap,
167  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
168  {
169  using Impl::ConvertColumnIndicesFromGlobalToLocal;
170  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
171  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
172  }
173 
174  template<class ViewType, class LO>
175  class MaxDifference {
176  public:
177  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
178 
179  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
180  dst = 0;
181  }
182 
183  KOKKOS_INLINE_FUNCTION void
184  join (volatile LO& dst, const volatile LO& src) const
185  {
186  dst = (src > dst) ? src : dst;
187  }
188 
189  KOKKOS_INLINE_FUNCTION void
190  operator () (const LO lclRow, LO& maxNumEnt) const
191  {
192  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
193  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
194  }
195  private:
196  typename ViewType::const_type ptr_;
197  };
198 
199  template<class ViewType, class LO>
200  typename ViewType::non_const_value_type
201  maxDifference (const char kernelLabel[],
202  const ViewType& ptr,
203  const LO lclNumRows)
204  {
205  if (lclNumRows == 0) {
206  // mfh 07 May 2018: Weirdly, I need this special case,
207  // otherwise I get the wrong answer.
208  return static_cast<LO> (0);
209  }
210  else {
211  using execution_space = typename ViewType::execution_space;
212  using range_type = Kokkos::RangePolicy<execution_space, LO>;
213  LO theMaxNumEnt {0};
214  Kokkos::parallel_reduce (kernelLabel,
215  range_type (0, lclNumRows),
216  MaxDifference<ViewType, LO> (ptr),
217  theMaxNumEnt);
218  return theMaxNumEnt;
219  }
220  }
221 
222  } // namespace Details
223 
224 namespace Classes {
225 
226  template <class LocalOrdinal, class GlobalOrdinal, class Node>
228  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
229  size_t maxNumEntriesPerRow,
230  ProfileType pftype,
231  const Teuchos::RCP<Teuchos::ParameterList>& params) :
232  dist_object_type (rowMap)
233  , rowMap_ (rowMap)
234  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
235  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
236  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
237  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
238  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
239  , pftype_ (pftype)
240  , numAllocForAllRows_ (maxNumEntriesPerRow)
241  , storageStatus_ (pftype == StaticProfile ?
242  ::Tpetra::Details::STORAGE_1D_UNPACKED :
243  ::Tpetra::Details::STORAGE_2D)
244  , indicesAreAllocated_ (false)
245  , indicesAreLocal_ (false)
246  , indicesAreGlobal_ (false)
247  , fillComplete_ (false)
248  , lowerTriangular_ (false)
249  , upperTriangular_ (false)
250  , indicesAreSorted_ (true)
251  , noRedundancies_ (true)
252  , haveLocalConstants_ (false)
253  , haveGlobalConstants_ (false)
254  , sortGhostsAssociatedWithEachProcessor_ (true)
255  {
256  const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow,"
257  "pftype,params): ";
258  staticAssertions ();
259  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
260  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
261  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
262  "a valid size_t value, which in this case means it must not be "
263  "Teuchos::OrdinalTraits<size_t>::invalid().");
264  resumeFill (params);
265  checkInternalState ();
266  }
267 
268  template <class LocalOrdinal, class GlobalOrdinal, class Node>
270  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
271  const Teuchos::RCP<const map_type>& colMap,
272  const size_t maxNumEntriesPerRow,
273  const ProfileType pftype,
274  const Teuchos::RCP<Teuchos::ParameterList>& params) :
275  dist_object_type (rowMap)
276  , rowMap_ (rowMap)
277  , colMap_ (colMap)
278  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
279  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
280  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
281  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
282  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
283  , pftype_ (pftype)
284  , numAllocForAllRows_ (maxNumEntriesPerRow)
285  , storageStatus_ (pftype == StaticProfile ?
286  ::Tpetra::Details::STORAGE_1D_UNPACKED :
287  ::Tpetra::Details::STORAGE_2D)
288  , indicesAreAllocated_ (false)
289  , indicesAreLocal_ (false)
290  , indicesAreGlobal_ (false)
291  , fillComplete_ (false)
292  , lowerTriangular_ (false)
293  , upperTriangular_ (false)
294  , indicesAreSorted_ (true)
295  , noRedundancies_ (true)
296  , haveLocalConstants_ (false)
297  , haveGlobalConstants_ (false)
298  , sortGhostsAssociatedWithEachProcessor_ (true)
299  {
300  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,"
301  "pftype,params): ";
302  staticAssertions ();
303  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
304  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
305  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
306  "a valid size_t value, which in this case means it must not be "
307  "Teuchos::OrdinalTraits<size_t>::invalid().");
308  resumeFill (params);
309  checkInternalState ();
310  }
311 
312  template <class LocalOrdinal, class GlobalOrdinal, class Node>
314  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
315  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
316  const ProfileType pftype,
317  const Teuchos::RCP<Teuchos::ParameterList>& params) :
318  dist_object_type (rowMap)
319  , rowMap_ (rowMap)
320  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
321  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
322  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
323  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
324  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
325  , pftype_ (pftype)
326  , numAllocForAllRows_ (0)
327  , storageStatus_ (pftype == StaticProfile ?
328  ::Tpetra::Details::STORAGE_1D_UNPACKED :
329  ::Tpetra::Details::STORAGE_2D)
330  , indicesAreAllocated_ (false)
331  , indicesAreLocal_ (false)
332  , indicesAreGlobal_ (false)
333  , fillComplete_ (false)
334  , lowerTriangular_ (false)
335  , upperTriangular_ (false)
336  , indicesAreSorted_ (true)
337  , noRedundancies_ (true)
338  , haveLocalConstants_ (false)
339  , haveGlobalConstants_ (false)
340  , sortGhostsAssociatedWithEachProcessor_ (true)
341  {
342  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
343  staticAssertions ();
344 
345  const size_t lclNumRows = rowMap.is_null () ?
346  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
347  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
348  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
349  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
350  << " != the local number of rows " << lclNumRows << " as specified by "
351  "the input row Map.");
352 
353  const bool debug = ::Tpetra::Details::Behavior::debug ();
354  if (debug) {
355  for (size_t r = 0; r < lclNumRows; ++r) {
356  const size_t curRowCount = numEntPerRow[r];
357  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
358  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
359  std::invalid_argument, "numEntPerRow(" << r << ") "
360  "specifies an invalid number of entries "
361  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
362  }
363  }
364 
365  // Deep-copy the input (ArrayRCP, therefore host accessible) into
366  // k_numAllocPerRow_. The latter is a const View, so we have to
367  // copy into a nonconst View first, then assign.
368  typedef decltype (k_numAllocPerRow_) out_view_type;
369  typedef typename out_view_type::non_const_type nc_view_type;
370  typedef Kokkos::View<const size_t*,
371  typename nc_view_type::array_layout,
372  Kokkos::HostSpace,
373  Kokkos::MemoryUnmanaged> in_view_type;
374  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
375  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
376  lclNumRows);
377  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
378  k_numAllocPerRow_ = numAllocPerRowOut;
379 
380  resumeFill (params);
381  checkInternalState ();
382  }
383 
384  template <class LocalOrdinal, class GlobalOrdinal, class Node>
386  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
387  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
388  const ProfileType pftype,
389  const Teuchos::RCP<Teuchos::ParameterList>& params) :
390  dist_object_type (rowMap)
391  , rowMap_ (rowMap)
392  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
393  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
394  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
395  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
396  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
397  , pftype_ (pftype)
398  , k_numAllocPerRow_ (numEntPerRow.h_view)
399  , numAllocForAllRows_ (0)
400  , storageStatus_ (pftype == StaticProfile ?
401  ::Tpetra::Details::STORAGE_1D_UNPACKED :
402  ::Tpetra::Details::STORAGE_2D)
403  , indicesAreAllocated_ (false)
404  , indicesAreLocal_ (false)
405  , indicesAreGlobal_ (false)
406  , fillComplete_ (false)
407  , lowerTriangular_ (false)
408  , upperTriangular_ (false)
409  , indicesAreSorted_ (true)
410  , noRedundancies_ (true)
411  , haveLocalConstants_ (false)
412  , haveGlobalConstants_ (false)
413  , sortGhostsAssociatedWithEachProcessor_ (true)
414  {
415  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
416  staticAssertions ();
417 
418  const size_t lclNumRows = rowMap.is_null () ?
419  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
420  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
421  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
422  std::invalid_argument, "numEntPerRow has length " <<
423  numEntPerRow.extent (0) << " != the local number of rows " <<
424  lclNumRows << " as specified by " "the input row Map.");
425 
426  const bool debug = ::Tpetra::Details::Behavior::debug ();
427  if (debug) {
428  for (size_t r = 0; r < lclNumRows; ++r) {
429  const size_t curRowCount = numEntPerRow.h_view(r);
430  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
431  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
432  std::invalid_argument, "numEntPerRow(" << r << ") "
433  "specifies an invalid number of entries "
434  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
435  }
436  }
437 
438  resumeFill (params);
439  checkInternalState ();
440  }
441 
442 
443  template <class LocalOrdinal, class GlobalOrdinal, class Node>
445  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
446  const Teuchos::RCP<const map_type>& colMap,
447  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
448  const ProfileType pftype,
449  const Teuchos::RCP<Teuchos::ParameterList>& params) :
450  dist_object_type (rowMap)
451  , rowMap_ (rowMap)
452  , colMap_ (colMap)
453  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
454  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
455  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
456  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
457  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
458  , pftype_ (pftype)
459  , k_numAllocPerRow_ (numEntPerRow.h_view)
460  , numAllocForAllRows_ (0)
461  , storageStatus_ (pftype == StaticProfile ?
462  ::Tpetra::Details::STORAGE_1D_UNPACKED :
463  ::Tpetra::Details::STORAGE_2D)
464  , indicesAreAllocated_ (false)
465  , indicesAreLocal_ (false)
466  , indicesAreGlobal_ (false)
467  , fillComplete_ (false)
468  , lowerTriangular_ (false)
469  , upperTriangular_ (false)
470  , indicesAreSorted_ (true)
471  , noRedundancies_ (true)
472  , haveLocalConstants_ (false)
473  , haveGlobalConstants_ (false)
474  , sortGhostsAssociatedWithEachProcessor_ (true)
475  {
476  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
477  staticAssertions ();
478 
479  const size_t lclNumRows = rowMap.is_null () ?
480  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
481  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
482  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
483  std::invalid_argument, "numEntPerRow has length " <<
484  numEntPerRow.extent (0) << " != the local number of rows " <<
485  lclNumRows << " as specified by " "the input row Map.");
486 
487  const bool debug = ::Tpetra::Details::Behavior::debug ();
488  if (debug) {
489  for (size_t r = 0; r < lclNumRows; ++r) {
490  const size_t curRowCount = numEntPerRow.h_view(r);
491  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
492  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
493  std::invalid_argument, "numEntPerRow(" << r << ") "
494  "specifies an invalid number of entries "
495  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
496  }
497  }
498 
499  resumeFill (params);
500  checkInternalState ();
501  }
502 
503 
504  template <class LocalOrdinal, class GlobalOrdinal, class Node>
506  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
507  const Teuchos::RCP<const map_type>& colMap,
508  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
509  ProfileType pftype,
510  const Teuchos::RCP<Teuchos::ParameterList>& params) :
511  dist_object_type (rowMap)
512  , rowMap_ (rowMap)
513  , colMap_ (colMap)
514  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
515  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
516  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
517  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
518  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
519  , pftype_ (pftype)
520  , numAllocForAllRows_ (0)
521  , storageStatus_ (pftype == StaticProfile ?
522  ::Tpetra::Details::STORAGE_1D_UNPACKED :
523  ::Tpetra::Details::STORAGE_2D)
524  , indicesAreAllocated_ (false)
525  , indicesAreLocal_ (false)
526  , indicesAreGlobal_ (false)
527  , fillComplete_ (false)
528  , lowerTriangular_ (false)
529  , upperTriangular_ (false)
530  , indicesAreSorted_ (true)
531  , noRedundancies_ (true)
532  , haveLocalConstants_ (false)
533  , haveGlobalConstants_ (false)
534  , sortGhostsAssociatedWithEachProcessor_ (true)
535  {
536  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,"
537  "params): ";
538  staticAssertions ();
539 
540  const size_t lclNumRows = rowMap.is_null () ?
541  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
542  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
543  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
544  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
545  << " != the local number of rows " << lclNumRows << " as specified by "
546  "the input row Map.");
547 
548  const bool debug = ::Tpetra::Details::Behavior::debug ();
549  if (debug) {
550  for (size_t r = 0; r < lclNumRows; ++r) {
551  const size_t curRowCount = numEntPerRow[r];
552  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
553  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
554  std::invalid_argument, "numEntPerRow(" << r << ") "
555  "specifies an invalid number of entries "
556  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
557  }
558  }
559 
560  // Deep-copy the input (ArrayRCP, therefore host accessible) into
561  // k_numAllocPerRow_. The latter is a const View, so we have to
562  // copy into a nonconst View first, then assign.
563  typedef decltype (k_numAllocPerRow_) out_view_type;
564  typedef typename out_view_type::non_const_type nc_view_type;
565  typedef Kokkos::View<const size_t*,
566  typename nc_view_type::array_layout,
567  Kokkos::HostSpace,
568  Kokkos::MemoryUnmanaged> in_view_type;
569  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
570  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
571  lclNumRows);
572  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
573  k_numAllocPerRow_ = numAllocPerRowOut;
574 
575  resumeFill (params);
576  checkInternalState ();
577  }
578 
579 
580  template <class LocalOrdinal, class GlobalOrdinal, class Node>
582  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
583  const Teuchos::RCP<const map_type>& colMap,
584  const typename local_graph_type::row_map_type& rowPointers,
585  const typename local_graph_type::entries_type::non_const_type& columnIndices,
586  const Teuchos::RCP<Teuchos::ParameterList>& params) :
587  dist_object_type (rowMap)
588  , rowMap_(rowMap)
589  , colMap_(colMap)
590  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
591  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
592  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
593  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
594  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
595  , pftype_(StaticProfile)
596  , numAllocForAllRows_(0)
597  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
598  , indicesAreAllocated_(true)
599  , indicesAreLocal_(true)
600  , indicesAreGlobal_(false)
601  , fillComplete_(false)
602  , lowerTriangular_ (false)
603  , upperTriangular_ (false)
604  , indicesAreSorted_(true)
605  , noRedundancies_(true)
606  , haveLocalConstants_ (false)
607  , haveGlobalConstants_ (false)
608  , sortGhostsAssociatedWithEachProcessor_(true)
609  {
610  staticAssertions ();
611  setAllIndices (rowPointers, columnIndices);
612  checkInternalState ();
613  }
614 
615 
616  template <class LocalOrdinal, class GlobalOrdinal, class Node>
618  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
619  const Teuchos::RCP<const map_type>& colMap,
620  const Teuchos::ArrayRCP<size_t>& rowPointers,
621  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
622  const Teuchos::RCP<Teuchos::ParameterList>& params) :
623  dist_object_type (rowMap)
624  , rowMap_ (rowMap)
625  , colMap_ (colMap)
626  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
627  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
628  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
629  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
630  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
631  , pftype_ (StaticProfile)
632  , numAllocForAllRows_ (0)
633  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
634  , indicesAreAllocated_ (true)
635  , indicesAreLocal_ (true)
636  , indicesAreGlobal_ (false)
637  , fillComplete_ (false)
638  , lowerTriangular_ (false)
639  , upperTriangular_ (false)
640  , indicesAreSorted_ (true)
641  , noRedundancies_ (true)
642  , haveLocalConstants_ (false)
643  , haveGlobalConstants_ (false)
644  , sortGhostsAssociatedWithEachProcessor_ (true)
645  {
646  staticAssertions ();
647  setAllIndices (rowPointers, columnIndices);
648  checkInternalState ();
649  }
650 
651 
652  template <class LocalOrdinal, class GlobalOrdinal, class Node>
654  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
655  const Teuchos::RCP<const map_type>& colMap,
656  const local_graph_type& k_local_graph_,
657  const Teuchos::RCP<Teuchos::ParameterList>& params)
658  : CrsGraph (k_local_graph_,
659  rowMap,
660  colMap,
661  Teuchos::null,
662  Teuchos::null,
663  params)
664  {}
665 
666  template <class LocalOrdinal, class GlobalOrdinal, class Node>
668  CrsGraph (const local_graph_type& k_local_graph_,
669  const Teuchos::RCP<const map_type>& rowMap,
670  const Teuchos::RCP<const map_type>& colMap,
671  const Teuchos::RCP<const map_type>& domainMap,
672  const Teuchos::RCP<const map_type>& rangeMap,
673  const Teuchos::RCP<Teuchos::ParameterList>& params)
674  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
675  , rowMap_ (rowMap)
676  , colMap_ (colMap)
677  , lclGraph_ (k_local_graph_)
678  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
679  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
680  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
681  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
682  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
683  , pftype_ (StaticProfile)
684  , numAllocForAllRows_ (0)
685  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
686  , indicesAreAllocated_ (true)
687  , indicesAreLocal_ (true)
688  , indicesAreGlobal_ (false)
689  , fillComplete_ (false)
690  , lowerTriangular_ (false)
691  , upperTriangular_ (false)
692  , indicesAreSorted_ (true)
693  , noRedundancies_ (true)
694  , haveLocalConstants_ (false)
695  , haveGlobalConstants_ (false)
696  , sortGhostsAssociatedWithEachProcessor_ (true)
697  {
698  staticAssertions();
699  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
700 
701  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
702  colMap.is_null (), std::runtime_error,
703  ": The input column Map must be nonnull.");
704  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
705  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
706  std::runtime_error,
707  ": The input row Map and the input local graph need to have the same "
708  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
709  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
710  << " row(s).");
711  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
712  // rowMap_->getNodeNumElements(), but it doesn't have to.
713  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
714  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
715  // ": The input row Map and the input local graph need to have the same "
716  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
717  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
718  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
719  k_lclInds1D_.extent (0) != 0 || k_gblInds1D_.extent (0) != 0, std::logic_error,
720  ": cannot have 1D data structures allocated.");
721  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
722  ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error,
723  ": cannot have 2D data structures allocated.");
724 
725  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
726  rangeMap .is_null() ? rowMap_ : rangeMap);
727  Teuchos::Array<int> remotePIDs (0); // unused output argument
728  this->makeImportExport (remotePIDs, false);
729 
730  k_lclInds1D_ = lclGraph_.entries;
731  k_rowPtrs_ = lclGraph_.row_map;
732 
733  const bool callComputeGlobalConstants = params.get () == nullptr ||
734  params->get ("compute global constants", true);
735  const bool computeLocalTriangularConstants = params.get () == nullptr ||
736  params->get ("compute local triangular constants", true);
737 
738  if (callComputeGlobalConstants) {
739  this->computeGlobalConstants (computeLocalTriangularConstants);
740  }
741  this->fillComplete_ = true;
742  this->checkInternalState ();
743  }
744 
// NOTE(review): the listing jumps from line 745 to line 748, so the
// declarator of this empty-bodied member is not visible here.  It is
// presumably the CrsGraph destructor -- confirm against the original file.
745  template <class LocalOrdinal, class GlobalOrdinal, class Node>
748  {}
749 
// Builds and returns a parameter list named "Tpetra::CrsGraph" that
// holds two sublists, "Import" and "Export".  Both are copies of one
// list that a temporary Distributor fills in with its defaults.
// rowMap_ is dereferenced here without a null check.
//
// NOTE(review): the listing jumps from line 751 to line 754, so the
// declarator is not visible.  setParameterList() calls
// getValidParameters() and stores a result of this return type, so this
// is presumably that method -- confirm against the original file.
750  template <class LocalOrdinal, class GlobalOrdinal, class Node>
751  Teuchos::RCP<const Teuchos::ParameterList>
754  {
755  using Teuchos::RCP;
756  using Teuchos::ParameterList;
757  using Teuchos::parameterList;
758 
759  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
760 
761  // Make a sublist for the Import.
762  RCP<ParameterList> importSublist = parameterList ("Import");
763 
764  // FIXME (mfh 02 Apr 2012) We should really have the Import and
765  // Export objects fill in these lists. However, we don't want to
766  // create an Import or Export unless we need them. For now, we
767  // know that the Import and Export just pass the list directly to
768  // their Distributor, so we can create a Distributor here
769  // (Distributor's constructor is a lightweight operation) and have
770  // it fill in the list.
771 
772  // Fill in Distributor default parameters by creating a
773  // Distributor and asking it to do the work.
774  Distributor distributor (rowMap_->getComm (), importSublist);
775  params->set ("Import", *importSublist, "How the Import performs communication.");
776 
777  // Make a sublist for the Export. For now, it's a clone of the
778  // Import sublist. It's not a shallow copy, though, since we
779  // might like the Import to do communication differently than the
780  // Export.
781  params->set ("Export", *importSublist, "How the Export performs communication.");
782 
783  return params;
784  }
785 
786 
787  template <class LocalOrdinal, class GlobalOrdinal, class Node>
788  void
790  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
791  {
792  Teuchos::RCP<const Teuchos::ParameterList> validParams =
793  getValidParameters ();
794  params->validateParametersAndSetDefaults (*validParams);
795  this->setMyParamList (params);
796  }
797 
798 
// Returns rowMap_->getGlobalNumElements(); rowMap_ is dereferenced
// without a null check.
//
// NOTE(review): the listing jumps from line 799 to line 803, so the
// return type and declarator are not visible here (presumably the
// accessor for the global number of rows -- confirm).
799  template <class LocalOrdinal, class GlobalOrdinal, class Node>
803  {
804  return rowMap_->getGlobalNumElements ();
805  }
806 
807 
// Returns the global number of elements of the domain Map.  Raises
// std::runtime_error (through the Teuchos test macro) if the graph is
// not fill complete or the domain Map is null.
//
// NOTE(review): the listing jumps from line 808 to line 812, so the
// return type and declarator are not visible here.  The message prefix
// below suggests the method is getGlobalNumCols -- confirm.
808  template <class LocalOrdinal, class GlobalOrdinal, class Node>
812  {
813  const char tfecfFuncName[] = "getGlobalNumCols: ";
814  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
815  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
816  "The graph does not have a domain Map. You may not call this method in "
817  "that case.");
818  return getDomainMap ()->getGlobalNumElements ();
819  }
820 
821 
// Returns the number of elements of the row Map on this process, or
// zero if the row Map is null.
//
// NOTE(review): the listing jumps from line 823 to line 826, so the
// declarator is not visible here (presumably the accessor for the
// local number of rows -- confirm).
822  template <class LocalOrdinal, class GlobalOrdinal, class Node>
823  size_t
826  {
827  return this->rowMap_.is_null () ?
828  static_cast<size_t> (0) :
829  this->rowMap_->getNodeNumElements ();
830  }
831 
832 
// Returns the number of elements of the column Map on this process, or
// zero if colMap_ is null.  Raises std::runtime_error (through the
// Teuchos test macro) if hasColMap() is false.
//
// NOTE(review): the listing jumps from line 834 to line 837, so the
// declarator is not visible here.  The message prefix below suggests
// the method is getNodeNumCols -- confirm.
833  template <class LocalOrdinal, class GlobalOrdinal, class Node>
834  size_t
837  {
838  const char tfecfFuncName[] = "getNodeNumCols: ";
839  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
840  ! hasColMap (), std::runtime_error,
841  "The graph does not have a column Map. You may not call this method "
842  "unless the graph has a column Map. This requires either that a custom "
843  "column Map was given to the constructor, or that fillComplete() has "
844  "been called.");
845  return colMap_.is_null () ? static_cast<size_t> (0) :
846  colMap_->getNodeNumElements ();
847  }
848 
849 
// Returns the stored nodeNumDiags_ value unchanged.
// NOTE(review): the method-name lines (listing 852-853) are missing from
// this extract; presumably the "Impl" routine that the next definition
// calls as getNodeNumDiagsImpl().
850  template <class LocalOrdinal, class GlobalOrdinal, class Node>
851  size_t
854  {
855  return nodeNumDiags_;
856  }
857 
858 
// Thin wrapper: forwards to getNodeNumDiagsImpl().
// NOTE(review): the method-name lines (listing 861-862) are missing from
// this extract.
859  template <class LocalOrdinal, class GlobalOrdinal, class Node>
860  size_t
863  {
864  return this->getNodeNumDiagsImpl ();
865  }
866 
867 
// Returns the stored globalNumDiags_ value.
// Throws std::logic_error (through the test macro) when
// haveGlobalConstants_ is false.
// NOTE(review): the signature lines (listing 869-871) are missing from
// this extract; presumably the "Impl" routine that the next definition
// calls as getGlobalNumDiagsImpl().
868  template <class LocalOrdinal, class GlobalOrdinal, class Node>
872  {
873  const char tfecfFuncName[] = "getGlobalNumDiags: ";
874  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
875  (! this->haveGlobalConstants_, std::logic_error,
876  "The graph does not have global constants computed, "
877  "but the user has requested them.");
878 
879  return globalNumDiags_;
880  }
881 
882 
// Thin wrapper: forwards to getGlobalNumDiagsImpl().
// NOTE(review): the signature lines (listing 884-886) are missing from
// this extract.
883  template <class LocalOrdinal, class GlobalOrdinal, class Node>
887  {
888  return this->getGlobalNumDiagsImpl ();
889  }
890 
891 
// Returns rowMap_->getNode(), or Teuchos::null when rowMap_ is null.
892  template <class LocalOrdinal, class GlobalOrdinal, class Node>
893  Teuchos::RCP<Node>
895  getNode () const
896  {
897  return rowMap_.is_null () ? Teuchos::null : rowMap_->getNode ();
898  }
899 
900 
// Returns the stored row Map handle (rowMap_) as-is; no null check is made.
901  template <class LocalOrdinal, class GlobalOrdinal, class Node>
902  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
904  getRowMap () const
905  {
906  return rowMap_;
907  }
908 
909 
// Returns the stored column Map handle (colMap_) as-is; no null check is
// made.
910  template <class LocalOrdinal, class GlobalOrdinal, class Node>
911  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
913  getColMap () const
914  {
915  return colMap_;
916  }
917 
918 
// Returns the stored domain Map handle (domainMap_) as-is.
// NOTE(review): the method-name lines (listing 921-922) are missing from
// this extract.
919  template <class LocalOrdinal, class GlobalOrdinal, class Node>
920  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
923  {
924  return domainMap_;
925  }
926 
927 
// Returns the stored range Map handle (rangeMap_) as-is.
928  template <class LocalOrdinal, class GlobalOrdinal, class Node>
929  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
931  getRangeMap () const
932  {
933  return rangeMap_;
934  }
935 
936 
// Returns the stored Import handle (importer_) as-is.
937  template <class LocalOrdinal, class GlobalOrdinal, class Node>
938  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
940  getImporter () const
941  {
942  return importer_;
943  }
944 
945 
// Returns the stored Export handle (exporter_) as-is.
946  template <class LocalOrdinal, class GlobalOrdinal, class Node>
947  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
949  getExporter () const
950  {
951  return exporter_;
952  }
953 
954 
// True exactly when colMap_ is non-null.
955  template <class LocalOrdinal, class GlobalOrdinal, class Node>
956  bool
958  hasColMap () const
959  {
960  return ! colMap_.is_null ();
961  }
962 
963 
// Reports "optimized storage" when all three hold: indices are allocated,
// k_numRowEntries_ has zero extent, and this process has at least one row.
// Throws std::logic_error (through the test macro) if that state coincides
// with getProfileType() == DynamicProfile.
// NOTE(review): the method-name lines (listing 966-967) are missing from
// this extract; the name is taken from tfecfFuncName below.
964  template <class LocalOrdinal, class GlobalOrdinal, class Node>
965  bool
968  {
969  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
970  // getNodeNumRows() is zero?
971 
972  const bool isOpt = indicesAreAllocated_ &&
973  k_numRowEntries_.extent (0) == 0 &&
974  getNodeNumRows () > 0;
975 
976  const char tfecfFuncName[] = "isStorageOptimized: ";
     // NOTE(review): the message below says "The matrix", although the
     // members tested here belong to the graph.
977  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
978  (isOpt && getProfileType () == DynamicProfile, std::logic_error,
979  "The matrix claims to have optimized storage, but getProfileType() "
980  "returns DynamicProfile. This should never happen. Please report this "
981  "bug to the Tpetra developers.");
982 
983  return isOpt;
984  }
985 
986 
// Returns the stored pftype_ value.
// NOTE(review): the signature lines (listing 988-990) are missing from
// this extract; presumably the getProfileType() accessor that other
// definitions in this file call.
987  template <class LocalOrdinal, class GlobalOrdinal, class Node>
991  {
992  return pftype_;
993  }
994 
995 
// Returns the stored globalNumEntries_ value.
// Throws std::logic_error (through the test macro) when
// haveGlobalConstants_ is false.
// NOTE(review): the signature lines (listing 997-999) are missing from
// this extract; the name is taken from tfecfFuncName below.
996  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1000  {
1001  const char tfecfFuncName[] = "getGlobalNumEntries: ";
1002  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1003  (! this->haveGlobalConstants_, std::logic_error,
1004  "The graph does not have global constants computed, "
1005  "but the user has requested them.");
1006 
1007  return globalNumEntries_;
1008  }
1009 
1010 
// Counts the entries stored on this process.
//
// Returns 0 when indices are not allocated or there are no local rows.
// When k_numRowEntries_ is empty, the count is read from the last row
// offset, lclGraph_.row_map(lclNumRows), via getEntryOnHost (or is 0 if
// row_map has fewer than lclNumRows + 1 entries).  Otherwise it is the sum
// of k_numRowEntries_ over the first min(lclNumRows, extent) rows.
// NOTE(review): the method-name lines (listing 1013-1014) are missing from
// this extract; the name is presumed from context.
1011  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1012  size_t
1015  {
1016  typedef LocalOrdinal LO;
1017 
1018  if (this->indicesAreAllocated_) {
1019  const LO lclNumRows = this->getNodeNumRows ();
1020  if (lclNumRows == 0) {
1021  return static_cast<size_t> (0);
1022  }
1023  else {
1024  // Avoid the "*this capture" issue by creating a local Kokkos::View.
1025  auto numEntPerRow = this->k_numRowEntries_;
1026  const LO numNumEntPerRow = numEntPerRow.extent (0);
1027  if (numNumEntPerRow == 0) {
      // No per-row counts: fall back to the row offsets, guarding
      // against a row_map that is too short to index at lclNumRows.
1028  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1029  static_cast<LO> (lclNumRows + 1)) {
1030  return static_cast<size_t> (0);
1031  }
1032  else {
1033  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1034  }
1035  }
1036  else { // k_numRowEntries_ is populated
1037  // k_numRowEntries_ is actually a host View, so we run
1038  // the sum in its native execution space. This also means
1039  // that we can use explicit capture (which could perhaps
1040  // improve build time) instead of KOKKOS_LAMBDA, and avoid
1041  // any CUDA build issues with trying to run a __device__ -
1042  // only function on host.
1043  typedef typename num_row_entries_type::execution_space
1044  host_exec_space;
1045  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
1046 
      // Clamp the loop bound so the reduction never indexes past the
      // end of numEntPerRow.
1047  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
1048  lclNumRows :
1049  numNumEntPerRow;
1050  size_t nodeNumEnt = 0;
1051  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
1052  range_type (0, upperLoopBound),
1053  [=] (const LO& k, size_t& lclSum) {
1054  lclSum += numEntPerRow(k);
1055  }, nodeNumEnt);
1056  return nodeNumEnt;
1057  }
1058  }
1059  }
1060  else { // nothing allocated on this process, so no entries
1061  return static_cast<size_t> (0);
1062  }
1063  }
1064 
1065 
// Returns the stored globalMaxNumRowEntries_ value.
// Throws std::logic_error (through the test macro) when
// haveGlobalConstants_ is false.
// NOTE(review): the signature lines (listing 1067-1069) are missing from
// this extract; the name is taken from tfecfFuncName below.
1066  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1070  {
1071  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1072  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1073  (! this->haveGlobalConstants_, std::logic_error,
1074  "The graph does not have global constants computed, "
1075  "but the user has requested them.");
1076 
1077  return globalMaxNumRowEntries_;
1078  }
1079 
1080 
// Returns the stored nodeMaxNumRowEntries_ value; unlike the global
// variant above, no "constants computed" check is made here.
// NOTE(review): the method-name lines (listing 1083-1084) are missing from
// this extract.
1081  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1082  size_t
1085  {
1086  return nodeMaxNumRowEntries_;
1087  }
1088 
1089 
// Returns the fillComplete_ flag.
// NOTE(review): the method-name lines (listing 1092-1093) are missing from
// this extract; presumably the isFillComplete() predicate used elsewhere
// in this file.
1090  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1091  bool
1094  {
1095  return fillComplete_;
1096  }
1097 
1098 
// Returns the negation of the fillComplete_ flag.
// NOTE(review): the method-name lines (listing 1101-1102) are missing from
// this extract.
1099  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1100  bool
1103  {
1104  return ! fillComplete_;
1105  }
1106 
1107 
// Returns the stored lowerTriangular_ flag.
// NOTE(review): the method-name lines (listing 1110-1111) are missing from
// this extract; presumably the routine the next definition calls as
// isLowerTriangularImpl().
1108  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1109  bool
1112  {
1113  return this->lowerTriangular_;
1114  }
1115 
1116 
// Thin wrapper: forwards to isLowerTriangularImpl().
// NOTE(review): the method-name lines (listing 1119-1120) are missing from
// this extract.
1117  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1118  bool
1121  {
1122  return this->isLowerTriangularImpl ();
1123  }
1124 
1125 
// Returns the stored upperTriangular_ flag.
// NOTE(review): the method-name lines (listing 1128-1129) are missing from
// this extract; presumably the routine the next definition calls as
// isUpperTriangularImpl().
1126  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1127  bool
1130  {
1131  return this->upperTriangular_;
1132  }
1133 
1134 
// Thin wrapper: forwards to isUpperTriangularImpl().
// NOTE(review): the method-name lines (listing 1137-1138) are missing from
// this extract.
1135  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1136  bool
1139  {
1140  return this->isUpperTriangularImpl ();
1141  }
1142 
1143 
// Returns the indicesAreLocal_ flag.
// NOTE(review): the method-name lines (listing 1146-1147) are missing from
// this extract; presumably the isLocallyIndexed() predicate used elsewhere
// in this file.
1144  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1145  bool
1148  {
1149  return indicesAreLocal_;
1150  }
1151 
1152 
// Returns the indicesAreGlobal_ flag.
// NOTE(review): the method-name lines (listing 1155-1156) are missing from
// this extract; presumably the isGloballyIndexed() predicate used
// elsewhere in this file.
1153  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1154  bool
1157  {
1158  return indicesAreGlobal_;
1159  }
1160 
1161 
// Reports how many index slots are allocated on this process.
//
// Returns OrdinalTraits<size_t>::invalid() when indices are not allocated,
// and 0 when there are no local rows.  Otherwise the answer depends on
// storageStatus_:
//   STORAGE_1D_PACKED   - last entry of lclGraph_.row_map (read on host)
//   STORAGE_1D_UNPACKED - last entry of k_rowPtrs_ (read on host)
//   STORAGE_2D          - sum of the per-row array sizes
//   anything else       - 0
// NOTE(review): the method-name lines (listing 1164-1165) are missing from
// this extract; the name is presumed from context.
1162  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1163  size_t
1166  {
1167  typedef LocalOrdinal LO;
1168 
1169  if (this->indicesAreAllocated_) {
1170  const LO lclNumRows = this->getNodeNumRows ();
1171  if (lclNumRows == 0) {
1172  return static_cast<size_t> (0);
1173  }
1174  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED) {
      // Guard against a row_map too short to index at lclNumRows.
1175  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1176  static_cast<LO> (lclNumRows + 1)) {
1177  return static_cast<size_t> (0);
1178  }
1179  else {
1180  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1181  }
1182  }
1183  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) {
      // NOTE(review): this branch only tests for an empty k_rowPtrs_,
      // whereas the packed branch above checks extent < lclNumRows + 1.
      // Confirm that k_rowPtrs_ can never be nonempty but too short.
1184  if (this->k_rowPtrs_.extent (0) == 0) {
1185  return static_cast<size_t> (0);
1186  }
1187  else {
1188  return ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, lclNumRows);
1189  }
1190  }
1191  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_2D) {
1192  size_t numAllocated = 0;
1193  if (this->isLocallyIndexed ()) {
1194  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1195  numAllocated += this->lclInds2D_[lclRow].size ();
1196  }
1197  }
1198  else if (this->isGloballyIndexed ()) {
1199  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1200  numAllocated += this->gblInds2D_[lclRow].size ();
1201  }
1202  }
1203  // Neither locally nor globally indexed, means no indices allocated.
1204  return numAllocated;
1205  }
1206  else {
1207  return static_cast<size_t> (0);
1208  }
1209  }
1210  else {
1211  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1212  }
1213  }
1214 
1215 
// Returns rowMap_->getComm(), or Teuchos::null when rowMap_ is null.
1216  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1217  Teuchos::RCP<const Teuchos::Comm<int> >
1219  getComm () const
1220  {
1221  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1222  }
1223 
1224 
// Returns rowMap_->getIndexBase().
// NOTE(review): rowMap_ is dereferenced without the null check that
// getComm() and getNode() perform.  The method-name lines (listing
// 1227-1228) are missing from this extract.
1225  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1226  GlobalOrdinal
1229  {
1230  return rowMap_->getIndexBase ();
1231  }
1232 
1233 
// Returns the indicesAreAllocated_ flag.
1234  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1235  bool
1237  indicesAreAllocated () const
1238  {
1239  return indicesAreAllocated_;
1240  }
1241 
1242 
// Returns the indicesAreSorted_ flag.
1243  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1244  bool
1246  isSorted () const
1247  {
1248  return indicesAreSorted_;
1249  }
1250 
1251 
// Returns the noRedundancies_ flag.
1252  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1253  bool
1255  isMerged () const
1256  {
1257  return noRedundancies_;
1258  }
1259 
1260 
// Marks the graph as modified: clears indicesAreSorted_, noRedundancies_
// and haveLocalConstants_ so that dependent state is recomputed later.
// NOTE(review): the method-name lines (listing 1263-1264) are missing from
// this extract; the name is presumed from context.
1261  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1262  void
1265  {
1266  // FIXME (mfh 07 May 2013) How do we know that the change
1267  // introduced a redundancy, or even that it invalidated the sorted
1268  // order of indices? CrsGraph has always made this conservative
1269  // guess. It could be a bit costly to check at insertion time,
1270  // though.
1271  indicesAreSorted_ = false;
1272  noRedundancies_ = false;
1273 
1274  // We've modified the graph, so we'll have to recompute local
1275  // constants like the number of diagonal entries on this process.
1276  haveLocalConstants_ = false;
1277  }
1278 
1279 
// Allocates storage for this process's column indices, either local or
// global according to `lg`.
//
// StaticProfile: builds row offsets (from k_numAllocPerRow_ when it is
// nonempty, otherwise from the constant numAllocForAllRows_), allocates
// one 1-D index array sized by the last offset, and sets storageStatus_ to
// STORAGE_1D_UNPACKED.  Any other profile: allocates one array per row and
// sets storageStatus_ to STORAGE_2D.
//
// Afterwards the indexing flags are set from `lg`, k_numRowEntries_ is
// replaced by a zero-filled array (when numRows > 0), the allocation hints
// are cleared, indicesAreAllocated_ becomes true, and checkInternalState()
// is run.
//
// Throws std::logic_error if the graph's current indexing mode conflicts
// with `lg` or indices are already allocated; std::invalid_argument for
// inconsistent allocation hints in the static case; and rethrows (with
// added context) anything checkInternalState() throws.
//
// NOTE(review): listing lines 1282, 1336 and 1356 are missing from this
// extract; 1282 is presumably the class qualifier and the other two
// presumably bring computeOffsetsFromCounts and
// computeOffsetsFromConstantCount into scope - confirm against the real
// file.
1280  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1281  void
1283  allocateIndices (const ELocalGlobal lg)
1284  {
1285  using Teuchos::arcp;
1286  using Teuchos::Array;
1287  using Teuchos::ArrayRCP;
1288  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1289  typedef typename local_graph_type::row_map_type::non_const_type
1290  non_const_row_map_type;
1291  typedef typename local_graph_type::entries_type::non_const_type
1292  lcl_col_inds_type;
1293  typedef Kokkos::View<GlobalOrdinal*,
1294  typename lcl_col_inds_type::array_layout,
1295  device_type> gbl_col_inds_type;
1296  const char tfecfFuncName[] = "allocateIndices: ";
1297  const char suffix[] = " Please report this bug to the Tpetra developers.";
1298 
1299  // This is a protected function, only callable by us. If it was
1300  // called incorrectly, it is our fault. That's why the tests
1301  // below throw std::logic_error instead of std::invalid_argument.
1302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1303  (this->isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1304  "The graph is locally indexed, but Tpetra code is calling this method "
1305  "with lg=GlobalIndices." << suffix);
1306  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1307  (this->isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1308  "The graph is globally indexed, but Tpetra code is calling this method "
1309  "with lg=LocalIndices. " << suffix);
1310  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1311  (this->indicesAreAllocated (), std::logic_error, "The graph's indices "
1312  "are already allocated, but Tpetra is calling allocateIndices again."
1313  << suffix);
1314  const size_t numRows = this->getNodeNumRows ();
1315 
1316  if (this->getProfileType () == StaticProfile) {
1317  //
1318  // STATIC ALLOCATION PROFILE
1319  //
      // Row offsets need one more entry than there are rows.
1320  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1321 
1322  if (this->k_numAllocPerRow_.extent (0) != 0) {
1323  // It's OK to throw std::invalid_argument here, because we
1324  // haven't incurred any side effects yet. Throwing that
1325  // exception (and not, say, std::logic_error) implies that the
1326  // instance can recover.
1327  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1328  (this->k_numAllocPerRow_.extent (0) != numRows,
1329  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1330  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1331  << ", but its length != numRows = " << numRows << ".");
1332 
1333  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1334  // we want to compute here) lives on device. That's OK;
1335  // computeOffsetsFromCounts can handle this case.
1337 
1338  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1339  // doesn't attempt to check its input for "invalid" flag
1340  // values. For now, we omit that feature of the sequential
1341  // code disabled below.
1342  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1343  }
1344  else {
1345  // It's OK to throw std::invalid_argument here, because we
1346  // haven't incurred any side effects yet. Throwing that
1347  // exception (and not, say, std::logic_error) implies that the
1348  // instance can recover.
1349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1350  (this->numAllocForAllRows_ ==
1351  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1352  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1353  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1354  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1355 
1357  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1358  }
1359 
1360  // "Commit" the resulting row offsets.
1361  this->k_rowPtrs_ = k_rowPtrs;
1362 
      // The final offset is the total number of index slots to allocate.
1363  const size_type numInds = ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, numRows);
1364  // const size_type numInds = static_cast<size_type> (this->k_rowPtrs_(numRows));
1365  if (lg == LocalIndices) {
1366  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1367  }
1368  else {
1369  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1370  }
1371  storageStatus_ = ::Tpetra::Details::STORAGE_1D_UNPACKED;
1372  }
1373  else {
1374  //
1375  // DYNAMIC ALLOCATION PROFILE
1376  //
1377  const bool useNumAllocPerRow =
1378  (this->k_numAllocPerRow_.extent (0) != 0);
1379 
1380  if (lg == LocalIndices) {
1381  this->lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows);
1382  for (size_t i = 0; i < numRows; ++i) {
      // Per-row hint if available, otherwise the uniform hint.
1383  const size_t howMany = useNumAllocPerRow ?
1384  this->k_numAllocPerRow_(i) :
1385  this->numAllocForAllRows_;
1386  if (howMany > 0) {
1387  this->lclInds2D_[i].resize (howMany);
1388  }
1389  }
1390  }
1391  else { // allocate global indices
1392  this->gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows);
1393  for (size_t i = 0; i < numRows; ++i) {
1394  const size_t howMany = useNumAllocPerRow ?
1395  this->k_numAllocPerRow_(i) :
1396  this->numAllocForAllRows_;
1397  if (howMany > 0) {
1398  this->gblInds2D_[i].resize (howMany);
1399  }
1400  }
1401  }
1402  this->storageStatus_ = ::Tpetra::Details::STORAGE_2D;
1403  }
1404 
1405  this->indicesAreLocal_ = (lg == LocalIndices);
1406  this->indicesAreGlobal_ = (lg == GlobalIndices);
1407 
1408  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1409  using Kokkos::ViewAllocateWithoutInitializing;
1410  typedef decltype (k_numRowEntries_) row_ent_type;
1411  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1412 
1413  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1414  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1415  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1416  }
1417 
1418  // Once indices are allocated, CrsGraph needs to free this information.
1419  this->numAllocForAllRows_ = 0;
1420  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1421  this->indicesAreAllocated_ = true;
1422 
      // Sanity-check the new state; any exception is rethrown with a
      // message that identifies this call site.
1423  try {
1424  this->checkInternalState ();
1425  }
1426  catch (std::logic_error& e) {
1427  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1428  (true, std::logic_error, "At end of allocateIndices, "
1429  "checkInternalState threw std::logic_error: "
1430  << e.what ());
1431  }
1432  catch (std::exception& e) {
1433  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1434  (true, std::runtime_error, "At end of allocateIndices, "
1435  "checkInternalState threw std::exception: "
1436  << e.what ());
1437  }
1438  catch (...) {
1439  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1440  (true, std::runtime_error, "At end of allocateIndices, "
1441  "checkInternalState threw an exception "
1442  "not a subclass of std::exception.");
1443  }
1444  }
1445 
1446 
// Returns a read-only ArrayView over the allocated local-index slots of
// the row described by `rowinfo` (allocSize entries, not numEntries).
// An empty view is returned when the row has no allocation, or when 1-D
// storage is empty and the row's 2-D array is empty.
// The view does not own the data it points at.
1447  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1448  Teuchos::ArrayView<const LocalOrdinal>
1449  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1450  getLocalView (const RowInfo rowinfo) const
1451  {
1452  using Kokkos::subview;
1453  typedef LocalOrdinal LO;
1454  typedef Kokkos::View<const LO*, execution_space,
1455  Kokkos::MemoryUnmanaged> row_view_type;
1456 
1457  if (rowinfo.allocSize == 0) { // nothing in the row to view
1458  return Teuchos::ArrayView<const LO> ();
1459  }
1460  else { // the row has allocated slots
1461  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1462  const size_t start = rowinfo.offset1D;
1463  const size_t len = rowinfo.allocSize;
1464  const std::pair<size_t, size_t> rng (start, start + len);
1465  // mfh 23 Nov 2015: Don't just create a subview of
1466  // k_lclInds1D_ directly, because that first creates a
1467  // _managed_ subview, then returns an unmanaged version of
1468  // that. That touches the reference count, which costs
1469  // performance in a measurable way.
1470  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
      // len equals allocSize, which is nonzero on this branch, so the
      // NULL alternative is purely defensive.
1471  const LO* const rowViewRaw = (len == 0) ? NULL : rowView.data ();
1472  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1473  }
1474  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1475  return lclInds2D_[rowinfo.localRow] ();
1476  }
1477  else {
1478  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1479  }
1480  }
1481  }
1482 
// Raw-pointer variant of the local row view.
//
// Outputs: `lclInds` receives a pointer to the row's first local-index
// slot (NULL if there is nothing to view) and `capacity` the number of
// slots (0 if there is nothing to view).
// Returns 0 on success.  In HAVE_TPETRA_DEBUG builds only, returns -1
// when the requested range or row lies outside the stored arrays.
// NOTE(review): listing line 1485 (presumably the class qualifier) is
// missing from this extract.
1483  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1484  LocalOrdinal
1486  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1487  LocalOrdinal& capacity,
1488  const RowInfo& rowInfo) const
1489  {
1490  lclInds = NULL;
1491  capacity = 0;
1492 
1493  if (rowInfo.allocSize != 0) {
1494  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1495 #ifdef HAVE_TPETRA_DEBUG
1496  if (rowInfo.offset1D + rowInfo.allocSize >
1497  static_cast<size_t> (k_lclInds1D_.extent (0))) {
1498  return static_cast<LocalOrdinal> (-1);
1499  }
1500 #endif // HAVE_TPETRA_DEBUG
1501  lclInds = &k_lclInds1D_[rowInfo.offset1D];
1502  capacity = rowInfo.allocSize;
1503  }
1504  else { // 2-D storage
1505 #ifdef HAVE_TPETRA_DEBUG
1506  if (rowInfo.localRow >= static_cast<size_t> (lclInds2D_.size ())) {
1507  return static_cast<LocalOrdinal> (-1);
1508  }
1509 #endif // HAVE_TPETRA_DEBUG
1510  // Use a const reference so we don't touch the ArrayRCP's ref
1511  // count, since ArrayRCP's ref count is not thread safe.
1512  const auto& curRow = lclInds2D_[rowInfo.localRow];
1513  if (! curRow.empty ()) {
1514  lclInds = curRow.getRawPtr ();
1515  capacity = curRow.size ();
1516  }
1517  }
1518  }
1519  return static_cast<LocalOrdinal> (0);
1520  }
1521 
// Mutable counterpart of getLocalView: returns a writable ArrayView over
// the allocated local-index slots of the row described by `rowinfo`, or
// an empty view when there is nothing to view.
// NOTE(review): the class-qualifier and method-name lines (listing
// 1524-1525) are missing from this extract; the name is presumed.
1522  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1523  Teuchos::ArrayView<LocalOrdinal>
1526  {
1527  using Kokkos::subview;
1528  typedef LocalOrdinal LO;
1529  typedef Kokkos::View<LO*, execution_space,
1530  Kokkos::MemoryUnmanaged> row_view_type;
1531 
1532  if (rowinfo.allocSize == 0) { // nothing in the row to view
1533  return Teuchos::ArrayView<LO> ();
1534  }
1535  else {
1536  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1537  const size_t start = rowinfo.offset1D;
1538  const size_t len = rowinfo.allocSize;
1539  const std::pair<size_t, size_t> rng (start, start + len);
1540  // mfh 23 Nov 2015: Don't just create a subview of
1541  // k_lclInds1D_ directly, because that first creates a
1542  // _managed_ subview, then returns an unmanaged version of
1543  // that. That touches the reference count, which costs
1544  // performance in a measurable way.
1545  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1546  LO* const rowViewRaw = (len == 0) ? NULL : rowView.data ();
1547  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1548  }
1549  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1550  return lclInds2D_[rowinfo.localRow] ();
1551  }
1552  else {
1553  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1554  }
1555  }
1556  }
1557 
1558 
// Returns an unmanaged, read-only Kokkos::View over the allocated
// local-index slots of the row described by `rowInfo`, or a
// default-constructed view when there is nothing to view.
// NOTE(review): listing lines 1561 and 1563 (part of the return type and
// presumably the class qualifier) are missing from this extract.
1559  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1560  Kokkos::View<const LocalOrdinal*,
1562  Kokkos::MemoryUnmanaged>
1564  getLocalKokkosRowView (const RowInfo& rowInfo) const
1565  {
1566  typedef LocalOrdinal LO;
1567  typedef Kokkos::View<const LO*, execution_space,
1568  Kokkos::MemoryUnmanaged> row_view_type;
1569 
1570  if (rowInfo.allocSize == 0) { // nothing in the row to view
1571  return row_view_type ();
1572  }
1573  else { // the row has allocated slots
1574  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1575  const size_t start = rowInfo.offset1D;
1576  const size_t len = rowInfo.allocSize;
1577  const std::pair<size_t, size_t> rng (start, start + len);
1578  // mfh 23 Nov 2015: Don't just create a subview of
1579  // k_lclInds1D_ directly, because that first creates a
1580  // _managed_ subview, then returns an unmanaged version of
1581  // that. That touches the reference count, which costs
1582  // performance in a measurable way.
1583  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1584  }
1585  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1586  // Use a reference, so that I don't touch the
1587  // Teuchos::ArrayView reference count in a debug build. (It
1588  // has no reference count in a release build.) This ensures
1589  // thread safety.
1590  //
1591  // lclInds2D_ lives on host, so this code does not assume UVM.
1592  Teuchos::Array<LO>& lclInds = this->lclInds2D_[rowInfo.localRow];
1593  return row_view_type (lclInds.getRawPtr (), lclInds.size ());
1594  }
1595  else {
1596  return row_view_type (); // nothing in the row to view
1597  }
1598  }
1599  }
1600 
1601 
// Returns an unmanaged, writable Kokkos::View over the allocated
// local-index slots of the row described by `rowInfo`, or a
// default-constructed view when there is nothing to view.
1602  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1603  Kokkos::View<LocalOrdinal*,
1604  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1605  Kokkos::MemoryUnmanaged>
1606  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1607  getLocalKokkosRowViewNonConst (const RowInfo& rowInfo)
1608  {
1609  typedef LocalOrdinal LO;
1610  typedef Kokkos::View<LO*, execution_space,
1611  Kokkos::MemoryUnmanaged> row_view_type;
1612 
1613  if (rowInfo.allocSize == 0) { // nothing in the row to view
1614  return row_view_type ();
1615  }
1616  else { // the row has allocated slots
1617  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1618  const size_t start = rowInfo.offset1D;
1619  const size_t len = rowInfo.allocSize;
1620  const std::pair<size_t, size_t> rng (start, start + len);
1621  // mfh 23 Nov 2015: Don't just create a subview of
1622  // k_lclInds1D_ directly, because that first creates a
1623  // _managed_ subview, then returns an unmanaged version of
1624  // that. That touches the reference count, which costs
1625  // performance in a measurable way.
1626  return Kokkos::subview (row_view_type (this->k_lclInds1D_), rng);
1627  }
1628  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1629  // Use a reference, so that I don't touch the
1630  // Teuchos::ArrayView reference count in a debug build. (It
1631  // has no reference count in a release build.) This ensures
1632  // thread safety.
1633  //
1634  // lclInds2D_ lives on host, so this code does not assume UVM.
1635  Teuchos::Array<LO>& cols = this->lclInds2D_[rowInfo.localRow];
1636  LO* const colsRaw = cols.getRawPtr ();
1637  return row_view_type (colsRaw, cols.size ());
1638  }
1639  else {
1640  return row_view_type (); // nothing in the row to view
1641  }
1642  }
1643  }
1644 
1645 
// Returns an unmanaged, read-only Kokkos::View over the allocated
// global-index slots of the row described by `rowinfo`, or a
// default-constructed view when there is nothing to view.
1646  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1647  Kokkos::View<const GlobalOrdinal*,
1648  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1649  Kokkos::MemoryUnmanaged>
1650  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1651  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1652  {
1653  typedef GlobalOrdinal GO;
1654  typedef Kokkos::View<const GO*, execution_space,
1655  Kokkos::MemoryUnmanaged> row_view_type;
1656 
1657  if (rowinfo.allocSize == 0) { // nothing in the row to view
1658  return row_view_type ();
1659  }
1660  else { // the row has allocated slots
1661  if (this->k_gblInds1D_.extent (0) != 0) { // 1-D storage
1662  const size_t start = rowinfo.offset1D;
1663  const size_t len = rowinfo.allocSize;
1664  const std::pair<size_t, size_t> rng (start, start + len);
1665  // mfh 23 Nov 2015: Don't just create a subview of
1666  // k_gblInds1D_ directly, because that first creates a
1667  // _managed_ subview, then returns an unmanaged version of
1668  // that. That touches the reference count, which costs
1669  // performance in a measurable way.
1670  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1671  }
1672  else if (! this->gblInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1673  // Use a reference, so that I don't touch the
1674  // Teuchos::ArrayView reference count in a debug build. (It
1675  // has no reference count in a release build.) This ensures
1676  // thread safety.
1677  //
1678  // gblInds2D_ lives on host, so this code does not assume UVM.
1679  Teuchos::Array<GO>& cols = this->gblInds2D_[rowinfo.localRow];
1680  return row_view_type (cols.getRawPtr (), cols.size ());
1681  }
1682  else {
1683  return row_view_type (); // nothing in the row to view
1684  }
1685  }
1686  }
1687 
1688 
// Returns a read-only ArrayView over the allocated global-index slots of
// the row described by `rowinfo`.  The view stays default-constructed
// (empty) when the row has no allocation, or when 1-D storage is empty
// and the row's 2-D array is empty.
1689  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1690  Teuchos::ArrayView<const GlobalOrdinal>
1691  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1692  getGlobalView (const RowInfo& rowinfo) const
1693  {
1694  Teuchos::ArrayView<const GlobalOrdinal> view;
1695  if (rowinfo.allocSize > 0) {
1696  if (k_gblInds1D_.extent (0) != 0) {
      // 1-D storage: the row occupies [offset1D, offset1D + allocSize).
1697  auto rng = std::make_pair (rowinfo.offset1D,
1698  rowinfo.offset1D + rowinfo.allocSize);
1699  // mfh 23 Nov 2015: Don't just create a subview of
1700  // k_gblInds1D_ directly, because that first creates a
1701  // _managed_ subview, then returns an unmanaged version of
1702  // that. That touches the reference count, which costs
1703  // performance in a measurable way.
1704  Kokkos::View<const GlobalOrdinal*, execution_space,
1705  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1706  view = Kokkos::Compat::getConstArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1707  }
1708  else if (! gblInds2D_[rowinfo.localRow].empty()) {
      // 2-D storage: view the whole per-row array.
1709  view = gblInds2D_[rowinfo.localRow] ();
1710  }
1711  }
1712  return view;
1713  }
1714 
1715 
// Raw-pointer variant of the global row view.
//
// Outputs: `gblInds` receives a pointer to the row's first global-index
// slot (NULL if there is nothing to view) and `capacity` the number of
// slots (0 if there is nothing to view).
// Returns 0 on success.  In HAVE_TPETRA_DEBUG builds only, returns -1
// when the requested range or row lies outside the stored arrays.
// NOTE(review): listing line 1718 (presumably the class qualifier) is
// missing from this extract.
1716  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1717  LocalOrdinal
1719  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1720  LocalOrdinal& capacity,
1721  const RowInfo& rowInfo) const
1722  {
1723  gblInds = NULL;
1724  capacity = 0;
1725 
1726  if (rowInfo.allocSize != 0) {
1727  if (k_gblInds1D_.extent (0) != 0) { // 1-D storage
1728 #ifdef HAVE_TPETRA_DEBUG
1729  if (rowInfo.offset1D + rowInfo.allocSize >
1730  static_cast<size_t> (k_gblInds1D_.extent (0))) {
1731  return static_cast<LocalOrdinal> (-1);
1732  }
1733 #endif // HAVE_TPETRA_DEBUG
1734  gblInds = &k_gblInds1D_[rowInfo.offset1D];
1735  capacity = rowInfo.allocSize;
1736  }
1737  else { // 2-D storage
1738 #ifdef HAVE_TPETRA_DEBUG
1739  if (rowInfo.localRow >= static_cast<size_t> (gblInds2D_.size ())) {
1740  return static_cast<LocalOrdinal> (-1);
1741  }
1742 #endif // HAVE_TPETRA_DEBUG
      // Bind by const reference rather than copying the row handle.
1743  const auto& curRow = gblInds2D_[rowInfo.localRow];
1744  if (! curRow.empty ()) {
1745  gblInds = curRow.getRawPtr ();
1746  capacity = curRow.size ();
1747  }
1748  }
1749  }
1750  return static_cast<LocalOrdinal> (0);
1751  }
1752 
1753 
// Mutable counterpart of getGlobalView: returns a writable ArrayView over
// the allocated global-index slots of the row described by `rowinfo`, or
// an empty view when there is nothing to view.
// NOTE(review): the class-qualifier and method-name lines (listing
// 1756-1757) are missing from this extract; the name is presumed.
1754  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1755  Teuchos::ArrayView<GlobalOrdinal>
1758  {
1759  Teuchos::ArrayView<GlobalOrdinal> view;
1760  if (rowinfo.allocSize > 0) {
1761  if (k_gblInds1D_.extent (0) != 0) {
1762  auto rng = std::make_pair (rowinfo.offset1D,
1763  rowinfo.offset1D + rowinfo.allocSize);
1764  // mfh 23 Nov 2015: Don't just create a subview of
1765  // k_gblInds1D_ directly, because that first creates a
1766  // _managed_ subview, then returns an unmanaged version of
1767  // that. That touches the reference count, which costs
1768  // performance in a measurable way.
1769  Kokkos::View<GlobalOrdinal*, execution_space,
1770  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1771  view = Kokkos::Compat::getArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1772  }
1773  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1774  view = gblInds2D_[rowinfo.localRow] ();
1775  }
1776  }
1777  return view;
1778  }
1779 
1780 
// Builds a RowInfo record (localRow, allocSize, numEntries, offset1D) for
// local row `myRow`.
//
// If rowMap_ is null or `myRow` is not a local element of it, localRow
// and offset1D are set to the invalid size_t value and both sizes to 0.
// Otherwise:
//   - indices allocated, StaticProfile: offset and allocation size come
//     from k_rowPtrs_; numEntries comes from k_numRowEntries_, or equals
//     allocSize when that array is empty;
//   - indices allocated, other profile: offset1D is invalid, allocSize is
//     the size of the row's 2-D array, numEntries comes from
//     k_numRowEntries_ (0 when that array is empty);
//   - indices not yet allocated: allocSize is the pending allocation hint
//     and numEntries is 0.
// NOTE(review): listing line 1783 (presumably the class qualifier) is
// missing from this extract.
1781  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1782  RowInfo
1784  getRowInfo (const LocalOrdinal myRow) const
1785  {
1786  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1787  RowInfo ret;
1788  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1789  ret.localRow = STINV;
1790  ret.allocSize = 0;
1791  ret.numEntries = 0;
1792  ret.offset1D = STINV;
1793  return ret;
1794  }
1795 
1796  ret.localRow = static_cast<size_t> (myRow);
1797  if (this->indicesAreAllocated ()) {
1798  if (this->getProfileType () == StaticProfile) {
1799  // Offsets tell us the allocation size in this case.
1800  if (this->k_rowPtrs_.extent (0) == 0) {
1801  ret.offset1D = 0;
1802  ret.allocSize = 0;
1803  }
1804  else {
      // NOTE(review): k_rowPtrs_ is indexed directly here, while other
      // code in this file reads it through getEntryOnHost and a comment
      // in allocateIndices says the offsets live on device.  Confirm
      // this access is valid from host in all configurations.
1805  ret.offset1D = this->k_rowPtrs_(myRow);
1806  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1807  }
1808 
1809  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1810  ret.allocSize :
1811  this->k_numRowEntries_(myRow);
1812  }
1813  else { // DynamicProfile
1814  ret.offset1D = STINV;
1815  if (this->isLocallyIndexed ()) {
1816  ret.allocSize = (this->lclInds2D_.size () == 0) ?
1817  size_t (0) :
1818  this->lclInds2D_[myRow].size ();
1819  }
1820  else if (this->isGloballyIndexed ()) {
1821  ret.allocSize = (this->gblInds2D_.size () == 0) ?
1822  size_t (0) :
1823  this->gblInds2D_[myRow].size ();
1824  }
1825  else { // neither locally nor globally indexed means no indices alloc'd
1826  ret.allocSize = 0;
1827  }
1828 
1829  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1830  size_t (0) :
1831  this->k_numRowEntries_(myRow);
1832  }
1833  }
1834  else { // haven't performed allocation yet; probably won't hit this code
1835  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1836  // allocate, rather than doing lazy allocation at first insert.
1837  // This will make k_numAllocPerRow_ obsolete.
1838  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1839  this->k_numAllocPerRow_(myRow) : // this is a host View
1840  this->numAllocForAllRows_;
1841  ret.numEntries = 0;
1842  ret.offset1D = STINV;
1843  }
1844 
1845  return ret;
1846  }
1847 
1848 
1849  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1850  RowInfo
1852  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1853  {
1854  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1855  RowInfo ret;
1856  if (this->rowMap_.is_null ()) {
1857  ret.localRow = STINV;
1858  ret.allocSize = 0;
1859  ret.numEntries = 0;
1860  ret.offset1D = STINV;
1861  return ret;
1862  }
1863  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1864  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1865  ret.localRow = STINV;
1866  ret.allocSize = 0;
1867  ret.numEntries = 0;
1868  ret.offset1D = STINV;
1869  return ret;
1870  }
1871 
1872  ret.localRow = static_cast<size_t> (myRow);
1873  if (this->indicesAreAllocated ()) {
1874  // graph data structures have the info that we need
1875  //
1876  // if static graph, offsets tell us the allocation size
1877  if (this->getProfileType() == StaticProfile) {
1878  if (this->k_rowPtrs_.extent (0) == 0) {
1879  ret.offset1D = 0;
1880  ret.allocSize = 0;
1881  }
1882  else {
1883  ret.offset1D = this->k_rowPtrs_(myRow);
1884  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1885  }
1886 
1887  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1888  ret.allocSize :
1889  this->k_numRowEntries_(myRow);
1890  }
1891  else { // DynamicProfile
1892  ret.offset1D = STINV;
1893  if (this->isLocallyIndexed ()) {
1894  ret.allocSize = (this->lclInds2D_.size () == 0) ?
1895  size_t (0) :
1896  this->lclInds2D_[myRow].size ();
1897  }
1898  else {
1899  ret.allocSize = (this->gblInds2D_.size () == 0) ?
1900  size_t (0) :
1901  this->gblInds2D_[myRow].size ();
1902  }
1903 
1904  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1905  size_t (0) :
1906  this->k_numRowEntries_(myRow);
1907  }
1908  }
1909  else { // haven't performed allocation yet; probably won't hit this code
1910  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1911  // allocate, rather than doing lazy allocation at first insert.
1912  // This will make k_numAllocPerRow_ obsolete.
1913  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1914  this->k_numAllocPerRow_(myRow) : // this is a host View
1915  this->numAllocForAllRows_;
1916  ret.numEntries = 0;
1917  ret.offset1D = STINV;
1918  }
1919 
1920  return ret;
1921  }
1922 
1923 
1924  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1925  void
1927  staticAssertions () const
1928  {
1929  using Teuchos::OrdinalTraits;
1930  typedef LocalOrdinal LO;
1931  typedef GlobalOrdinal GO;
1932  typedef global_size_t GST;
1933 
1934  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1935  // This is so that we can store local indices in the memory
1936  // formerly occupied by global indices.
1937  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1938  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1939  // Assumption: max(size_t) >= max(LocalOrdinal)
1940  // This is so that we can represent any LocalOrdinal as a size_t.
1941  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1942  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1943  static_assert (sizeof(GST) >= sizeof(size_t),
1944  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1945 
1946  // FIXME (mfh 30 Sep 2015) We're not using
1947  // Teuchos::CompileTimeAssert any more. Can we do these checks
1948  // with static_assert?
1949 
1950  // can't call max() with CompileTimeAssert, because it isn't a
1951  // constant expression; will need to make this a runtime check
1952  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1953  "given template arguments: size assumptions are not valid.";
1954  TEUCHOS_TEST_FOR_EXCEPTION(
1955  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1956  std::runtime_error, msg);
1957  TEUCHOS_TEST_FOR_EXCEPTION(
1958  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1959  std::runtime_error, msg);
1960  TEUCHOS_TEST_FOR_EXCEPTION(
1961  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1962  std::runtime_error, msg);
1963  TEUCHOS_TEST_FOR_EXCEPTION(
1964  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1965  std::runtime_error, msg);
1966  }
1967 
1968 
// Append column indices to the end of one row ("insertIndices", per
// the function-name string and error message in the body).
//
// NOTE(review): the line that carries the function's name and its
// first parameter is not visible in this listing.  The body reads
// and updates an object named `rowinfo` (localRow, numEntries), so
// that is presumably the first parameter -- confirm against the
// class declaration.
//
// newInds: holds the input indices; `ginds` is read when lg is
//   GlobalIndices, `linds` when lg is LocalIndices.
// lg: whether the input indices are global or local.
// I: whether the graph stores global or local indices for this row.
//
// Returns the number of input indices.  All of them are appended
// after the row's current entries; no duplicate removal happens here.
// Global input stored as local is converted through the column Map.
// Local input stored as global is not implemented and throws
// std::logic_error.
1969  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1970  size_t
1973  const SLocalGlobalViews &newInds,
1974  const ELocalGlobal lg,
1975  const ELocalGlobal I)
1976  {
1977  using Teuchos::ArrayView;
1978  typedef LocalOrdinal LO;
1979  typedef GlobalOrdinal GO;
1980 
1981 #ifdef HAVE_TPETRA_DEBUG
1982  const char tfecfFuncName[] = "insertIndices: ";
// Remember the entry count before insertion, for the check at the end.
1983  const size_t oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1984 #endif // HAVE_TPETRA_DEBUG
1985 
1986 #ifdef HAVE_TPETRA_DEBUG
1987  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1988  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1989  "lg must be either GlobalIndices or LocalIndices.");
1990 #endif // HAVE_TPETRA_DEBUG
// Stays zero if lg matches neither case below.
1991  size_t numNewInds = 0;
1992  if (lg == GlobalIndices) { // input indices are global
1993  ArrayView<const GO> new_ginds = newInds.ginds;
1994  numNewInds = new_ginds.size();
1995  if (I == GlobalIndices) { // store global indices
1996  ArrayView<GO> gind_view = this->getGlobalViewNonConst (rowinfo);
1997 #ifdef HAVE_TPETRA_DEBUG
// Debug builds only: the row's view must have room for the new indices.
1998  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1999  (static_cast<size_t> (gind_view.size ()) <
2000  rowinfo.numEntries + numNewInds, std::logic_error,
2001  "gind_view.size() = " << gind_view.size ()
2002  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2003  << ") + numNewInds (= " << numNewInds << ").");
2004 #endif // HAVE_TPETRA_DEBUG
// Write position: just past the row's current entries.
2005  GO* const gblColInds_out = gind_view.getRawPtr () + rowinfo.numEntries;
2006  for (size_t k = 0; k < numNewInds; ++k) {
2007  gblColInds_out[k] = new_ginds[k];
2008  }
2009  }
2010  else if (I == LocalIndices) { // store local indices
2011  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2012 #ifdef HAVE_TPETRA_DEBUG
2013  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2014  (static_cast<size_t> (lind_view.size ()) <
2015  rowinfo.numEntries + numNewInds, std::logic_error,
2016  "lind_view.size() = " << lind_view.size ()
2017  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2018  << ") + numNewInds (= " << numNewInds << ").");
2019 #endif // HAVE_TPETRA_DEBUG
2020  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
// Convert each global index to a local one through the column Map.
// The result of getLocalElement is stored as returned, unchecked.
2021  for (size_t k = 0; k < numNewInds; ++k) {
2022  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
2023  }
2024  }
2025  }
2026  else if (lg == LocalIndices) { // input indices are local
2027  ArrayView<const LO> new_linds = newInds.linds;
2028  numNewInds = new_linds.size();
2029  if (I == LocalIndices) { // store local indices
2030  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2031 #ifdef HAVE_TPETRA_DEBUG
2032  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2033  (static_cast<size_t> (lind_view.size ()) <
2034  rowinfo.numEntries + numNewInds, std::logic_error,
2035  "lind_view.size() = " << lind_view.size ()
2036  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2037  << ") + numNewInds (= " << numNewInds << ").");
2038 #endif // HAVE_TPETRA_DEBUG
2039  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
2040  for (size_t k = 0; k < numNewInds; ++k) {
2041  lclColInds_out[k] = new_linds[k];
2042  }
2043  }
2044  else if (I == GlobalIndices) {
// Unsupported combination: always throws, in every build.
2045  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Tpetra::CrsGraph::"
2046  "insertIndices: the case where the input indices are local and the "
2047  "indices to write are global (lg=LocalIndices, I=GlobalIndices) is "
2048  "not implemented, because it does not make sense." << std::endl <<
2049  "If you have correct local column indices, that means the graph has "
2050  "a column Map. In that case, you should be storing local indices.");
2051  }
2052  }
2053 
// Record the new entry count both in the caller's rowinfo and in the
// graph's per-row count, then mark the graph as locally modified.
2054  rowinfo.numEntries += numNewInds;
2055  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
2056  this->setLocallyModified ();
2057 
2058 #ifdef HAVE_TPETRA_DEBUG
// Debug builds only: the graph's reported entry count for this row
// must have grown by exactly numNewInds.
2059  const size_t chkNewNumEnt =
2060  this->getNumEntriesInLocalRow (rowinfo.localRow);
2061  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2062  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
2063  "chkNewNumEnt = " << chkNewNumEnt
2064  << " != oldNumEnt (= " << oldNumEnt
2065  << ") + numNewInds (= " << numNewInds << ").");
2066 #endif // HAVE_TPETRA_DEBUG
2067 
2068  return numNewInds;
2069  }
2070 
2071  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2072  size_t
2074  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
2075  const GlobalOrdinal inputGblColInds[],
2076  const size_t numInputInds)
2077  {
2078  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
2079  inputGblColInds, numInputInds);
2080  }
2081 
2082  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2083  size_t
2086  const GlobalOrdinal inputGblColInds[],
2087  const size_t numInputInds)
2088  {
2089  using Kokkos::subview;
2090  typedef LocalOrdinal LO;
2091  typedef GlobalOrdinal GO;
2092  typedef Kokkos::pair<size_t, size_t> range_type;
2093  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
2094 
2095  const LO lclRow = static_cast<LO> (rowInfo.localRow);
2096  size_t newNumEntries = rowInfo.numEntries + numInputInds; // preliminary
2097 
2098  if (newNumEntries > rowInfo.allocSize) {
2099  if (this->getProfileType () == StaticProfile) {
2100  // Count how many new indices are just duplicates of the old
2101  // ones. If enough are duplicates, then we're safe.
2102  //
2103  // TODO (09 Sep 2016) CrsGraph never supported this use case
2104  // before. Thus, we're justified in not optimizing it. We
2105  // could use binary search if the graph's current entries are
2106  // sorted, for example, but we just use linear search for now.
2107  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2108  (rowInfo.numEntries > rowInfo.allocSize, std::logic_error,
2109  "For local row " << lclRow << ", rowInfo.numEntries = "
2110  << rowInfo.numEntries << " > rowInfo.allocSize = "
2111  << rowInfo.allocSize
2112  << ". Please report this bug to the Tpetra developers.");
2113 
2114  size_t dupCount = 0;
2115  if (k_gblInds1D_.extent (0) != 0) {
2116  const size_t curOffset = rowInfo.offset1D;
2117  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2118  (static_cast<size_t> (k_gblInds1D_.extent (0)) < curOffset,
2119  std::logic_error, "k_gblInds1D_.extent(0) = "
2120  << this->k_gblInds1D_.extent (0)
2121  << " < offset1D = " << curOffset << ". "
2122  "Please report this bug to the Tpetra developers.");
2123  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2124  (static_cast<size_t> (k_gblInds1D_.extent (0)) <
2125  curOffset + rowInfo.numEntries,
2126  std::logic_error, "k_gblInds1D_.extent(0) = "
2127  << this->k_gblInds1D_.extent (0)
2128  << " < offset1D (= " << curOffset << ") + rowInfo.numEntries (= "
2129  << rowInfo.numEntries << "). "
2130  "Please report this bug to the Tpetra developers.");
2131  const Kokkos::pair<size_t, size_t>
2132  range (curOffset, curOffset + rowInfo.numEntries);
2133 
2134  auto gblIndsCur = subview (this->k_gblInds1D_, range);
2135  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2136  (static_cast<size_t> (gblIndsCur.extent (0)) !=
2137  rowInfo.numEntries, std::logic_error,
2138  "gblIndsCur.extent(0) = " << gblIndsCur.extent (0)
2139  << " != rowInfo.numEntries = " << rowInfo.numEntries
2140  << ". Please report this bug to the Tpetra developers.");
2141 
2142  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2143  const GO gblIndToInsert = inputGblColInds[k_new];
2144  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
2145  if (gblIndsCur[k_old] == gblIndToInsert) {
2146  // Input could itself have duplicates. Input is
2147  // const, so we can't remove duplicates. That's OK
2148  // here, though, because dupCount just refers to the
2149  // number of input entries that actually need to be
2150  // inserted.
2151  ++dupCount;
2152  }
2153  }
2154  } // for k_new in 0 .. numInputInds - 1
2155  } // if global 1-D indexing (k_gblInds1D_ not empty)
2156  else { // global 2-D indexing
2157  // mfh 21 Jul 2017: We use a Teuchos::Array<GO>& as the
2158  // left-hand side, because creating an Teuchos::ArrayView or
2159  // Teuchos::ArrayRCP that views a Teuchos::ArrayRCP is not
2160  // thread safe.
2161  Teuchos::Array<GO>& gblInds_out = this->gblInds2D_[lclRow];
2162  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2163  (rowInfo.allocSize != static_cast<size_t> (gblInds_out.size ()),
2164  std::logic_error, "rowInfo.allocSize = " << rowInfo.allocSize
2165  << " != gblInds_out.size() = " << gblInds_out.size ()
2166  << ". Please report this bug to the Tpetra developers.");
2167  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2168  (rowInfo.numEntries > static_cast<size_t> (gblInds_out.size ()),
2169  std::logic_error, "rowInfo.numEntries = " << rowInfo.numEntries
2170  << " > gblInds_out.size() = " << gblInds_out.size ()
2171  << ". Please report this bug to the Tpetra developers.");
2172  // mfh 21 Jul 2017: Creating a subview of a
2173  // Teuchos::ArrayView is not thread safe, but we don't need
2174  // to do this anyway.
2175  //auto gblIndsCur_out = gblInds_out (0, rowInfo.numEntries);
2176 
2177  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2178  const GO gblIndToInsert = inputGblColInds[k_new];
2179  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
2180  if (gblInds_out[k_old] == gblIndToInsert) {
2181  // Input could itself have duplicates. Input is
2182  // const, so we can't remove duplicates. That's OK
2183  // here, though, because dupCount just refers to the
2184  // number of input entries that actually need to be
2185  // inserted.
2186  ++dupCount;
2187  }
2188  } // for k_old in 0 .. rowInfo.numEntries - 1
2189  } // for k_new in 0 .. numInputInds - 1
2190  } // whether 1-D or 2-D indexing
2191 
2192  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2193  (numInputInds < dupCount, std::logic_error, "numInputInds = "
2194  << numInputInds << " < dupCount = " << dupCount
2195  << ". Please report this bug to the Tpetra developers.");
2196  const size_t numNewToInsert = numInputInds - dupCount;
2197 
2198  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2199  (rowInfo.numEntries + numNewToInsert > rowInfo.allocSize,
2200  std::runtime_error, "For local row " << lclRow << " on Process " <<
2201  this->getComm ()->getRank () << ", even after excluding " << dupCount
2202  << " duplicate(s) in input, the new number of entries " <<
2203  (rowInfo.numEntries + numNewToInsert) << " still exceeds this row's "
2204  "static allocation size " << rowInfo.allocSize << ". You must "
2205  "either fix the upper bound on the number of entries in this row, "
2206  "or switch from StaticProfile to DynamicProfile.");
2207 
2208  if (k_gblInds1D_.extent (0) != 0) { // global 1-D indexing
2209  const size_t curOffset = rowInfo.offset1D;
2210  auto gblIndsCur =
2211  subview (k_gblInds1D_, range_type (curOffset,
2212  curOffset + rowInfo.numEntries));
2213  auto gblIndsNew =
2214  subview (k_gblInds1D_, range_type (curOffset + rowInfo.numEntries,
2215  curOffset + rowInfo.allocSize));
2216 
2217  size_t curPos = 0;
2218  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2219  const GO gblIndToInsert = inputGblColInds[k_new];
2220 
2221  bool isAlreadyInOld = false;
2222  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
2223  if (gblIndsCur[k_old] == gblIndToInsert) {
2224  isAlreadyInOld = true;
2225  break;
2226  }
2227  }
2228  if (! isAlreadyInOld) {
2229  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2230  (curPos >= numNewToInsert, std::logic_error, "curPos = " <<
2231  curPos << " >= numNewToInsert = " << numNewToInsert << ". "
2232  "Please report this bug to the Tpetra developers.");
2233  gblIndsNew[curPos] = gblIndToInsert;
2234  ++curPos;
2235  }
2236  } // for each input column index
2237  }
2238  else { // global 2-D indexing
2239  // mfh 21 Jul 2017: This is not thread safe, because
2240  // creating an Teuchos::ArrayView or Teuchos::ArrayRCP that
2241  // views a Teuchos::ArrayRCP is not. We could fix that by
2242  // making the left-hand side Teuchos::ArrayRCP<GO>&, but
2243  // that would not solve the problem below.
2244  Teuchos::ArrayView<GO> gblInds = (this->gblInds2D_[lclRow]) ();
2245  // Teuchos::ArrayView::operator() takes (offset, size).
2246  //
2247  // mfh 21 Jul 2017: This is not thread safe, because
2248  // creating a subview of a Teuchos::ArrayView is not.
2249  auto gblIndsCur = gblInds (0, rowInfo.numEntries);
2250  auto gblIndsNew = gblInds (rowInfo.numEntries,
2251  rowInfo.allocSize - rowInfo.numEntries);
2252 
2253  size_t curPos = 0;
2254  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2255  const GO gblIndToInsert = inputGblColInds[k_new];
2256 
2257  bool isAlreadyInOld = false;
2258  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
2259  if (gblIndsCur[k_old] == gblIndToInsert) {
2260  isAlreadyInOld = true;
2261  break;
2262  }
2263  }
2264  if (! isAlreadyInOld) {
2265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2266  (curPos >= numNewToInsert, std::logic_error, "curPos = " <<
2267  curPos << " >= numNewToInsert = " << numNewToInsert << ". "
2268  "Please report this bug to the Tpetra developers.");
2269  gblIndsNew[curPos] = gblIndToInsert;
2270  ++curPos;
2271  }
2272  } // for k_new in 0 .. numInputInds - 1
2273  } // whether 1-D or 2-D indexing
2274 
2275  this->k_numRowEntries_(lclRow) = rowInfo.numEntries + numNewToInsert;
2276  this->setLocallyModified ();
2277 
2278 #ifdef HAVE_TPETRA_DEBUG
2279  newNumEntries = rowInfo.numEntries + numNewToInsert;
2280  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (lclRow);
2281  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2282  (chkNewNumEntries != newNumEntries, std::logic_error,
2283  "After inserting new entries, getNumEntriesInLocalRow(" << lclRow <<
2284  ") = " << chkNewNumEntries << " != newNumEntries = " << newNumEntries
2285  << ". Please report this bug to the Tpetra developers.");
2286 #endif // HAVE_TPETRA_DEBUG
2287 
2288  return numNewToInsert; // all done!
2289  } // if the graph is StaticProfile
2290  else { // DynamicProfile
2291  // update allocation, doubling size to reduce # reallocations
2292  size_t newAllocSize = 2*rowInfo.allocSize;
2293  if (newAllocSize < newNumEntries) {
2294  newAllocSize = newNumEntries;
2295  }
2296  this->gblInds2D_[lclRow].resize (newAllocSize);
2297  }
2298  } // newNumEntries > rowInfo.allocSize
2299 
2300  // Copy new indices at end of global index array
2301  if (k_gblInds1D_.extent (0) != 0) {
2302  const size_t offset = rowInfo.offset1D + rowInfo.numEntries;
2303  for (size_t k = 0; k < numInputInds; ++k) {
2304  this->k_gblInds1D_[offset + k] = inputGblColInds[k];
2305  }
2306  }
2307  else {
2308  GO* const whereToPutGblColInds =
2309  this->gblInds2D_[lclRow].getRawPtr () + rowInfo.numEntries;
2310  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2311  whereToPutGblColInds[k_new] = inputGblColInds[k_new];
2312  }
2313  }
2314 
2315  this->k_numRowEntries_(lclRow) += numInputInds;
2316  this->setLocallyModified ();
2317 
2318 #ifdef HAVE_TPETRA_DEBUG
2319  {
2320  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (lclRow);
2321  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2322  (chkNewNumEntries != newNumEntries, std::logic_error,
2323  "getNumEntriesInLocalRow(lclRow=" << lclRow << ") = "
2324  << chkNewNumEntries << " != newNumEntries = " << newNumEntries
2325  << ". Please report this bug to the Tpetra developers.");
2326  }
2327 #endif // HAVE_TPETRA_DEBUG
2328 
2329  return numInputInds;
2330  }
2331 
2332 
2333  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2334  void
2336  insertLocalIndicesImpl (const LocalOrdinal myRow,
2337  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2338  {
2339  using Kokkos::MemoryUnmanaged;
2340  using Kokkos::subview;
2341  using Kokkos::View;
2342  typedef LocalOrdinal LO;
2343  const char* tfecfFuncName ("insertLocallIndicesImpl: ");
2344 
2345  const RowInfo rowInfo = this->getRowInfo(myRow);
2346  const size_t numNewInds = indices.size();
2347  const size_t newNumEntries = rowInfo.numEntries + numNewInds;
2348  if (newNumEntries > rowInfo.allocSize) {
2349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2350  getProfileType() == StaticProfile, std::runtime_error,
2351  "New indices exceed statically allocated graph structure.");
2352 
2353  // update allocation, doubling size to reduce number of reallocations
2354  size_t newAllocSize = 2*rowInfo.allocSize;
2355  if (newAllocSize < newNumEntries) {
2356  newAllocSize = newNumEntries;
2357  }
2358  this->lclInds2D_[myRow].resize(newAllocSize);
2359  }
2360 
2361  // Store the new indices at the end of row myRow.
2362  if (this->k_lclInds1D_.extent (0) != 0) {
2363  typedef View<const LO*, execution_space, MemoryUnmanaged> input_view_type;
2364  typedef View<LO*, execution_space, MemoryUnmanaged> row_view_type;
2365 
2366  input_view_type inputInds (indices.getRawPtr (), indices.size ());
2367  const size_t start = rowInfo.offset1D + rowInfo.numEntries; // end of row
2368  const std::pair<size_t, size_t> rng (start, start + newNumEntries);
2369  // mfh 23 Nov 2015: Don't just create a subview of k_lclInds1D_
2370  // directly, because that first creates a _managed_ subview,
2371  // then returns an unmanaged version of that. That touches the
2372  // reference count, which costs performance in a measurable way.
2373  row_view_type myInds = subview (row_view_type (this->k_lclInds1D_), rng);
2374  Kokkos::deep_copy (myInds, inputInds);
2375  }
2376  else {
2377  std::copy (indices.begin (), indices.end (),
2378  this->lclInds2D_[myRow].begin () + rowInfo.numEntries);
2379  }
2380 
2381  this->k_numRowEntries_(myRow) += numNewInds;
2382  this->setLocallyModified ();
2383 #ifdef HAVE_TPETRA_DEBUG
2384  {
2385  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
2386  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2387  (chkNewNumEntries != newNumEntries, std::logic_error,
2388  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
2389  << " != newNumEntries = " << newNumEntries
2390  << ". Please report this bug to the Tpetra developers.");
2391  }
2392 #endif // HAVE_TPETRA_DEBUG
2393  }
2394 
2395 
2396  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2397  size_t
2398  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
2399  sortAndMergeRowIndices (const RowInfo& rowInfo,
2400  const bool sorted,
2401  const bool merged)
2402  {
2403  const size_t origNumEnt = rowInfo.numEntries;
2404  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
2405  origNumEnt != 0) {
2406  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2407 
2408  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
2409  if (! sorted) {
2410  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2411  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
2412  }
2413 
2414  if (! merged) {
2415  LocalOrdinal* const beg = lclColIndsRaw;
2416  LocalOrdinal* const end = beg + rowInfo.numEntries;
2417  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2418  LocalOrdinal* const newend = std::unique (beg, end);
2419  const size_t newNumEnt = newend - beg;
2420 
2421  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
2422  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
2423  return origNumEnt - newNumEnt; // the number of duplicates in the row
2424  }
2425  else {
2426  return static_cast<size_t> (0); // assume no duplicates
2427  }
2428  }
2429  else {
2430  return static_cast<size_t> (0); // no entries in the row
2431  }
2432  }
2433 
2434 
2435  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2436  void
2438  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2439  const Teuchos::RCP<const map_type>& rangeMap)
2440  {
2441  // simple pointer comparison for equality
2442  if (domainMap_ != domainMap) {
2443  domainMap_ = domainMap;
2444  importer_ = Teuchos::null;
2445  }
2446  if (rangeMap_ != rangeMap) {
2447  rangeMap_ = rangeMap;
2448  exporter_ = Teuchos::null;
2449  }
2450  }
2451 
2452 
// Reset the graph's cached global constants: the global number of
// entries, global number of diagonals, and global maximum number of
// row entries are each set to the invalid global_size_t value, and
// haveGlobalConstants_ is set to false.
//
// NOTE(review): the line carrying this function's name is not
// visible in this listing; presumably this is the graph's
// "clear global constants" method -- confirm against the class
// declaration.
2453  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2454  void
2457  {
2458  globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2459  globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2460  globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2461  haveGlobalConstants_ = false;
2462  }
2463 
2464 
2465  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2466  void
2469  {
2470  const bool debug = ::Tpetra::Details::Behavior::debug ();
2471  if (debug) {
2472  const char tfecfFuncName[] = "checkInternalState: ";
2473  const char suffix[] = " Please report this bug to the Tpetra developers.";
2474 
2475  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2476  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
2477  // check the internal state of this data structure
2478  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2479  // always remains in a valid state
2480 
2481  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2482  (this->rowMap_.is_null (), std::logic_error,
2483  "Row Map is null." << suffix);
2484  // This may access the row Map, so we need to check first (above)
2485  // whether the row Map is null.
2486  const LocalOrdinal lclNumRows =
2487  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2488 
2489  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2490  (this->isFillActive () == this->isFillComplete (), std::logic_error,
2491  "Graph cannot be both fill active and fill complete." << suffix);
2492  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2493  (this->isFillComplete () &&
2494  (this->colMap_.is_null () ||
2495  this->rangeMap_.is_null () ||
2496  this->domainMap_.is_null ()),
2497  std::logic_error,
2498  "Graph is full complete, but at least one of {column, range, domain} "
2499  "Map is null." << suffix);
2500  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2501  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2502  std::logic_error, "Storage is optimized, but indices are not "
2503  "allocated, not even trivially." << suffix);
2504  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2505  (this->indicesAreAllocated_ &&
2506  (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED ||
2507  this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) &&
2508  this->pftype_ == DynamicProfile, std::logic_error,
2509  "Graph claims to have allocated indices and 1-D storage "
2510  "(either packed or unpacked), but also claims to be DynamicProfile.");
2511  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2512  (this->indicesAreAllocated_ &&
2513  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2514  this->pftype_ == StaticProfile, std::logic_error,
2515  "Graph claims to have allocated indices and 2-D storage, "
2516  "but also claims to be StaticProfile.");
2517  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2518  (this->indicesAreAllocated_ &&
2519  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2520  this->isLocallyIndexed () &&
2521  static_cast<LocalOrdinal> (this->lclInds2D_.size ()) != lclNumRows,
2522  std::logic_error,
2523  "Graph claims to have allocated indices, be locally indexed, and have "
2524  "2-D storage, but lclInds2D_.size() = " << this->lclInds2D_.size ()
2525  << " != getNodeNumRows() = " << lclNumRows << ".");
2526  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2527  (this->indicesAreAllocated_ &&
2528  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2529  this->isGloballyIndexed () &&
2530  static_cast<LocalOrdinal> (this->gblInds2D_.size ()) != lclNumRows,
2531  std::logic_error,
2532  "Graph claims to have allocated indices, be globally indexed, and have "
2533  "2-D storage, but gblInds2D_.size() = " << this->gblInds2D_.size ()
2534  << " != getNodeNumRows() = " << lclNumRows << ".");
2535 
2536  size_t nodeAllocSize = 0;
2537  try {
2538  nodeAllocSize = this->getNodeAllocationSize ();
2539  }
2540  catch (std::logic_error& e) {
2541  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2542  (true, std::runtime_error, "getNodeAllocationSize threw "
2543  "std::logic_error: " << e.what ());
2544  }
2545  catch (std::exception& e) {
2546  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2547  (true, std::runtime_error, "getNodeAllocationSize threw an "
2548  "std::exception: " << e.what ());
2549  }
2550  catch (...) {
2551  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2552  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2553  "not a subclass of std::exception.");
2554  }
2555 
2556  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2557  (this->isStorageOptimized () &&
2558  nodeAllocSize != this->getNodeNumEntries (),
2559  std::logic_error, "Storage is optimized, but "
2560  "this->getNodeAllocationSize() = " << nodeAllocSize
2561  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2562  << "." << suffix);
2563  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2564  (! this->haveGlobalConstants_ &&
2565  (this->globalNumEntries_ != GSTI ||
2566  this->globalMaxNumRowEntries_ != GSTI),
2567  std::logic_error, "Graph claims not to have global constants, but "
2568  "some of the global constants are not marked as invalid." << suffix);
2569  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2570  (this->haveGlobalConstants_ &&
2571  (this->globalNumEntries_ == GSTI ||
2572  this->globalMaxNumRowEntries_ == GSTI),
2573  std::logic_error, "Graph claims to have global constants, but "
2574  "some of them are marked as invalid." << suffix);
2575  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2576  (this->haveGlobalConstants_ &&
2577  (this->globalNumEntries_ < this->getNodeNumEntries () ||
2578  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2579  std::logic_error, "Graph claims to have global constants, and "
2580  "all of the values of the global constants are valid, but "
2581  "some of the local constants are greater than "
2582  "their corresponding global constants." << suffix);
2583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2584  (this->indicesAreAllocated () &&
2585  (this->numAllocForAllRows_ != 0 ||
2586  this->k_numAllocPerRow_.extent (0) != 0),
2587  std::logic_error, "The graph claims that its indices are allocated, but "
2588  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2589  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2590  "the graph is supposed to release its \"allocation specifications\" "
2591  "when it allocates its indices." << suffix);
2592  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2593  (this->isStorageOptimized () && this->pftype_ != StaticProfile,
2594  std::logic_error,
2595  "Storage is optimized, but graph is not StaticProfile." << suffix);
2596  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2597  (this->isGloballyIndexed () &&
2598  this->k_rowPtrs_.extent (0) != 0 &&
2599  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2600  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_gblInds1D_.extent (0))),
2601  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2602  "the graph is globally indexed, then "
2603  "k_rowPtrs_ must have N+1 rows, and "
2604  "k_rowPtrs_(N) must equal k_gblInds1D_.extent(0)." << suffix);
2605  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2606  (this->isLocallyIndexed () &&
2607  this->k_rowPtrs_.extent (0) != 0 &&
2608  (static_cast<size_t> (k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2609  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_lclInds1D_.extent (0))),
2610  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2611  "the graph is locally indexed, then "
2612  "k_rowPtrs_ must have N+1 rows, and "
2613  "k_rowPtrs_(N) must equal k_lclInds1D_.extent(0)." << suffix);
2614 
2615  if (this->pftype_ == DynamicProfile) {
2616  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2617  (this->indicesAreAllocated () &&
2618  this->getNodeNumRows () > 0 &&
2619  this->lclInds2D_.is_null () &&
2620  this->gblInds2D_.is_null (),
2621  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2622  "the calling process has nonzero rows, but 2-D column index storage "
2623  "(whether local or global) is not present." << suffix);
2624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2625  (this->indicesAreAllocated () &&
2626  this->getNodeNumRows () > 0 &&
2627  this->k_numRowEntries_.extent (0) == 0,
2628  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2629  "the calling process has nonzero rows, but k_numRowEntries_ is not "
2630  "present." << suffix);
2631  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2632  (this->k_lclInds1D_.extent (0) != 0 ||
2633  this->k_gblInds1D_.extent (0) != 0,
2634  std::logic_error, "Graph has DynamicProfile, but "
2635  "1-D allocations are present." << suffix);
2636  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2637  (this->k_rowPtrs_.extent (0) != 0,
2638  std::logic_error, "Graph has DynamicProfile, but "
2639  "row offsets are present." << suffix);
2640  }
2641  else if (this->pftype_ == StaticProfile) {
2642  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2643  (this->indicesAreAllocated () &&
2644  nodeAllocSize > 0 &&
2645  this->k_lclInds1D_.extent (0) == 0 &&
2646  this->k_gblInds1D_.extent (0) == 0,
2647  std::logic_error, "Graph has StaticProfile and is allocated "
2648  "nonnontrivally, but 1-D allocations are not present." << suffix);
2649  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2650  (this->lclInds2D_ != Teuchos::null || this->gblInds2D_ != Teuchos::null,
2651  std::logic_error, "Graph has StaticProfile, but 2-D allocations are "
2652  "present." << suffix);
2653  }
2654 
2655  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2656  (! this->indicesAreAllocated () &&
2657  ((this->k_rowPtrs_.extent (0) != 0 ||
2658  this->k_numRowEntries_.extent (0) != 0) ||
2659  this->k_lclInds1D_.extent (0) != 0 ||
2660  this->lclInds2D_ != Teuchos::null ||
2661  this->k_gblInds1D_.extent (0) != 0 ||
2662  this->gblInds2D_ != Teuchos::null),
2663  std::logic_error, "If indices are not allocated, "
2664  "then none of the buffers should be." << suffix);
2665  // indices may be local or global only if they are allocated
2666  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2667  // indicesAreGlobal_)
2668  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2669  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2670  ! this->indicesAreAllocated_,
2671  std::logic_error, "Indices may be local or global only if they are "
2672  "allocated." << suffix);
2673  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2674  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2675  std::logic_error, "Indices may not be both local and global." << suffix);
2676  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2677  (this->indicesAreLocal_ &&
2678  (this->k_gblInds1D_.extent (0) != 0 || ! this->gblInds2D_.is_null ()),
2679  std::logic_error, "Indices are local, but either "
2680  "k_gblInds1D_.extent(0) (= "
2681  << this->k_gblInds1D_.extent (0) << ") != 0, or "
2682  "gblInds2D_ is not null. In other words, if indices are local, "
2683  "then global allocations should not be present." << suffix);
2684  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2685  (this->indicesAreGlobal_ &&
2686  (this->k_lclInds1D_.extent (0) != 0 ||
2687  ! this->lclInds2D_.is_null ()),
2688  std::logic_error, "Indices are global, but either "
2689  "k_lclInds1D_.extent(0) (= "
2690  << this->k_lclInds1D_.extent (0) << ") != 0, or "
2691  "lclInds2D_ is not null. In other words, if indices are global, "
2692  "then local allocations should not be present." << suffix);
2693  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2694  (this->indicesAreLocal_ &&
2695  nodeAllocSize > 0 &&
2696  this->k_lclInds1D_.extent (0) == 0 &&
2697  this->getNodeNumRows () > 0 &&
2698  this->lclInds2D_.is_null (),
2699  std::logic_error, "Indices are local, getNodeAllocationSize() = "
2700  << nodeAllocSize << " > 0, k_lclInds1D_.extent(0) = 0, "
2701  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2702  "lclInds2D_ is null." << suffix);
2703  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2704  (this->indicesAreGlobal_ &&
2705  nodeAllocSize > 0 &&
2706  this->k_gblInds1D_.extent (0) == 0 &&
2707  this->getNodeNumRows () > 0 &&
2708  this->gblInds2D_.is_null (),
2709  std::logic_error, "Indices are global, getNodeAllocationSize() = "
2710  << nodeAllocSize << " > 0, k_gblInds1D_.extent(0) = 0, "
2711  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2712  "gblInds2D_ is null." << suffix);
2713  // check the actual allocations
2714  if (this->indicesAreAllocated () &&
2715  this->pftype_ == StaticProfile &&
2716  this->k_rowPtrs_.extent (0) != 0) {
2717  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2718  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) !=
2719  this->getNodeNumRows () + 1,
2720  std::logic_error, "Graph is StaticProfile, indices are allocated, and "
2721  "k_rowPtrs_ has nonzero length, but k_rowPtrs_.extent(0) = "
2722  << this->k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = "
2723  << (this->getNodeNumRows () + 1) << "." << suffix);
2724  const size_t actualNumAllocated =
2725  ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
2726  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2727  (this->isLocallyIndexed () &&
2728  static_cast<size_t> (this->k_lclInds1D_.extent (0)) != actualNumAllocated,
2729  std::logic_error, "Graph is StaticProfile and locally indexed, "
2730  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2731  "k_lclInds1D_.extent(0) = " << this->k_lclInds1D_.extent (0)
2732  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2733  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2734  (this->isGloballyIndexed () &&
2735  static_cast<size_t> (this->k_gblInds1D_.extent (0)) != actualNumAllocated,
2736  std::logic_error, "Graph is StaticProfile and globally indexed, "
2737  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2738  "k_gblInds1D_.extent(0) = " << this->k_gblInds1D_.extent (0)
2739  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2740  }
2741  }
2742  }
2743 
2744 
2745  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2746  size_t
2748  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2749  {
2750  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2751  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2752  return Teuchos::OrdinalTraits<size_t>::invalid ();
2753  }
2754  else {
2755  return rowInfo.numEntries;
2756  }
2757  }
2758 
2759 
2760  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2761  size_t
2763  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2764  {
2765  const RowInfo rowInfo = this->getRowInfo (localRow);
2766  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2767  return Teuchos::OrdinalTraits<size_t>::invalid ();
2768  }
2769  else {
2770  return rowInfo.numEntries;
2771  }
2772  }
2773 
2774 
2775  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2776  size_t
2778  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2779  {
2780  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2781  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2782  return Teuchos::OrdinalTraits<size_t>::invalid ();
2783  }
2784  else {
2785  return rowInfo.allocSize;
2786  }
2787  }
2788 
2789 
2790  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2791  size_t
2793  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2794  {
2795  const RowInfo rowInfo = this->getRowInfo (localRow);
2796  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2797  return Teuchos::OrdinalTraits<size_t>::invalid ();
2798  }
2799  else {
2800  return rowInfo.allocSize;
2801  }
2802  }
2803 
2804 
// Return the row offsets held in k_rowPtrs_ as a
// Teuchos::ArrayRCP<const size_t>, or a null ArrayRCP if k_rowPtrs_ has
// zero extent.
//
// Two paths, chosen by whether size_t is the same type as the row
// offset type of local_graph_type::row_map_type:
//   - same type: k_rowPtrs_ is deep-copied into its HostMirror and that
//     mirror is wrapped with Kokkos::Compat::persistingView;
//   - different type: the offsets are first converted into a View of
//     size_t via ::Tpetra::Details::copyOffsets, then copied to that
//     View's HostMirror and wrapped the same way.
//
// In a HAVE_TPETRA_DEBUG build, std::logic_error is thrown if any of
// the intermediate or final results is unexpectedly null or mis-sized.
//
// NOTE(review): the declarator lines (embedded listing lines 2807-2808)
// are absent from this listing. The name getNodeRowPtrs is taken from
// the debug-only `prefix` string below -- confirm the exact signature
// against the upstream file.
2805  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2806  Teuchos::ArrayRCP<const size_t>
2809  {
2810  using Kokkos::ViewAllocateWithoutInitializing;
2811  using Kokkos::create_mirror_view;
2812  using Teuchos::ArrayRCP;
2813  typedef typename local_graph_type::row_map_type row_map_type;
2814  typedef typename row_map_type::non_const_value_type row_offset_type;
2815 #ifdef HAVE_TPETRA_DEBUG
2816  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2817  const char suffix[] = " Please report this bug to the Tpetra developers.";
2818 #endif // HAVE_TPETRA_DEBUG
2819  const size_t size = k_rowPtrs_.extent (0);
2820  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
2821 
  // No row offsets are stored: hand back a null ArrayRCP.
2822  if (size == 0) {
2823  return ArrayRCP<const size_t> ();
2824  }
2825 
  // Only one of these two is filled in below, depending on `same`.
2826  ArrayRCP<const row_offset_type> ptr_rot;
2827  ArrayRCP<const size_t> ptr_st;
2828  if (same) { // size_t == row_offset_type
2829  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2830  // of create_mirror_view might actually be a new allocation.
2831  // This helps with debugging when there are two memory spaces.
2832  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2833  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2834 #ifdef HAVE_TPETRA_DEBUG
2835  TEUCHOS_TEST_FOR_EXCEPTION(
2836  ptr_h.extent (0) != k_rowPtrs_.extent (0), std::logic_error,
2837  prefix << "size_t == row_offset_type, but ptr_h.extent(0) = "
2838  << ptr_h.extent (0) << " != k_rowPtrs_.extent(0) = "
2839  << k_rowPtrs_.extent (0) << ".");
2840  TEUCHOS_TEST_FOR_EXCEPTION(
2841  same && size != 0 && k_rowPtrs_.data () == NULL, std::logic_error,
2842  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2843  << size << " != 0, but k_rowPtrs_.data() == NULL." << suffix);
2844  TEUCHOS_TEST_FOR_EXCEPTION(
2845  same && size != 0 && ptr_h.data () == NULL, std::logic_error,
2846  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2847  << size << " != 0, but create_mirror_view(k_rowPtrs_).data() "
2848  "== NULL." << suffix);
2849 #endif // HAVE_TPETRA_DEBUG
2850  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2851  }
2852  else { // size_t != row_offset_type
  // Convert the offsets into a separate View of size_t, then copy that
  // View into its HostMirror before wrapping it.
2853  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2854  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2855  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2856  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2857  Kokkos::deep_copy (ptr_h, ptr_d);
2858  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2859  }
2860 #ifdef HAVE_TPETRA_DEBUG
2861  TEUCHOS_TEST_FOR_EXCEPTION(
2862  same && size != 0 && ptr_rot.is_null (), std::logic_error,
2863  prefix << "size_t == row_offset_type and size = " << size
2864  << " != 0, but ptr_rot is null." << suffix);
2865  TEUCHOS_TEST_FOR_EXCEPTION(
2866  ! same && size != 0 && ptr_st.is_null (), std::logic_error,
2867  prefix << "size_t != row_offset_type and size = " << size
2868  << " != 0, but ptr_st is null." << suffix);
2869 #endif // HAVE_TPETRA_DEBUG
2870 
2871  // If size_t == row_offset_type, return a persisting host view of
2872  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
  // if_c<same, ...>::select picks ptr_rot or ptr_st at compile time, so
  // the returned expression has the right static type in both cases.
2873 #ifdef HAVE_TPETRA_DEBUG
2874  ArrayRCP<const size_t> retval =
2875  Kokkos::Impl::if_c<same,
2876  ArrayRCP<const row_offset_type>,
2877  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2878  TEUCHOS_TEST_FOR_EXCEPTION(
2879  size != 0 && retval.is_null (), std::logic_error,
2880  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2881  return retval;
2882 #else
2883  return Kokkos::Impl::if_c<same,
2884  ArrayRCP<const row_offset_type>,
2885  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2886 #endif // HAVE_TPETRA_DEBUG
2887  }
2888 
2889 
// Return a Teuchos::ArrayRCP<const LocalOrdinal> obtained by applying
// Kokkos::Compat::persistingView to k_lclInds1D_.
//
// NOTE(review): the declarator (embedded listing lines 2892-2893) is
// absent from this listing, so the function's name and cv-qualifier
// are not visible here -- confirm against the upstream file.
2890  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2891  Teuchos::ArrayRCP<const LocalOrdinal>
2894  {
2895  return Kokkos::Compat::persistingView (k_lclInds1D_);
2896  }
2897 
2898 
2899  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2900  void
2902  getLocalRowCopy (LocalOrdinal localRow,
2903  const Teuchos::ArrayView<LocalOrdinal>&indices,
2904  size_t& numEntries) const
2905  {
2906  using Teuchos::ArrayView;
2907  typedef LocalOrdinal LO;
2908  typedef GlobalOrdinal GO;
2909  const char tfecfFuncName[] = "getLocalRowCopy: ";
2910 
2911  TEUCHOS_TEST_FOR_EXCEPTION(
2912  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2913  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2914  "does not have a column Map yet. That means we don't have local indices "
2915  "for columns yet, so it doesn't make sense to call this method. If the "
2916  "graph doesn't have a column Map yet, you should call fillComplete on "
2917  "it first.");
2918 
2919  // This does the right thing (reports an empty row) if the input
2920  // row is invalid.
2921  const RowInfo rowinfo = this->getRowInfo (localRow);
2922  // No side effects on error.
2923  const size_t theNumEntries = rowinfo.numEntries;
2924  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2925  (static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2926  "Specified storage (size==" << indices.size () << ") does not suffice "
2927  "to hold all " << theNumEntries << " entry/ies for this row.");
2928  numEntries = theNumEntries;
2929 
2930  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2931  if (isLocallyIndexed ()) {
2932  ArrayView<const LO> lview = getLocalView (rowinfo);
2933  for (size_t j = 0; j < theNumEntries; ++j) {
2934  indices[j] = lview[j];
2935  }
2936  }
2937  else if (isGloballyIndexed ()) {
2938  ArrayView<const GO> gview = getGlobalView (rowinfo);
2939  for (size_t j = 0; j < theNumEntries; ++j) {
2940  indices[j] = colMap_->getLocalElement (gview[j]);
2941  }
2942  }
2943  }
2944  }
2945 
2946 
2947  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2948  void
2950  getGlobalRowCopy (GlobalOrdinal globalRow,
2951  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2952  size_t& numEntries) const
2953  {
2954  using Teuchos::ArrayView;
2955  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2956 
2957  // This does the right thing (reports an empty row) if the input
2958  // row is invalid.
2959  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2960  const size_t theNumEntries = rowinfo.numEntries;
2961  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2962  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2963  "Specified storage (size==" << indices.size () << ") does not suffice "
2964  "to hold all " << theNumEntries << " entry/ies for this row.");
2965  numEntries = theNumEntries; // first side effect
2966 
2967  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2968  if (isLocallyIndexed ()) {
2969  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
2970  for (size_t j = 0; j < theNumEntries; ++j) {
2971  indices[j] = colMap_->getGlobalElement (lview[j]);
2972  }
2973  }
2974  else if (isGloballyIndexed ()) {
2975  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
2976  for (size_t j = 0; j < theNumEntries; ++j) {
2977  indices[j] = gview[j];
2978  }
2979  }
2980  }
2981  }
2982 
2983 
2984  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2985  void
2987  getLocalRowView (const LocalOrdinal localRow,
2988  Teuchos::ArrayView<const LocalOrdinal>& indices) const
2989  {
2990  const char tfecfFuncName[] = "getLocalRowView: ";
2991  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2992  isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2993  "currently stored as global indices, so we cannot return a view with "
2994  "local column indices, whether or not the graph has a column Map. If "
2995  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2996 
2997  // This does the right thing (reports an empty row) if the input
2998  // row is invalid.
2999  const RowInfo rowInfo = getRowInfo (localRow);
3000  indices = Teuchos::null;
3001  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3002  rowInfo.numEntries > 0) {
3003  indices = this->getLocalView (rowInfo);
3004  // getLocalView returns a view of the _entire_ row, including
3005  // any extra space at the end (which 1-D unpacked storage
3006  // might have, for example). That's why we have to take a
3007  // subview of the returned view.
3008  indices = indices (0, rowInfo.numEntries);
3009  }
3010 
3011 #ifdef HAVE_TPETRA_DEBUG
3012  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3013  (static_cast<size_t> (indices.size ()) !=
3014  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
3015  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
3016  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
3017  ". Please report this bug to the Tpetra developers.");
3018 #endif // HAVE_TPETRA_DEBUG
3019  }
3020 
3021 
3022  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3023  void
3025  getGlobalRowView (const GlobalOrdinal globalRow,
3026  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
3027  {
3028  const char tfecfFuncName[] = "getGlobalRowView: ";
3029  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3030  isLocallyIndexed (), std::runtime_error, "The graph's indices are "
3031  "currently stored as local indices, so we cannot return a view with "
3032  "global column indices. Use getGlobalRowCopy() instead.");
3033 
3034  // This does the right thing (reports an empty row) if the input
3035  // row is invalid.
3036  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
3037  indices = Teuchos::null;
3038  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3039  rowInfo.numEntries > 0) {
3040  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
3041  }
3042 
3043 #ifdef HAVE_TPETRA_DEBUG
3044  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3045  (static_cast<size_t> (indices.size ()) != getNumEntriesInGlobalRow (globalRow),
3046  std::logic_error, "indices.size() = " << indices.size ()
3047  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
3048  << getNumEntriesInGlobalRow (globalRow)
3049  << ". Please report this bug to the Tpetra developers.");
3050 #endif // HAVE_TPETRA_DEBUG
3051  }
3052 
3053 
3054  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3055  void
3057  insertLocalIndices (const LocalOrdinal localRow,
3058  const Teuchos::ArrayView<const LocalOrdinal>& indices)
3059  {
3060  const char tfecfFuncName[] = "insertLocalIndices";
3061 
3062  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3063  ! isFillActive (), std::runtime_error,
3064  ": requires that fill is active.");
3065  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3066  isGloballyIndexed (), std::runtime_error,
3067  ": graph indices are global; use insertGlobalIndices().");
3068  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3069  ! hasColMap (), std::runtime_error,
3070  ": cannot insert local indices without a column map.");
3071  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3072  ! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
3073  ": row does not belong to this node.");
3074  if (! indicesAreAllocated ()) {
3075  allocateIndices (LocalIndices);
3076  }
3077 
3078 #ifdef HAVE_TPETRA_DEBUG
3079  // In a debug build, if the graph has a column Map, test whether
3080  // any of the given column indices are not in the column Map.
3081  // Keep track of the invalid column indices so we can tell the
3082  // user about them.
3083  if (hasColMap ()) {
3084  using Teuchos::Array;
3085  using Teuchos::toString;
3086  using std::endl;
3087  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
3088 
3089  const map_type& colMap = * (getColMap ());
3090  Array<LocalOrdinal> badColInds;
3091  bool allInColMap = true;
3092  for (size_type k = 0; k < indices.size (); ++k) {
3093  if (! colMap.isNodeLocalElement (indices[k])) {
3094  allInColMap = false;
3095  badColInds.push_back (indices[k]);
3096  }
3097  }
3098  if (! allInColMap) {
3099  std::ostringstream os;
3100  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
3101  "entries in owned row " << localRow << ", at the following column "
3102  "indices: " << toString (indices) << "." << endl;
3103  os << "Of those, the following indices are not in the column Map on "
3104  "this process: " << toString (badColInds) << "." << endl << "Since "
3105  "the graph has a column Map already, it is invalid to insert entries "
3106  "at those locations.";
3107  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
3108  }
3109  }
3110 #endif // HAVE_TPETRA_DEBUG
3111 
3112  insertLocalIndicesImpl (localRow, indices);
3113 
3114 #ifdef HAVE_TPETRA_DEBUG
3115  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3116  indicesAreAllocated() == false || isLocallyIndexed() == false,
3117  std::logic_error,
3118  ": Violated stated post-conditions. Please contact Tpetra team.");
3119 #endif
3120  }
3121 
3122  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3123  void
3125  insertLocalIndices (const LocalOrdinal localRow,
3126  const LocalOrdinal numEnt,
3127  const LocalOrdinal inds[])
3128  {
3129  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
3130  this->insertLocalIndices (localRow, indsT);
3131  }
3132 
3133 
3134  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3135  void
3137  insertGlobalIndices (const GlobalOrdinal gblRow,
3138  const LocalOrdinal numInputInds,
3139  const GlobalOrdinal inputGblColInds[])
3140  {
3141  typedef LocalOrdinal LO;
3142  const char tfecfFuncName[] = "insertGlobalIndices: ";
3143 
3144  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3145  (this->isLocallyIndexed (), std::runtime_error,
3146  "graph indices are local; use insertLocalIndices().");
3147  // This can't really be satisfied for now, because if we are
3148  // fillComplete(), then we are local. In the future, this may
3149  // change. However, the rule that modification require active
3150  // fill will not change.
3151  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3152  (! this->isFillActive (), std::runtime_error,
3153  "You are not allowed to call this method if fill is not active. "
3154  "If fillComplete has been called, you must first call resumeFill "
3155  "before you may insert indices.");
3156  if (! this->indicesAreAllocated ()) {
3157  this->allocateIndices (GlobalIndices);
3158  }
3159  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
3160  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3161 #ifdef HAVE_TPETRA_DEBUG
3162  if (this->hasColMap ()) {
3163  using std::endl;
3164  const map_type& colMap = * (this->colMap_);
3165  // In a debug build, keep track of the nonowned ("bad") column
3166  // indices, so that we can display them in the exception
3167  // message. In a release build, just ditch the loop early if
3168  // we encounter a nonowned column index.
3169  std::vector<GlobalOrdinal> badColInds;
3170  bool allInColMap = true;
3171  for (LO k = 0; k < numInputInds; ++k) {
3172  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
3173  allInColMap = false;
3174  badColInds.push_back (inputGblColInds[k]);
3175  }
3176  }
3177  if (! allInColMap) {
3178  std::ostringstream os;
3179  os << "You attempted to insert entries in owned row " << gblRow
3180  << ", at the following column indices: [";
3181  for (LO k = 0; k < numInputInds; ++k) {
3182  os << inputGblColInds[k];
3183  if (k + static_cast<LO> (1) < numInputInds) {
3184  os << ",";
3185  }
3186  }
3187  os << "]." << endl << "Of those, the following indices are not in "
3188  "the column Map on this process: [";
3189  for (size_t k = 0; k < badColInds.size (); ++k) {
3190  os << badColInds[k];
3191  if (k + size_t (1) < badColInds.size ()) {
3192  os << ",";
3193  }
3194  }
3195  os << "]." << endl << "Since the matrix has a column Map already, "
3196  "it is invalid to insert entries at those locations.";
3197  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3198  (true, std::invalid_argument, os.str ());
3199  }
3200  }
3201 #endif // HAVE_TPETRA_DEBUG
3202  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
3203  }
3204  else { // a nonlocal row
3205  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
3206  numInputInds);
3207  }
3208  }
3209 
3210 
3211  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3212  void
3214  insertGlobalIndices (const GlobalOrdinal gblRow,
3215  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
3216  {
3217  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
3218  inputGblColInds.getRawPtr ());
3219  }
3220 
3221 
3222  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3223  void
3225  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
3226  const GlobalOrdinal gblColInds[],
3227  const LocalOrdinal numGblColInds)
3228  {
3229  typedef LocalOrdinal LO;
3230  typedef GlobalOrdinal GO;
3231  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
3232 
3233  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3234  (this->isLocallyIndexed (), std::runtime_error,
3235  "Graph indices are local; use insertLocalIndices().");
3236  // This can't really be satisfied for now, because if we are
3237  // fillComplete(), then we are local. In the future, this may
3238  // change. However, the rule that modification require active
3239  // fill will not change.
3240  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3241  (! this->isFillActive (), std::runtime_error,
3242  "You are not allowed to call this method if fill is not active. "
3243  "If fillComplete has been called, you must first call resumeFill "
3244  "before you may insert indices.");
3245  if (! this->indicesAreAllocated ()) {
3246  this->allocateIndices (GlobalIndices);
3247  }
3248 
3249  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
3250  // If we have a column Map, use it to filter the entries.
3251  if (! this->colMap_.is_null ()) {
3252  const map_type& colMap = * (this->colMap_);
3253 
3254  LO curOffset = 0;
3255  while (curOffset < numGblColInds) {
3256  // Find a sequence of input indices that are in the column Map
3257  // on the calling process. Doing a sequence at a time,
3258  // instead of one at a time, amortizes some overhead.
3259  LO endOffset = curOffset;
3260  for ( ; endOffset < numGblColInds; ++endOffset) {
3261  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
3262  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3263  break; // first entry, in current sequence, not in the column Map
3264  }
3265  }
3266  // curOffset, endOffset: half-exclusive range of indices in
3267  // the column Map on the calling process. If endOffset ==
3268  // curOffset, the range is empty.
3269  const LO numIndInSeq = (endOffset - curOffset);
3270  if (numIndInSeq != 0) {
3271  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
3272  numIndInSeq);
3273  }
3274  // Invariant before this line: Either endOffset ==
3275  // numGblColInds, or gblColInds[endOffset] is not in the
3276  // column Map on the calling process.
3277  curOffset = endOffset + 1;
3278  }
3279  }
3280  else {
3281  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
3282  gblColInds_av.size ());
3283  }
3284  }
3285 
3286  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3287  void
3289  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
3290  const GlobalOrdinal gblColInds[],
3291  const LocalOrdinal numGblColInds)
3292  {
3293  // This creates the std::vector if it doesn't exist yet.
3294  // std::map's operator[] does a lookup each time, so it's better
3295  // to pull nonlocals_[grow] out of the loop.
3296  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
3297  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
3298  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
3299  // order to avoid duplicates. globalAssemble() sorts these
3300  // anyway.
3301  nonlocalRow.push_back (gblColInds[k]);
3302  }
3303  }
3304 
3305  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3306  void
3308  removeLocalIndices (LocalOrdinal lrow)
3309  {
3310  const char tfecfFuncName[] = "removeLocalIndices: ";
3311  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3312  ! isFillActive (), std::runtime_error, "requires that fill is active.");
3313  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3314  isStorageOptimized (), std::runtime_error,
3315  "cannot remove indices after optimizeStorage() has been called.");
3316  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3317  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
3318  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3319  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
3320  "Local row " << lrow << " is not in the row Map on the calling process.");
3321  if (! indicesAreAllocated ()) {
3322  allocateIndices (LocalIndices);
3323  }
3324 
3325  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
3326  // all processes?
3327  clearGlobalConstants ();
3328 
3329  if (k_numRowEntries_.extent (0) != 0) {
3330  this->k_numRowEntries_(lrow) = 0;
3331  }
3332 #ifdef HAVE_TPETRA_DEBUG
3333  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3334  getNumEntriesInLocalRow (lrow) != 0 ||
3335  ! indicesAreAllocated () ||
3336  ! isLocallyIndexed (), std::logic_error,
3337  ": Violated stated post-conditions. Please contact Tpetra team.");
3338 #endif // HAVE_TPETRA_DEBUG
3339  }
3340 
3341 
3342  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3343  void
3345  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
3346  const typename local_graph_type::entries_type::non_const_type& columnIndices)
3347  {
3348  const char tfecfFuncName[] = "setAllIndices: ";
3349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3350  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
3351  "The graph must have a column Map before you may call this method.");
3352  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3353  static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1,
3354  std::runtime_error, "rowPointers.size() = " << rowPointers.size () <<
3355  " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) <<
3356  ".");
3357 
3358  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3359  // since the future model will be allocation at construction, not
3360  // lazy allocation on first insert.
3361  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3362  (this->k_lclInds1D_.extent (0) != 0 ||
3363  this->k_gblInds1D_.extent (0) != 0,
3364  std::runtime_error, "You may not call this method if 1-D data "
3365  "structures are already allocated.");
3366 
3367  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3368  (this->lclInds2D_ != Teuchos::null ||
3369  this->gblInds2D_ != Teuchos::null,
3370  std::runtime_error, "You may not call this method if 2-D data "
3371  "structures are already allocated.");
3372 
3373  indicesAreAllocated_ = true;
3374  indicesAreLocal_ = true;
3375  pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now.
3376  k_lclInds1D_ = columnIndices;
3377  k_rowPtrs_ = rowPointers;
3378  // Storage MUST be packed, since the interface doesn't give any
3379  // way to indicate any extra space at the end of each row.
3380  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
3381 
3382  // Build the local graph.
3383  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
3384 
3385  // These normally get cleared out at the end of allocateIndices.
3386  // It makes sense to clear them out here, because at the end of
3387  // this method, the graph is allocated on the calling process.
3388  numAllocForAllRows_ = 0;
3389  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3390 
3391  checkInternalState ();
3392  }
3393 
3394 
3395  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3396  void
3398  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3399  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3400  {
      // Convenience overload taking Teuchos::ArrayRCP input.  It copies
      // the row offsets into a freshly allocated View of the local
      // graph's row offset type (ptr_rot), deep-copies the column
      // indices into a View (k_ind), and forwards both to the overload
      // of setAllIndices that takes Kokkos Views.
      //
      // rowPointers: row offsets, stored as size_t.
      // columnIndices: local column indices.
3401  using Kokkos::View;
3402  typedef typename local_graph_type::row_map_type row_map_type;
3403  typedef typename row_map_type::array_layout layout_type;
3404  typedef typename row_map_type::non_const_value_type row_offset_type;
3405  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3406  Kokkos::MemoryUnmanaged> input_view_type;
3407  typedef typename row_map_type::non_const_type nc_row_map_type;
3408
3409  const size_t size = static_cast<size_t> (rowPointers.size ());
3410  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
      // Unmanaged host View that wraps the caller's array without copying.
3411  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3412
      // Destination for the row offsets, in the local graph's own type.
3413  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3414
3415  if (same) { // size_t == row_offset_type
3416  // This compile-time logic ensures that the compiler never sees
3417  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3418  // ...> unless size_t == row_offset_type.
      // ptr_decoy only exists so that both alternatives handed to
      // if_c::select are valid expressions; presumably select returns
      // ptr_rot here, since this branch runs only when same is true.
3419  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3420  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3421  nc_row_map_type,
3422  input_view_type>::select (ptr_rot, ptr_decoy),
3423  ptr_in);
3424  }
3425  else { // size_t != row_offset_type
3426  // CudaUvmSpace != HostSpace, so this will be false in that case.
3427  const bool inHostMemory =
3428  Kokkos::Impl::is_same<typename row_map_type::memory_space,
3429  Kokkos::HostSpace>::value;
3430  if (inHostMemory) {
3431  // Copy (with cast from size_t to row_offset_type, with bounds
3432  // checking if necessary) to ptr_rot.
3433  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3434  }
3435  else { // Copy input row offsets to device first.
3436  //
3437  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3438  // execution space would avoid the double copy.
3439  //
3440  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3441  Kokkos::deep_copy (ptr_st, ptr_in);
3442  // Copy on device (casting from size_t to row_offset_type,
3443  // with bounds checking if necessary) to ptr_rot. This
3444  // executes in the output View's execution space, which is the
3445  // same as execution_space.
3446  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3447  }
3448  }
3449
      // Deep-copy the column indices into a View, then let the View
      // overload validate the input and take over both arrays.
3450  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3451  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3452  setAllIndices (ptr_rot, k_ind);
3453  }
3454 
3455 
3456  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3457  void
3459  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3460  size_t& boundForAllLocalRows,
3461  bool& boundSameForAllLocalRows) const
3462  {
3463  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3464  const char suffix[] = " Please report this bug to the Tpetra developers.";
3465 
3466  // The three output arguments. We assign them to the actual
3467  // output arguments at the end, in order to implement
3468  // transactional semantics.
3469  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3470  size_t numEntriesForAll = 0;
3471  bool allRowsSame = true;
3472 
3473  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3474 
3475  if (this->indicesAreAllocated ()) {
3476  if (this->isStorageOptimized ()) {
3477  // left with the case that we have optimized storage. in this
3478  // case, we have to construct a list of row sizes.
3479  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3480  (this->getProfileType () != StaticProfile, std::logic_error,
3481  "The graph is not StaticProfile, but storage appears to be optimized."
3482  << suffix);
3483  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3484  (numRows != 0 && k_rowPtrs_.extent (0) == 0, std::logic_error,
3485  "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "")
3486  << " on the calling process, but the k_rowPtrs_ array has zero entries."
3487  << suffix);
3488  Teuchos::ArrayRCP<size_t> numEnt;
3489  if (numRows != 0) {
3490  numEnt = Teuchos::arcp<size_t> (numRows);
3491  }
3492 
3493  // We have to iterate through the row offsets anyway, so we
3494  // might as well check whether all rows' bounds are the same.
3495  bool allRowsReallySame = false;
3496  for (ptrdiff_t i = 0; i < numRows; ++i) {
3497  numEnt[i] = this->k_rowPtrs_(i+1) - this->k_rowPtrs_(i);
3498  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3499  allRowsReallySame = false;
3500  }
3501  }
3502  if (allRowsReallySame) {
3503  if (numRows == 0) {
3504  numEntriesForAll = 0;
3505  } else {
3506  numEntriesForAll = numEnt[1] - numEnt[0];
3507  }
3508  allRowsSame = true;
3509  }
3510  else {
3511  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3512  allRowsSame = false; // conservatively; we don't check the array
3513  }
3514  }
3515  else if (k_numRowEntries_.extent (0) != 0) {
3516  // This is a shallow copy; the ArrayRCP wraps the View in a
3517  // custom destructor, which ensures correct deallocation if
3518  // that is the only reference to the View. Furthermore, this
3519  // View is a host View, so this doesn't assume UVM.
3520  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3521  allRowsSame = false; // conservatively; we don't check the array
3522  }
3523  else {
3524  numEntriesForAll = 0;
3525  allRowsSame = true;
3526  }
3527  }
3528  else { // indices not allocated
3529  if (k_numAllocPerRow_.extent (0) != 0) {
3530  // This is a shallow copy; the ArrayRCP wraps the View in a
3531  // custom destructor, which ensures correct deallocation if
3532  // that is the only reference to the View. Furthermore, this
3533  // View is a host View, so this doesn't assume UVM.
3534  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3535  allRowsSame = false; // conservatively; we don't check the array
3536  }
3537  else {
3538  numEntriesForAll = numAllocForAllRows_;
3539  allRowsSame = true;
3540  }
3541  }
3542 
3543  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3544  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3545  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3546  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3547  "size " << numEntriesPerRow.size () << "." << suffix);
3548  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3549  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3550  "numEntriesForAll and allRowsSame are not consistent. The former "
3551  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3552  << suffix);
3553  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3554  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3555  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3556  "nonzero length " << numEntriesForAll << ", but the latter is true."
3557  << suffix);
3558 
3559  boundPerLocalRow = numEntriesPerRow;
3560  boundForAllLocalRows = numEntriesForAll;
3561  boundSameForAllLocalRows = allRowsSame;
3562  }
3563 
3564 
3565  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3566  void
3569  {
      // Purpose: move the column indices stored in nonlocals_ (keyed by
      // global row index; presumably rows that the calling process
      // inserted into but does not own) into the rows of *this across
      // processes, then empty nonlocals_.
      //
      // The method communicates over getComm() (all-reduces, Map
      // construction, Export / Import).
      //
      // Throws std::runtime_error if fill is not active.  Returns
      // without further work if no process has any nonlocal rows.
3570  using Teuchos::Comm;
3571  using Teuchos::outArg;
3572  using Teuchos::RCP;
3573  using Teuchos::rcp;
3574  using Teuchos::REDUCE_MAX;
3575  using Teuchos::REDUCE_MIN;
3576  using Teuchos::reduceAll;
3577  typedef CrsGraph<LocalOrdinal, GlobalOrdinal, Node> crs_graph_type;
3578  typedef LocalOrdinal LO;
3579  typedef GlobalOrdinal GO;
3580  typedef typename Teuchos::Array<GO>::size_type size_type;
3581  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3582
3583  RCP<const Comm<int> > comm = getComm ();
3584
3585  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3586  (! isFillActive (), std::runtime_error, "Fill must be active before "
3587  "you may call this method.");
3588
3589  const size_t myNumNonlocalRows = this->nonlocals_.size ();
3590
3591  // If no processes have nonlocal rows, then we don't have to do
3592  // anything. Checking this is probably cheaper than constructing
3593  // the Map of nonlocal rows (see below) and noticing that it has
3594  // zero global entries.
3595  {
3596  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3597  int someoneHasNonlocalRows = 0;
3598  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3599  outArg (someoneHasNonlocalRows));
3600  if (someoneHasNonlocalRows == 0) {
3601  return; // no process has nonlocal rows, so nothing to do
3602  }
3603  }
3604
3605  // 1. Create a list of the "nonlocal" rows on each process. This
3606  // requires iterating over nonlocals_, so while we do this,
3607  // deduplicate the entries and get a count for each nonlocal
3608  // row on this process.
3609  // 2. Construct a new row Map corresponding to those rows. This
3610  // Map is likely overlapping. We know that the Map is not
3611  // empty on all processes, because the above all-reduce and
3612  // return exclude that case.
3613
3614  RCP<const map_type> nonlocalRowMap;
3615  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3616  Teuchos::ArrayRCP<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3617  {
3618  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3619  size_type curPos = 0;
3620  for (auto mapIter = this->nonlocals_.begin ();
3621  mapIter != this->nonlocals_.end ();
3622  ++mapIter, ++curPos) {
3623  myNonlocalGblRows[curPos] = mapIter->first;
3624  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
      // Sort, then drop adjacent duplicates, so that each row's column
      // list is duplicate-free and its size is the exact entry count.
3625  std::sort (gblCols.begin (), gblCols.end ());
3626  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3627  gblCols.erase (vecLast, gblCols.end ());
3628  numEntPerNonlocalRow[curPos] = gblCols.size ();
3629  }
3630
3631  // Currently, Map requires that its indexBase be the global min
3632  // of all its global indices. Map won't compute this for us, so
3633  // we must do it. If our process has no nonlocal rows, set the
3634  // "min" to the max possible GO value. This ensures that if
3635  // some process has at least one nonlocal row, then it will pick
3636  // that up as the min. We know that at least one process has a
3637  // nonlocal row, since the all-reduce and return at the top of
3638  // this method excluded that case.
3639  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3640  {
3641  auto iter = std::min_element (myNonlocalGblRows.begin (),
3642  myNonlocalGblRows.end ());
3643  if (iter != myNonlocalGblRows.end ()) {
3644  myMinNonlocalGblRow = *iter;
3645  }
3646  }
3647  GO gblMinNonlocalGblRow = 0;
3648  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3649  outArg (gblMinNonlocalGblRow));
3650  const GO indexBase = gblMinNonlocalGblRow;
      // INV is passed as the global number of entries; presumably this
      // asks the Map constructor to compute that count itself.
3651  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3652  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3653  }
3654
3655  // 3. Use the column indices for each nonlocal row, as stored in
3656  // nonlocals_, to construct a CrsGraph corresponding to
3657  // nonlocal rows. We may use StaticProfile, since we have
3658  // exact counts of the number of entries in each nonlocal row.
3659
3660  RCP<crs_graph_type> nonlocalGraph =
3661  rcp (new crs_graph_type (nonlocalRowMap, numEntPerNonlocalRow,
3662  StaticProfile));
3663  {
3664  size_type curPos = 0;
3665  for (auto mapIter = this->nonlocals_.begin ();
3666  mapIter != this->nonlocals_.end ();
3667  ++mapIter, ++curPos) {
3668  const GO gblRow = mapIter->first;
3669  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3670  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3671  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3672  }
3673  }
3674  // There's no need to fill-complete the nonlocals graph.
3675  // We just use it as a temporary container for the Export.
3676
3677  // 4. If the original row Map is one to one, then we can Export
3678  // directly from nonlocalGraph into this. Otherwise, we have
3679  // to create a temporary graph with a one-to-one row Map,
3680  // Export into that, then Import from the temporary graph into
3681  // *this.
3682
3683  auto origRowMap = this->getRowMap ();
3684  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3685
3686  if (origRowMapIsOneToOne) {
3687  export_type exportToOrig (nonlocalRowMap, origRowMap);
3688  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3689  // We're done at this point!
3690  }
3691  else {
3692  // If you ask a Map whether it is one to one, it does some
3693  // communication and stashes intermediate results for later use
3694  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3695  // much more than the original cost of calling isOneToOne.
3696  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3697  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3698
3699  // Create a temporary graph with the one-to-one row Map.
3700  //
3701  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3702  // row, to avoid reallocation during the Export operation.
3703  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3704  // Export from graph of nonlocals into the temp one-to-one graph.
3705  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3706
3707  // We don't need the graph of nonlocals anymore, so get rid of
3708  // it, to keep the memory high-water mark down.
3709  nonlocalGraph = Teuchos::null;
3710
3711  // Import from the one-to-one graph to the original graph.
3712  import_type importToOrig (oneToOneRowMap, origRowMap);
3713  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3714  }
3715
3716  // It's safe now to clear out nonlocals_, since we've already
3717  // committed side effects to *this. The standard idiom for
3718  // clearing a Container like std::map, is to swap it with an empty
3719  // Container and let the swapped Container fall out of scope.
3720  decltype (this->nonlocals_) newNonlocals;
3721  std::swap (this->nonlocals_, newNonlocals);
3722
3723  checkInternalState ();
3724  }
3725 
3726 
3727  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3728  void
3730  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3731  {
3732 #ifdef HAVE_TPETRA_DEBUG
3733  const char tfecfFuncName[] = "resumeFill";
3734 
3735  Teuchos::barrier( *rowMap_->getComm() );
3736 #endif // HAVE_TPETRA_DEBUG
3737  clearGlobalConstants();
3738  if (params != Teuchos::null) this->setParameterList (params);
3739  lowerTriangular_ = false;
3740  upperTriangular_ = false;
3741  // either still sorted/merged or initially sorted/merged
3742  indicesAreSorted_ = true;
3743  noRedundancies_ = true;
3744  fillComplete_ = false;
3745 #ifdef HAVE_TPETRA_DEBUG
3746  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3747  ! isFillActive() || isFillComplete(), std::logic_error,
3748  "::resumeFill(): At end of method, either fill is not active or fill is "
3749  "complete. This violates stated post-conditions. Please report this bug "
3750  "to the Tpetra developers.");
3751 #endif // HAVE_TPETRA_DEBUG
3752  }
3753 
3754 
3755  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3756  void
3758  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3759  {
3760  // If the graph already has domain and range Maps, don't clobber
3761  // them. If it doesn't, use the current row Map for both the
3762  // domain and range Maps.
3763  //
3764  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3765  // column Map, and column indices are inserted which are not in
3766  // the row Map on any process, this will cause troubles. However,
3767  // that is not a common case for most applications that we
3768  // encounter, and checking for it might require more
3769  // communication.
3770  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3771  if (domMap.is_null ()) {
3772  domMap = this->getRowMap ();
3773  }
3774  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3775  if (ranMap.is_null ()) {
3776  ranMap = this->getRowMap ();
3777  }
3778  this->fillComplete (domMap, ranMap, params);
3779  }
3780 
3781 
3782  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3783  void
3785  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3786  const Teuchos::RCP<const map_type>& rangeMap,
3787  const Teuchos::RCP<Teuchos::ParameterList>& params)
3788  {
      // Purpose: end the fill phase.  In order, this method allocates
      // indices if needed, optionally runs globalAssemble, sets the
      // domain and range Maps, builds the column Map if the graph has
      // none, makes indices local, sorts and merges them, builds the
      // Import / Export objects, fills the local graph, computes
      // constants, and finally marks the graph fill complete.
      //
      // domainMap, rangeMap: passed to setDomainRangeMaps.
      // params: may be null.  Parameters read here:
      //   "sort column map ghost gids" / "Sort column Map ghost GIDs",
      //   "No Nonlocal Changes", "compute global constants", and
      //   "compute local triangular constants".  params is also passed
      //   to fillLocalGraph.
      //
      // Throws std::runtime_error if fill is not active, or if making
      // the indices local reports an error.
3789  const char tfecfFuncName[] = "fillComplete: ";
3790
3791 #ifdef HAVE_TPETRA_DEBUG
3792  rowMap_->getComm ()->barrier ();
3793 #endif // HAVE_TPETRA_DEBUG
3794
3795  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
3796  std::runtime_error, "Graph fill state must be active (isFillActive() "
3797  "must be true) before calling fillComplete().");
3798
3799  const int numProcs = getComm ()->getSize ();
3800
3801  //
3802  // Read and set parameters
3803  //
3804
3805  // Does the caller want to sort remote GIDs (within those owned by
3806  // the same process) in makeColMap()?
      // The lower-case spelling of the parameter name is checked first;
      // the mixed-case spelling is only consulted if the former is absent.
3807  if (! params.is_null ()) {
3808  if (params->isParameter ("sort column map ghost gids")) {
3809  sortGhostsAssociatedWithEachProcessor_ =
3810  params->get<bool> ("sort column map ghost gids",
3811  sortGhostsAssociatedWithEachProcessor_);
3812  }
3813  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3814  sortGhostsAssociatedWithEachProcessor_ =
3815  params->get<bool> ("Sort column Map ghost GIDs",
3816  sortGhostsAssociatedWithEachProcessor_);
3817  }
3818  }
3819
3820  // If true, the caller promises that no process did nonlocal
3821  // changes since the last call to fillComplete.
3822  bool assertNoNonlocalInserts = false;
3823  if (! params.is_null ()) {
3824  assertNoNonlocalInserts =
3825  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3826  }
3827
3828  //
3829  // Allocate indices, if they haven't already been allocated
3830  //
3831  if (! indicesAreAllocated ()) {
3832  if (hasColMap ()) {
3833  // We have a column Map, so use local indices.
3834  allocateIndices (LocalIndices);
3835  } else {
3836  // We don't have a column Map, so use global indices.
3837  allocateIndices (GlobalIndices);
3838  }
3839  }
3840
3841  //
3842  // Do global assembly, if requested and if the communicator
3843  // contains more than one process.
3844  //
3845  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3846  if (mayNeedGlobalAssemble) {
3847  // This first checks if we need to do global assembly.
3848  // The check costs a single all-reduce.
3849  globalAssemble ();
3850  }
3851  else {
      // NOTE(review): this branch is reached when the caller set "No
      // Nonlocal Changes" or when numProcs <= 1.  The test below can
      // only fire when numProcs > 1, yet its message talks about a
      // one-process communicator.  Condition and message look
      // inconsistent -- confirm which of the two is intended.
3852  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3853  (numProcs > 1 && this->nonlocals_.size() > 0, std::runtime_error,
3854  "The graph's communicator contains only one process, "
3855  "but there are nonlocal entries. "
3856  "This probably means that invalid entries were added to the graph.");
3857  }
3858
3859  // Set domain and range Map. This may clear the Import / Export
3860  // objects if the new Maps differ from any old ones.
3861  setDomainRangeMaps (domainMap, rangeMap);
3862
3863  // If the graph does not already have a column Map (either from
3864  // the user constructor calling the version of the constructor
3865  // that takes a column Map, or from a previous fillComplete call),
3866  // then create it.
3867  Teuchos::Array<int> remotePIDs (0);
3868  const bool mustBuildColMap = ! this->hasColMap ();
3869  if (mustBuildColMap) {
3870  this->makeColMap (remotePIDs); // resized on output
3871  }
3872
3873  // Make indices local, if they aren't already.
3874  // The method doesn't do any work if the indices are already local.
      // The result pairs an error count (first; zero means success)
      // with an error message (second).
3875  const std::pair<size_t, std::string> makeIndicesLocalResult =
3876  this->makeIndicesLocal ();
3877  const bool debug = ::Tpetra::Details::Behavior::debug ();
3878  if (debug) { // In debug mode, print error output on all processes
3879  using ::Tpetra::Details::gathervPrint;
3880  using Teuchos::RCP;
3881  using Teuchos::REDUCE_MIN;
3882  using Teuchos::reduceAll;
3883  using Teuchos::outArg;
3884
3885  RCP<const map_type> map = this->getMap ();
3886  RCP<const Teuchos::Comm<int> > comm;
3887  if (! map.is_null ()) {
3888  comm = map->getComm ();
3889  }
3890  if (comm.is_null ()) {
      // No communicator available: report the local error only.
3891  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3892  (makeIndicesLocalResult.first != 0, std::runtime_error,
3893  makeIndicesLocalResult.second);
3894  }
3895  else {
      // Agree across processes on whether anyone failed (min over the
      // local success flags), so that all processes throw together.
3896  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3897  int gblSuccess = 0; // output argument
3898  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3899  if (gblSuccess != 1) {
3900  std::ostringstream os;
3901  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3902  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3903  (true, std::runtime_error, os.str ());
3904  }
3905  }
3906  }
3907  else {
3908  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3909  // the error state to makeImportExport or
3910  // computeGlobalConstants, which may do all-reduces and thus may
3911  // have the opportunity to communicate that error state.
3912  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3913  (makeIndicesLocalResult.first != 0, std::runtime_error,
3914  makeIndicesLocalResult.second);
3915  }
3916
3917  // If this process has no indices, then CrsGraph considers it
3918  // already trivially sorted and merged. Thus, this method need
3919  // not be called on all processes in the row Map's communicator.
3920  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3921
3922  // Make Import and Export objects, if they haven't been made
3923  // already. If we made a column Map above, reuse information from
3924  // that process to avoid communication in the Import setup.
3925  this->makeImportExport (remotePIDs, mustBuildColMap);
3926
3927  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3928  this->fillLocalGraph (params);
3929
      // Both options default to true, including when params is null.
3930  const bool callComputeGlobalConstants = params.get () == nullptr ||
3931  params->get ("compute global constants", true);
3932  const bool computeLocalTriangularConstants = params.get () == nullptr ||
3933  params->get ("compute local triangular constants", true);
3934  if (callComputeGlobalConstants) {
3935  this->computeGlobalConstants (computeLocalTriangularConstants);
3936  }
3937  else {
3938  this->computeLocalConstants (computeLocalTriangularConstants);
3939  }
3940  this->fillComplete_ = true;
3941  this->checkInternalState ();
3942  }
3943 
3944 
3945  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3946  void
3948  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3949  const Teuchos::RCP<const map_type>& rangeMap,
3950  const Teuchos::RCP<const import_type>& importer,
3951  const Teuchos::RCP<const export_type>& exporter,
3952  const Teuchos::RCP<Teuchos::ParameterList>& params)
3953  {
      // Purpose: a shortcut version of fillComplete for a StaticProfile
      // graph that already has a column Map and row offsets.  It skips
      // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal,
      // and sortAndMergeAllIndices, and instead sets the corresponding
      // state flags directly.
      //
      // domainMap, rangeMap: must be nonnull; passed to setDomainRangeMaps.
      // importer: if nonnull, its source Map must match the domain Map
      //   and its target Map the column Map; it is then stored.
      // exporter: if nonnull, its source Map must match the row Map and
      //   its target Map the range Map; it is then stored.
      // params: may be null.  Parameters read here: "compute global
      //   constants" and "compute local triangular constants" (plus
      //   "Timer Label" if HAVE_TPETRA_MMM_TIMINGS is defined).  params
      //   is also passed to fillLocalGraph.
      //
      // Throws std::runtime_error if a precondition fails, and
      // std::invalid_argument if importer or exporter does not match
      // the graph's Maps.
3954  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3955 #ifdef HAVE_TPETRA_MMM_TIMINGS
3956  std::string label;
3957  if(!params.is_null())
3958  label = params->get("Timer Label",label);
3959  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3960  using Teuchos::TimeMonitor;
      // MM is reassigned before each phase below; replacing the
      // TimeMonitor presumably stops the previous timer and starts the next.
3961  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3962 #endif
3963
3964
3965  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3966  domainMap.is_null () || rangeMap.is_null (),
3967  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3968  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3969  pftype_ != StaticProfile, std::runtime_error, "You may not call this "
3970  "method unless the graph is StaticProfile.");
      // NOTE(review): this test also fires when the graph is already
      // fill complete, which the message does not mention.
3971  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3972  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3973  "call this method unless the graph has a column Map.");
3974  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3975  getNodeNumRows () > 0 && k_rowPtrs_.extent (0) == 0,
3976  std::runtime_error, "The calling process has getNodeNumRows() = "
3977  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3978  "been set.");
3979  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3980  static_cast<size_t> (k_rowPtrs_.extent (0)) != getNodeNumRows () + 1,
3981  std::runtime_error, "The row offsets array has length " <<
3982  k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = " <<
3983  (getNodeNumRows () + 1) << ".");
3984
3985  // Note: We don't need to do the following things which are normally done in fillComplete:
3986  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3987
3988  // Constants from allocateIndices
3989  //
3990  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3991  // away once the graph is allocated. expertStaticFillComplete
3992  // either presumes that the graph is allocated, or "allocates" it.
3993  //
3994  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3995  // version of CrsGraph is to allocate in the constructor, not
3996  // lazily on first insert. That will make both
3997  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3998  numAllocForAllRows_ = 0;
3999  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
4000  indicesAreAllocated_ = true;
4001
4002  // Constants from makeIndicesLocal
4003  //
4004  // The graph has a column Map, so its indices had better be local.
4005  indicesAreLocal_ = true;
4006  indicesAreGlobal_ = false;
4007
4008  // set domain/range map: may clear the import/export objects
4009 #ifdef HAVE_TPETRA_MMM_TIMINGS
4010  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
4011 #endif
4012  setDomainRangeMaps (domainMap, rangeMap);
4013
4014  // Presume the user sorted and merged the arrays first
4015  indicesAreSorted_ = true;
4016  noRedundancies_ = true;
4017
4018  // makeImportExport won't create a new importer/exporter if I set one here first.
4019 #ifdef HAVE_TPETRA_MMM_TIMINGS
4020  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
4021 #endif
4022
      // Discard any existing Import / Export objects; only adopt the
      // caller's objects after checking them against the graph's Maps.
4023  importer_ = Teuchos::null;
4024  exporter_ = Teuchos::null;
4025  if (importer != Teuchos::null) {
4026  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4027  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
4028  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
4029  std::invalid_argument,": importer does not match matrix maps.");
4030  importer_ = importer;
4031
4032  }
4033
4034 #ifdef HAVE_TPETRA_MMM_TIMINGS
4035  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
4036 #endif
4037
4038  if (exporter != Teuchos::null) {
4039  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4040  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
4041  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
4042  std::invalid_argument,": exporter does not match matrix maps.");
4043  exporter_ = exporter;
4044  }
4045
4046 #ifdef HAVE_TPETRA_MMM_TIMINGS
4047  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
4048 #endif
4049  Teuchos::Array<int> remotePIDs (0); // unused output argument
4050  this->makeImportExport (remotePIDs, false);
4051
4052  // Since we have a StaticProfile, fillLocalGraph will do the right thing...
4053 #ifdef HAVE_TPETRA_MMM_TIMINGS
4054  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
4055 #endif
4056  this->fillLocalGraph (params);
4057
      // Both options default to true, including when params is null.
4058  const bool callComputeGlobalConstants = params.get () == nullptr ||
4059  params->get ("compute global constants", true);
4060  const bool computeLocalTriangularConstants = params.get () == nullptr ||
4061  params->get ("compute local triangular constants", true);
4062
4063  if (callComputeGlobalConstants) {
4064 #ifdef HAVE_TPETRA_MMM_TIMINGS
4065  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
4066 #endif // HAVE_TPETRA_MMM_TIMINGS
4067  this->computeGlobalConstants (computeLocalTriangularConstants);
4068  }
4069  else {
4070 #ifdef HAVE_TPETRA_MMM_TIMINGS
4071  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
4072 #endif // HAVE_TPETRA_MMM_TIMINGS
4073  this->computeLocalConstants (computeLocalTriangularConstants);
4074  }
4075
4076  fillComplete_ = true;
4077
4078 #ifdef HAVE_TPETRA_MMM_TIMINGS
4079  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
4080 #endif
4081  checkInternalState ();
4082  }
4083 
4084 
4085  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4086  void
4088  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
4089  {
4091  typedef decltype (k_numRowEntries_) row_entries_type;
4092  typedef typename local_graph_type::row_map_type row_map_type;
4093  typedef typename row_map_type::non_const_type non_const_row_map_type;
4094  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
4095  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
4096  "expertStaticFillComplete): ";
4097  const size_t lclNumRows = this->getNodeNumRows ();
4098 
4099  // This method's goal is to fill in the two arrays (compressed
4100  // sparse row format) that define the sparse graph's structure.
4101  //
4102  // Use the nonconst version of row_map_type for ptr_d, because
4103  // the latter is const and we need to modify ptr_d here.
4104  non_const_row_map_type ptr_d;
4105  row_map_type ptr_d_const;
4106  lclinds_1d_type ind_d;
4107 
4108  bool requestOptimizedStorage = true;
4109  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
4110  requestOptimizedStorage = false;
4111  }
4112  if (this->getProfileType () == DynamicProfile) {
4113  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
4114  //
4115  // DynamicProfile means that the graph's column indices are
4116  // currently stored in a 2-D "unpacked" format, in the
4117  // arrays-of-arrays lclInds2D_. We allocate 1-D storage
4118  // (ind_d) and then copy from 2-D storage (lclInds2D_) into 1-D
4119  // storage (ind_d).
4120 #ifdef HAVE_TPETRA_DEBUG
4121  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4122  (static_cast<size_t> (this->k_numRowEntries_.extent (0)) !=
4123  lclNumRows, std::logic_error, "(DynamicProfile branch) "
4124  "k_numRowEntries_.extent(0) = " << k_numRowEntries_.extent (0)
4125  << " != getNodeNumRows() = " << lclNumRows << "");
4126 #endif // HAVE_TPETRA_DEBUG
4127 
4128  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4129  // array of valid entry counts per row (k_numRowEntries_). The
4130  // pack method can handle its counts input being a host View.
4131  //
4132  // Total number of entries in the matrix on the calling
4133  // process. We will compute this in the loop below. It's
4134  // cheap to compute and useful as a sanity check.
4135  size_t lclTotalNumEntries = 0;
4136  {
4137  // Allocate the packed row offsets array.
4138  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows+1);
4139  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4140  // This function can handle that numRowEnt_h lives on host.
4141  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4142  ptr_d_const = ptr_d;
4143  }
4144 
4145 #ifdef HAVE_TPETRA_DEBUG
4146  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4147  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4148  std::logic_error, "(DynamicProfile branch) After packing ptr_d, "
4149  "ptr_d.extent(0) = " << ptr_d.extent (0) << " != "
4150  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
4151  {
4152  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4153  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4154  (valToCheck != lclTotalNumEntries, std::logic_error,
4155  "(DynamicProfile branch) After packing ptr_d, ptr_d(lclNumRows = "
4156  << lclNumRows << ") = " << valToCheck << " != total number of "
4157  "entries on the calling process = " << lclTotalNumEntries << ".");
4158  }
4159 #endif // HAVE_TPETRA_DEBUG
4160 
4161  // Allocate the array of packed column indices.
4162  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4163  // Pack the column indices. We have to do this sequentially on
4164  // host, since lclInds2D_ is an ArrayRCP<Array<LO>>, which
4165  // doesn't work in parallel kernels (its iterators aren't even
4166  // thread safe in debug mode).
4167  {
4168  auto ptr_h = Kokkos::create_mirror_view (ptr_d);
4169  Kokkos::deep_copy (ptr_h, ptr_d); // we need the entries on host
4170  auto ind_h = Kokkos::create_mirror_view (ind_d); // will fill on host
4171 
4172  // k_numRowEntries_ is a host View already, so we can use it here.
4173  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4174  for (size_t row = 0; row < lclNumRows; ++row) {
4175  const size_t numEnt = numRowEnt_h(row);
4176  std::copy (lclInds2D_[row].begin (),
4177  lclInds2D_[row].begin () + numEnt,
4178  ind_h.data () + ptr_h(row));
4179  }
4180  Kokkos::deep_copy (ind_d, ind_h);
4181  }
4182 
4183 #ifdef HAVE_TPETRA_DEBUG
4184  // Sanity check of packed row offsets.
4185  if (ptr_d.extent (0) != 0) {
4186  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4187  const size_t valToCheck = ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4188  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4189  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4190  std::logic_error, "(DynamicProfile branch) After packing column "
4191  "indices, ptr_d(" << (numOffsets-1) << ") = " << valToCheck
4192  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4193  }
4194 #endif // HAVE_TPETRA_DEBUG
4195  }
4196  else if (getProfileType () == StaticProfile) {
4197  // StaticProfile means that the graph's column indices are
4198  // currently stored in a 1-D format, with row offsets in
4199  // k_rowPtrs_ and local column indices in k_lclInds1D_.
4200 
4201 #ifdef HAVE_TPETRA_DEBUG
4202  // StaticProfile also means that the graph's array of row
4203  // offsets must already be allocated.
4204  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4205  (k_rowPtrs_.extent (0) == 0, std::logic_error,
4206  "(StaticProfile branch) k_rowPtrs_ has size zero, but shouldn't");
4207  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4208  (k_rowPtrs_.extent (0) != lclNumRows + 1, std::logic_error,
4209  "(StaticProfile branch) k_rowPtrs_.extent(0) = "
4210  << k_rowPtrs_.extent (0) << " != (lclNumRows + 1) = "
4211  << (lclNumRows + 1) << ".");
4212  {
4213  const size_t numOffsets = k_rowPtrs_.extent (0);
4214  const auto valToCheck =
4215  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4216  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4217  (numOffsets != 0 &&
4218  k_lclInds1D_.extent (0) != valToCheck,
4219  std::logic_error, "(StaticProfile branch) numOffsets = " <<
4220  numOffsets << " != 0 and k_lclInds1D_.extent(0) = " <<
4221  k_lclInds1D_.extent (0) << " != k_rowPtrs_(" << numOffsets <<
4222  ") = " << valToCheck << ".");
4223  }
4224 #endif // HAVE_TPETRA_DEBUG
4225 
4226  size_t allocSize = 0;
4227  try {
4228  allocSize = this->getNodeAllocationSize ();
4229  }
4230  catch (std::logic_error& e) {
4231  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4232  (true, std::logic_error, "In fillLocalGraph, getNodeAllocationSize "
4233  "threw std::logic_error: " << e.what ());
4234  }
4235  catch (std::runtime_error& e) {
4236  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4237  (true, std::runtime_error, "In fillLocalGraph, getNodeAllocationSize "
4238  "threw std::runtime_error: " << e.what ());
4239  }
4240  catch (std::exception& e) {
4241  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4242  (true, std::runtime_error, "In fillLocalGraph, getNodeAllocationSize "
4243  "threw std::exception: " << e.what ());
4244  }
4245  catch (...) {
4246  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4247  (true, std::runtime_error, "In fillLocalGraph, getNodeAllocationSize "
4248  "threw an exception not a subclass of std::exception.");
4249  }
4250 
4251  if (this->getNodeNumEntries () != allocSize) {
4252  // The graph's current 1-D storage is "unpacked." This means
4253  // the row offsets may differ from what the final row offsets
4254  // should be. This could happen, for example, if the user
4255  // specified StaticProfile in the constructor and set an upper
4256  // bound on the number of entries in each row, but didn't fill
4257  // all those entries.
4258 
4259 #ifdef HAVE_TPETRA_DEBUG
4260  if (k_rowPtrs_.extent (0) != 0) {
4261  const size_t numOffsets =
4262  static_cast<size_t> (k_rowPtrs_.extent (0));
4263  const auto valToCheck =
4264  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4266  (valToCheck != static_cast<size_t> (k_lclInds1D_.extent (0)),
4267  std::logic_error, "(StaticProfile unpacked branch) Before "
4268  "allocating or packing, k_rowPtrs_(" << (numOffsets-1) << ") = "
4269  << valToCheck << " != k_lclInds1D_.extent(0) = "
4270  << k_lclInds1D_.extent (0) << ".");
4271  }
4272 #endif // HAVE_TPETRA_DEBUG
4273 
4274  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4275  // array of valid entry counts per row (k_numRowEntries_).
4276 
4277  // Total number of entries in the matrix on the calling
4278  // process. We will compute this in the loop below. It's
4279  // cheap to compute and useful as a sanity check.
4280  size_t lclTotalNumEntries = 0;
4281  {
4282  // Allocate the packed row offsets array.
4283  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
4284  ptr_d_const = ptr_d;
4285 
4286  // It's ok that k_numRowEntries_ is a host View; the
4287  // function can handle this.
4288  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4289 #ifdef HAVE_TPETRA_DEBUG
4290  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4291  (static_cast<size_t> (numRowEnt_h.extent (0)) != lclNumRows,
4292  std::logic_error, "(StaticProfile unpacked branch) "
4293  "numRowEnt_h.extent(0) = " << numRowEnt_h.extent (0)
4294  << " != getNodeNumRows() = " << lclNumRows << "");
4295 #endif // HAVE_TPETRA_DEBUG
4296 
4297  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4298 
4299 #ifdef HAVE_TPETRA_DEBUG
4300  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4301  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4302  std::logic_error, "(StaticProfile unpacked branch) After "
4303  "allocating ptr_d, ptr_d.extent(0) = " << ptr_d.extent (0)
4304  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
4305  {
4306  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4307  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4308  (valToCheck != lclTotalNumEntries, std::logic_error,
4309  "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked "
4310  "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
4311  << ") = " << valToCheck << " != total number of entries on "
4312  "the calling process = " << lclTotalNumEntries << ".");
4313  }
4314 #endif // HAVE_TPETRA_DEBUG
4315  }
4316 
4317  // Allocate the array of packed column indices.
4318  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4319 
4320  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
4321  // them, using the packed row offsets array ptr_d that we
4322  // created above.
4323  //
4324  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
4325  // CrsMatrix?), we need to keep around the unpacked row
4326  // offsets and column indices.
4327 
4328  // Pack the column indices from unpacked k_lclInds1D_ into
4329  // packed ind_d. We will replace k_lclInds1D_ below.
4330  typedef pack_functor<
4331  typename local_graph_type::entries_type::non_const_type,
4332  row_map_type> inds_packer_type;
4333  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
4334  {
4335  typedef typename decltype (ind_d)::execution_space exec_space;
4336  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
4337  Kokkos::parallel_for (range_type (0, lclNumRows), f);
4338  }
4339 
4340 #ifdef HAVE_TPETRA_DEBUG
4341  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4342  (ptr_d.extent (0) == 0, std::logic_error, "(StaticProfile "
4343  "\"Optimize Storage\"=true branch) After packing, "
4344  "ptr_d.extent(0) = 0. This probably means k_rowPtrs_ was "
4345  "never allocated.");
4346  if (ptr_d.extent (0) != 0) {
4347  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4348  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4350  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4351  std::logic_error, "(StaticProfile \"Optimize Storage\"=true "
4352  "branch) After packing, ptr_d(" << (numOffsets-1) << ") = "
4353  << valToCheck << " != ind_d.extent(0) = "
4354  << ind_d.extent (0) << ".");
4355  }
4356 #endif // HAVE_TPETRA_DEBUG
4357  }
4358  else { // We don't have to pack, so just set the pointers.
4359  ptr_d_const = k_rowPtrs_;
4360  ind_d = k_lclInds1D_;
4361 
4362 #ifdef HAVE_TPETRA_DEBUG
4363  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4364  (ptr_d_const.extent (0) == 0, std::logic_error, "(StaticProfile "
4365  "\"Optimize Storage\"=false branch) ptr_d_const.extent(0) = 0. "
4366  "This probably means that k_rowPtrs_ was never allocated.");
4367  if (ptr_d_const.extent (0) != 0) {
4368  const size_t numOffsets =
4369  static_cast<size_t> (ptr_d_const.extent (0));
4370  const size_t valToCheck =
4371  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4372  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4373  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4374  std::logic_error, "(StaticProfile \"Optimize Storage\"=false "
4375  "branch) ptr_d_const(" << (numOffsets-1) << ") = " << valToCheck
4376  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4377  }
4378 #endif // HAVE_TPETRA_DEBUG
4379  }
4380  }
4381 
4382 #ifdef HAVE_TPETRA_DEBUG
4383  // Extra sanity checks.
4384  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4385  (static_cast<size_t> (ptr_d_const.extent (0)) != lclNumRows + 1,
4386  std::logic_error, "After packing, ptr_d_const.extent(0) = " <<
4387  ptr_d_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
4388  << ".");
4389  if (ptr_d_const.extent (0) != 0) {
4390  const size_t numOffsets = static_cast<size_t> (ptr_d_const.extent (0));
4391  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4392  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4393  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4394  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1)
4395  << ") = " << valToCheck << " != ind_d.extent(0) = "
4396  << ind_d.extent (0) << ".");
4397  }
4398 #endif // HAVE_TPETRA_DEBUG
4399 
4400  if (requestOptimizedStorage) {
4401  // With optimized storage, we don't need to store the 2-D column
4402  // indices array-of-arrays, or the array of row entry counts.
4403 
4404  // Free graph data structures that are only needed for 2-D or
4405  // unpacked 1-D storage.
4406  lclInds2D_ = Teuchos::null;
4407  k_numRowEntries_ = row_entries_type ();
4408 
4409  // Keep the new 1-D packed allocations.
4410  k_rowPtrs_ = ptr_d_const;
4411  k_lclInds1D_ = ind_d;
4412 
4413  // The graph is definitely StaticProfile now, whether or not it
4414  // was before.
4415  pftype_ = StaticProfile;
4416  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
4417  }
4418 
4419  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4420 
4421  // Build the local graph.
4422  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4423 
4424  // TODO (mfh 13 Mar 2014) getNodeNumDiags(), isUpperTriangular(),
4425  // and isLowerTriangular() depend on computeGlobalConstants(), in
4426  // particular the part where it looks at the local matrix. You
4427  // have to use global indices to determine which entries are
4428  // diagonal, or above or below the diagonal. However, lower or
4429  // upper triangularness is a local property.
4430  }
4431 
4432  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4433  void
4434  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4435  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4436  {
4437  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4438  //
4439  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4440  // has no entries, because in that case, currently it is neither
4441  // locally nor globally indexed. This will change once we get rid
4442  // of lazy allocation (so that the constructor allocates indices
4443  // and therefore commits to local vs. global).
4444  const char tfecfFuncName[] = "replaceColMap: ";
4445  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4446  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4447  "Requires matching maps and non-static graph.");
4448  colMap_ = newColMap;
4449  }
4450 
4451  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4452  void
4454  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4455  const Teuchos::RCP<const import_type>& newImport,
4456  const bool sortIndicesInEachRow)
4457  {
4458  using Teuchos::REDUCE_MIN;
4459  using Teuchos::reduceAll;
4460  using Teuchos::RCP;
4461  typedef GlobalOrdinal GO;
4462  typedef LocalOrdinal LO;
4463  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4464  const char tfecfFuncName[] = "reindexColumns: ";
4465 
4466  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4467  isFillComplete (), std::runtime_error, "The graph is fill complete "
4468  "(isFillComplete() returns true). You must call resumeFill() before "
4469  "you may call this method.");
4470 
4471  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4472  // doesn't claim to do the work of an Import or Export. This
4473  // means that for all processes, the calling process MUST own all
4474  // column indices, in both the old column Map (if it exists) and
4475  // the new column Map. We check this via an all-reduce.
4476  //
4477  // Some processes may be globally indexed, others may be locally
4478  // indexed, and others (that have no graph entries) may be
4479  // neither. This method will NOT change the graph's current
4480  // state. If it's locally indexed, it will stay that way, and
4481  // vice versa. It would easy to add an option to convert indices
4482  // from global to local, so as to save a global-to-local
4483  // conversion pass. However, we don't do this here. The intended
4484  // typical use case is that the graph already has a column Map and
4485  // is locally indexed, and this is the case for which we optimize.
4486 
4487  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4488 
4489  // Attempt to convert indices to the new column Map's version of
4490  // local. This will fail if on the calling process, the graph has
4491  // indices that are not on that process in the new column Map.
4492  // After the local conversion attempt, we will do an all-reduce to
4493  // see if any processes failed.
4494 
4495  // If this is false, then either the graph contains a column index
4496  // which is invalid in the CURRENT column Map, or the graph is
4497  // locally indexed but currently has no column Map. In either
4498  // case, there is no way to convert the current local indices into
4499  // global indices, so that we can convert them into the new column
4500  // Map's local indices. It's possible for this to be true on some
4501  // processes but not others, due to replaceColMap.
4502  bool allCurColIndsValid = true;
4503  // On the calling process, are all valid current column indices
4504  // also in the new column Map on the calling process? In other
4505  // words, does local reindexing suffice, or should the user have
4506  // done an Import or Export instead?
4507  bool localSuffices = true;
4508 
4509  // Final arrays for the local indices. We will allocate exactly
4510  // one of these ONLY if the graph is locally indexed on the
4511  // calling process, and ONLY if the graph has one or more entries
4512  // (is not empty) on the calling process. In that case, we
4513  // allocate the first (1-D storage) if the graph has a static
4514  // profile, else we allocate the second (2-D storage).
4515  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4516  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4517 
4518  // If indices aren't allocated, that means the calling process
4519  // owns no entries in the graph. Thus, there is nothing to
4520  // convert, and it trivially succeeds locally.
4521  if (indicesAreAllocated ()) {
4522  if (isLocallyIndexed ()) {
4523  if (hasColMap ()) { // locally indexed, and currently has a column Map
4524  const map_type& oldColMap = * (getColMap ());
4525  if (pftype_ == StaticProfile) {
4526  // Allocate storage for the new local indices.
4527  const size_t allocSize = this->getNodeAllocationSize ();
4528  newLclInds1D = col_inds_type ("Tpetra::CrsGraph::ind", allocSize);
4529  // Attempt to convert the new indices locally.
4530  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4531  const RowInfo rowInfo = this->getRowInfo (lclRow);
4532  const size_t beg = rowInfo.offset1D;
4533  const size_t end = beg + rowInfo.numEntries;
4534  for (size_t k = beg; k < end; ++k) {
4535  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4536  // use a DualView instead.
4537  const LO oldLclCol = k_lclInds1D_(k);
4538  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4539  allCurColIndsValid = false;
4540  break; // Stop at the first invalid index
4541  }
4542  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4543 
4544  // The above conversion MUST succeed. Otherwise, the
4545  // current local index is invalid, which means that
4546  // the graph was constructed incorrectly.
4547  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4548  allCurColIndsValid = false;
4549  break; // Stop at the first invalid index
4550  }
4551  else {
4552  const LO newLclCol = newColMap->getLocalElement (gblCol);
4553  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4554  localSuffices = false;
4555  break; // Stop at the first invalid index
4556  }
4557  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4558  // use a DualView instead.
4559  newLclInds1D(k) = newLclCol;
4560  }
4561  } // for each entry in the current row
4562  } // for each locally owned row
4563  }
4564  else { // pftype_ == DynamicProfile
4565  // Allocate storage for the new local indices. We only
4566  // allocate the outer array here; we will allocate the
4567  // inner arrays below.
4568  newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4569 
4570  // Attempt to convert the new indices locally.
4571  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4572  const RowInfo rowInfo = this->getRowInfo (lclRow);
4573  newLclInds2D.resize (rowInfo.allocSize);
4574 
4575  Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo);
4576  Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) ();
4577 
4578  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4579  const LO oldLclCol = oldLclRowView[k];
4580  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4581  allCurColIndsValid = false;
4582  break; // Stop at the first invalid index
4583  }
4584  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4585 
4586  // The above conversion MUST succeed. Otherwise, the
4587  // local index is invalid and the graph is wrong.
4588  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4589  allCurColIndsValid = false;
4590  break; // Stop at the first invalid index
4591  }
4592  else {
4593  const LO newLclCol = newColMap->getLocalElement (gblCol);
4594  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4595  localSuffices = false;
4596  break; // Stop at the first invalid index.
4597  }
4598  newLclRowView[k] = newLclCol;
4599  }
4600  } // for each entry in the current row
4601  } // for each locally owned row
4602  } // pftype_
4603  }
4604  else { // locally indexed, but no column Map
4605  // This case is only possible if replaceColMap() was called
4606  // with a null argument on the calling process. It's
4607  // possible, but it means that this method can't possibly
4608  // succeed, since we have no way of knowing how to convert
4609  // the current local indices to global indices.
4610  allCurColIndsValid = false;
4611  }
4612  }
4613  else { // globally indexed
4614  // If the graph is globally indexed, we don't need to save
4615  // local indices, but we _do_ need to know whether the current
4616  // global indices are valid in the new column Map. We may
4617  // need to do a getRemoteIndexList call to find this out.
4618  //
4619  // In this case, it doesn't matter whether the graph currently
4620  // has a column Map. We don't need the old column Map to
4621  // convert from global indices to the _new_ column Map's local
4622  // indices. Furthermore, we can use the same code, whether
4623  // the graph is static or dynamic profile.
4624 
4625  // Test whether the current global indices are in the new
4626  // column Map on the calling process.
4627  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4628  const RowInfo rowInfo = this->getRowInfo (lclRow);
4629  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4630  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4631  const GO gblCol = oldGblRowView[k];
4632  if (! newColMap->isNodeGlobalElement (gblCol)) {
4633  localSuffices = false;
4634  break; // Stop at the first invalid index
4635  }
4636  } // for each entry in the current row
4637  } // for each locally owned row
4638  } // locally or globally indexed
4639  } // whether indices are allocated
4640 
4641  // Do an all-reduce to check both possible error conditions.
4642  int lclSuccess[2];
4643  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4644  lclSuccess[1] = localSuffices ? 1 : 0;
4645  int gblSuccess[2];
4646  gblSuccess[0] = 0;
4647  gblSuccess[1] = 0;
4648  RCP<const Teuchos::Comm<int> > comm =
4649  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4650  if (! comm.is_null ()) {
4651  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4652  }
4653 
4654  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4655  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4656  " The most likely reason is that the graph is locally indexed, but the "
4657  "column Map is missing (null) on some processes, due to a previous call "
4658  "to replaceColMap().");
4659 
4660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4661  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4662  "contains column indices that are in the old column Map, but not in the "
4663  "new column Map (on that process). This method does NOT redistribute "
4664  "data; it does not claim to do the work of an Import or Export operation."
4665  " This means that for all processess, the calling process MUST own all "
4666  "column indices, in both the old column Map and the new column Map. In "
4667  "this case, you will need to do an Import or Export operation to "
4668  "redistribute data.");
4669 
4670  // Commit the results.
4671  if (isLocallyIndexed ()) {
4672  if (pftype_ == StaticProfile) {
4673  k_lclInds1D_ = newLclInds1D;
4674  } else { // dynamic profile
4675  lclInds2D_ = newLclInds2D;
4676  }
4677  // We've reindexed, so we don't know if the indices are sorted.
4678  //
4679  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4680  // since we're already going through all the indices above. We
4681  // could also sort each row in place; that way, we would only
4682  // have to make one pass over the rows.
4683  indicesAreSorted_ = false;
4684  if (sortIndicesInEachRow) {
4685  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4686  // order to call this method.
4687  //
4688  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4689  // guarantee. It would be better to sort the new index arrays
4690  // before committing them.
4691  const bool sorted = false; // need to resort
4692  const bool merged = true; // no need to merge, since no dups
4693  this->sortAndMergeAllIndices (sorted, merged);
4694  }
4695  }
4696  colMap_ = newColMap;
4697 
4698  if (newImport.is_null ()) {
4699  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4700  // check whether the input Import is null on any process.
4701  //
4702  // If the domain Map hasn't been set yet, we can't compute a new
4703  // Import object. Leave it what it is; it should be null, but
4704  // it doesn't matter. If the domain Map _has_ been set, then
4705  // compute a new Import object if necessary.
4706  if (! domainMap_.is_null ()) {
4707  if (! domainMap_->isSameAs (* newColMap)) {
4708  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4709  } else {
4710  importer_ = Teuchos::null; // don't need an Import
4711  }
4712  }
4713  } else {
4714  // The caller gave us an Import object. Assume that it's valid.
4715  importer_ = newImport;
4716  }
4717  }
4718 
4719 
4720  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4721  void
4723  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4724  const Teuchos::RCP<const import_type>& newImporter)
4725  {
4726  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4727  TEUCHOS_TEST_FOR_EXCEPTION(
4728  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4729  "this method unless the graph already has a column Map.");
4730  TEUCHOS_TEST_FOR_EXCEPTION(
4731  newDomainMap.is_null (), std::invalid_argument,
4732  prefix << "The new domain Map must be nonnull.");
4733 
4734  const bool debug = ::Tpetra::Details::Behavior::debug ();
4735  if (debug) {
4736  if (newImporter.is_null ()) {
4737  // It's not a good idea to put expensive operations in a macro
4738  // clause, even if they are side effect - free, because macros
4739  // don't promise that they won't evaluate their arguments more
4740  // than once. It's polite for them to do so, but not required.
4741  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4742  TEUCHOS_TEST_FOR_EXCEPTION
4743  (colSameAsDom, std::invalid_argument, "If the new Import is null, "
4744  "then the new domain Map must be the same as the current column Map.");
4745  }
4746  else {
4747  const bool colSameAsTgt =
4748  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4749  const bool newDomSameAsSrc =
4750  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4751  TEUCHOS_TEST_FOR_EXCEPTION
4752  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4753  "new Import is nonnull, then the current column Map must be the same "
4754  "as the new Import's target Map, and the new domain Map must be the "
4755  "same as the new Import's source Map.");
4756  }
4757  }
4758 
4759  domainMap_ = newDomainMap;
4760  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4761  }
4762 
// Accessor whose body returns the lclGraph_ member.
// NOTE(review): this listing skips original lines 4764-4766, so the
// return type and the qualified function name are not visible here;
// presumably this is CrsGraph's local-graph getter -- confirm against
// the upstream file before relying on the signature.
4763  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4767  {
4768  return lclGraph_;
4769  }
4770 
4771  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4772  void
4774  computeGlobalConstants (const bool computeLocalTriangularConstants)
4775  {
4776  using ::Tpetra::Details::ProfilingRegion;
4777  using Teuchos::ArrayView;
4778  using Teuchos::outArg;
4779  using Teuchos::reduceAll;
4780  typedef global_size_t GST;
4781 
4782  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4783 
4784  this->computeLocalConstants (computeLocalTriangularConstants);
4785 
4786  // Compute global constants from local constants. Processes that
4787  // already have local constants still participate in the
4788  // all-reduces, using their previously computed values.
4789  if (! this->haveGlobalConstants_) {
4790  const Teuchos::Comm<int>& comm = * (this->getComm ());
4791  // Promote all the nodeNum* and nodeMaxNum* quantities from
4792  // size_t to global_size_t, when doing the all-reduces for
4793  // globalNum* / globalMaxNum* results.
4794  //
4795  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4796  // this in two all-reduces (one for the sum and the other for
4797  // the max), or use a custom MPI_Op that combines the sum and
4798  // the max. The latter might even be slower than two
4799  // all-reduces on modern network hardware. It would also be a
4800  // good idea to use nonblocking all-reduces (MPI 3), so that we
4801  // don't have to wait around for the first one to finish before
4802  // starting the second one.
4803  GST lcl[2], gbl[2];
4804  lcl[0] = static_cast<GST> (this->getNodeNumEntries ());
4805 
4806  // mfh 03 May 2018: nodeNumDiags_ is invalid if
4807  // computeLocalTriangularConstants is false, but there's no
4808  // practical network latency difference between an all-reduce of
4809  // length 1 and an all-reduce of length 2, so it's not worth
4810  // distinguishing between the two. However, we do want to avoid
4811  // integer overflow, so we'll just set the input local sum to
4812  // zero in that case.
4813  lcl[1] = computeLocalTriangularConstants ?
4814  static_cast<GST> (this->nodeNumDiags_) :
4815  static_cast<GST> (0);
4816 
4817  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 2, lcl, gbl);
4818  this->globalNumEntries_ = gbl[0];
4819 
4820  // mfh 03 May 2018: If not computing local triangular
4821  // properties, users want this to be invalid, not just zero.
4822  // This will help with debugging.
4823  this->globalNumDiags_ = computeLocalTriangularConstants ?
4824  gbl[1] :
4825  Teuchos::OrdinalTraits<GST>::invalid ();
4826 
4827  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4828  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4829  outArg (this->globalMaxNumRowEntries_));
4830  this->haveGlobalConstants_ = true;
4831  }
4832  }
4833 
4834 
4835  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4836  void
4838  computeLocalConstants (const bool computeLocalTriangularConstants)
4839  {
4841  using ::Tpetra::Details::ProfilingRegion;
4842 
4843  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4844  if (this->haveLocalConstants_) {
4845  return;
4846  }
4847 
4848  // Reset local properties
4849  this->lowerTriangular_ = false;
4850  this->upperTriangular_ = false;
4851  this->nodeMaxNumRowEntries_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4852  this->nodeNumDiags_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4853 
4854  if (computeLocalTriangularConstants) {
4855  const bool hasRowAndColumnMaps =
4856  this->rowMap_.get () != nullptr && this->colMap_.get () != nullptr;
4857  if (hasRowAndColumnMaps) {
4858  auto lclRowMap = this->rowMap_->getLocalMap ();
4859  auto lclColMap = this->colMap_->getLocalMap ();
4860 
4861  // Make sure that the GPU can see any updates made on host.
4862  // This code only reads the local graph, so we don't need a
4863  // fence afterwards.
4864  execution_space::fence ();
4865 
4866  // mfh 01 May 2018: See GitHub Issue #2658.
4867  constexpr bool ignoreMapsForTriStruct = true;
4868  auto result =
4869  determineLocalTriangularStructure (this->lclGraph_, lclRowMap,
4870  lclColMap, ignoreMapsForTriStruct);
4871  this->lowerTriangular_ = result.couldBeLowerTriangular;
4872  this->upperTriangular_ = result.couldBeUpperTriangular;
4873  this->nodeMaxNumRowEntries_ = result.maxNumRowEnt;
4874  this->nodeNumDiags_ = result.diagCount;
4875  }
4876  else {
4877  this->nodeMaxNumRowEntries_ = 0;
4878  this->nodeNumDiags_ = 0;
4879  }
4880  }
4881  else {
4882  using LO = local_ordinal_type;
4883  // Make sure that the GPU can see any updates made on host.
4884  // This code only reads the local graph, so we don't need a
4885  // fence afterwards.
4886  execution_space::fence ();
4887 
4888  auto ptr = this->lclGraph_.row_map;
4889  const LO lclNumRows = ptr.extent(0) == 0 ?
4890  static_cast<LO> (0) :
4891  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4892 
4893  const LO lclMaxNumRowEnt =
4894  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4895  ptr, lclNumRows);
4896  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4897  }
4898  this->haveLocalConstants_ = true;
4899  }
4900 
4901 
4902  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4903  std::pair<size_t, std::string>
4906  {
4907  using ::Tpetra::Details::ProfilingRegion;
4908  using Teuchos::arcp;
4909  using Teuchos::Array;
4910  using std::endl;
4911  typedef LocalOrdinal LO;
4912  typedef GlobalOrdinal GO;
4913  typedef device_type DT;
4914  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
4915  typedef decltype (k_numRowEntries_) row_entries_type;
4916  typedef typename row_entries_type::non_const_value_type num_ent_type;
4917  typedef typename local_graph_type::entries_type::non_const_type
4918  lcl_col_inds_type;
4919  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
4920  device_type> gbl_col_inds_type;
4921  const char tfecfFuncName[] = "makeIndicesLocal: ";
4922  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4923 
4924  // These are somewhat global properties, so it's safe to have
4925  // exception checks for them, rather than returning an error code.
4926  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4927  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4928  "column Map yet. This method should never be called in that case. "
4929  "Please report this bug to the Tpetra developers.");
4930  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4931  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4932  "that it has a column Map, because hasColMap() returns true. However, "
4933  "the result of getColMap() is null. This should never happen. Please "
4934  "report this bug to the Tpetra developers.");
4935 
4936  // Return value 1: The number of column indices (counting
4937  // duplicates) that could not be converted to local indices,
4938  // because they were not in the column Map on the calling process.
4939  size_t lclNumErrs = 0;
4940  std::ostringstream errStrm; // for return value 2 (error string)
4941 
4942  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4943  const map_type& colMap = * (this->getColMap ());
4944 
4945  if (this->isGloballyIndexed () && lclNumRows != 0) {
4946  // This is a host-accessible View.
4947  typename row_entries_type::const_type h_numRowEnt =
4948  this->k_numRowEntries_;
4949 
4950  // Allocate space for local indices.
4951  if (this->getProfileType () == StaticProfile) {
4952  // If GO and LO are the same size, we can reuse the existing
4953  // array of 1-D index storage to convert column indices from
4954  // GO to LO. Otherwise, we'll just allocate a new buffer.
4955  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
4956  if (LO_GO_same) {
4957  // This prevents a build error (illegal assignment) if
4958  // LO_GO_same is _not_ true. Only the first branch
4959  // (returning k_gblInds1D_) should ever get taken.
4960  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
4962  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
4963  }
4964  else {
4965  if (k_rowPtrs_.extent (0) == 0) {
4966  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
4967  "happen here. Please report this bug to the Tpetra developers."
4968  << endl;
4969  // Need to return early.
4970  return std::make_pair (Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4971  errStrm.str ());
4972  }
4973  const auto numEnt = ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, lclNumRows);
4974 
4975  // mfh 17 Dec 2016: We don't need initial zero-fill of
4976  // k_lclInds1D_, because we will fill it below anyway.
4977  // AllowPadding would only help for aligned access (e.g.,
4978  // for vectorization) if we also were to pad each row to the
4979  // same alignment, so we'll skip AllowPadding for now.
4980 
4981  // using Kokkos::AllowPadding;
4982  using Kokkos::view_alloc;
4983  using Kokkos::WithoutInitializing;
4984 
4985  // When giving the label as an argument to
4986  // Kokkos::view_alloc, the label must be a string and not a
4987  // char*, else the code won't compile. This is because
4988  // view_alloc also allows a raw pointer as its first
4989  // argument. See
4990  // https://github.com/kokkos/kokkos/issues/434. This is a
4991  // large allocation typically, so the overhead of creating
4992  // an std::string is minor.
4993  const std::string label ("Tpetra::CrsGraph::lclind");
4994  k_lclInds1D_ =
4995  lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt);
4996  }
4997 
4998  auto lclColMap = colMap.getLocalMap ();
4999  // This is a "device mirror" of the host View h_numRowEnt.
5000  //
5001  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
5002  // Device instance is to use its default constructor. See the
5003  // following Kokkos issue:
5004  //
5005  // https://github.com/kokkos/kokkos/issues/442
5006  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
5007 
5009  lclNumErrs =
5010  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
5011  k_gblInds1D_,
5012  k_rowPtrs_,
5013  lclColMap,
5014  k_numRowEnt);
5015  if (lclNumErrs != 0) {
5016  const int myRank = [this] () {
5017  auto map = this->getMap ();
5018  if (map.is_null ()) {
5019  return 0;
5020  }
5021  else {
5022  auto comm = map->getComm ();
5023  return comm.is_null () ? 0 : comm->getRank ();
5024  }
5025  } ();
5026  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
5027  errStrm << "(Process " << myRank << ") When converting column "
5028  "indices from global to local, we encountered " << lclNumErrs
5029  << " ind" << (pluralNumErrs ? "ices" : "ex")
5030  << " that do" << (pluralNumErrs ? "es" : "")
5031  << " not live in the column Map on this process." << endl;
5032  }
5033 
5034  // We've converted column indices from global to local, so we
5035  // can deallocate the global column indices (which we know are
5036  // in 1-D storage, because the graph has static profile).
5037  k_gblInds1D_ = gbl_col_inds_type ();
5038  }
5039  else { // the graph has dynamic profile (2-D index storage)
5040  // Avoid any drama with *this capture, by extracting the
5041  // variables that the thread-parallel loop will need below.
5042  // This is just a shallow copy.
5043  Teuchos::ArrayRCP<Teuchos::Array<LO> > lclInds2D (lclNumRows);
5044  Teuchos::ArrayRCP<Teuchos::Array<GO> > gblInds2D = this->gblInds2D_;
5045 
5046  // We must use a host thread parallelization here, because
5047  // Teuchos::ArrayRCP does not work in CUDA.
5048  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5049  host_execution_space;
5050  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5051  Kokkos::parallel_reduce (
5052  "Tpetra::CrsGraph::makeIndicesLocal (DynamicProfile)",
5053  range_type (0, lclNumRows),
5054  [&gblInds2D, &h_numRowEnt, &lclInds2D, &colMap] (const LO& lclRow, size_t& numErrs) {
5055  const GO* const curGblInds = gblInds2D[lclRow].getRawPtr ();
5056  // NOTE (mfh 26 Jun 2016) It's always legal to cast the
5057  // number of entries in a row to LO, as long as the row
5058  // doesn't have too many duplicate entries.
5059  const LO rna = static_cast<LO> (gblInds2D[lclRow].size ());
5060  const LO numEnt = static_cast<LO> (h_numRowEnt(lclRow));
5061  lclInds2D[lclRow].resize (rna); // purely thread-local, so safe
5062  LO* const curLclInds = lclInds2D[lclRow].getRawPtr ();
5063  for (LO j = 0; j < numEnt; ++j) {
5064  const GO gid = curGblInds[j];
5065  const LO lid = colMap.getLocalElement (gid);
5066  curLclInds[j] = lid;
5067  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5068  ++numErrs;
5069  }
5070  }
5071  }, lclNumErrs);
5072 
5073  this->lclInds2D_ = lclInds2D; // "commit" the result
5074 
5075  // If we detected an error in the above loop, go back and find
5076  // the global column indices not in the column Map on the
5077  // calling process.
5078  if (lclNumErrs != 0) {
5079  const int myRank = [this] () {
5080  auto map = this->getMap ();
5081  if (map.is_null ()) {
5082  return 0;
5083  }
5084  else {
5085  auto comm = map->getComm ();
5086  return comm.is_null () ? 0 : comm->getRank ();
5087  }
5088  } ();
5089 
5090  // If there are too many errors, don't bother printing them.
5091  constexpr size_t tooManyErrsToPrint = 200; // arbitrary constant
5092  if (lclNumErrs > tooManyErrsToPrint) {
5093  errStrm << "(Process " << myRank << ") When converting column "
5094  "indices from global to local, we encountered " << lclNumErrs
5095  << " indices that do not live in the column Map on this "
5096  "process. That's too many to print." << endl;
5097  }
5098  else {
5099  // Map from local row index, to any global column indices
5100  // that do not live in the column Map on the calling process.
5101  std::map<LO, std::vector<GO> > badColInds;
5102  // List of local rows lclRow for which h_numRowEnt[lclRow]
5103  // > gblInds2D_[lclRow].size().
5104  std::vector<LO> badLclRows;
5105 
5106  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
5107  const size_t numEnt = static_cast<size_t> (h_numRowEnt[lclRow]);
5108 
5109  Teuchos::ArrayView<const GO> curGblInds = gblInds2D_[lclRow] ();
5110  if (numEnt > static_cast<size_t> (curGblInds.size ())) {
5111  badLclRows.push_back (lclRow);
5112  }
5113  else {
5114  for (size_t j = 0; j < numEnt; ++j) {
5115  const GO gid = curGblInds[j];
5116  const LO lid = colMap.getLocalElement (gid);
5117  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5118  badColInds[lclRow].push_back (gid);
5119  }
5120  }
5121  }
5122  }
5123 
5124  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
5125  errStrm << "(Process " << myRank << ") When converting column "
5126  "indices from global to local, we encountered " << lclNumErrs
5127  << " ind" << (pluralNumErrs ? "ices" : "ex") << " that "
5128  "do" << (pluralNumErrs ? "es" : "")
5129  << " not live in the column Map on this process." << endl
5130  << "(Process " << myRank << ") Here are the bad global "
5131  "indices, listed by local row: " << endl;
5132  for (auto && eachPair : badColInds) {
5133  const LO lclRow = eachPair.first;
5134  const GO gblRow = rowMap_->getGlobalElement (lclRow);
5135  errStrm << "(Process " << myRank << ") Local row " << lclRow
5136  << " (global row " << gblRow << "): [";
5137  const size_t numBad = eachPair.second.size ();
5138  for (size_t k = 0; k < numBad; ++k) {
5139  errStrm << eachPair.second[k];
5140  if (k + size_t (1) < numBad) {
5141  errStrm << ",";
5142  }
5143  }
5144  errStrm << "]" << endl;
5145  }
5146 
5147  if (badLclRows.size () != 0) {
5148  if (lclNumErrs == 0) {
5149  // We really want lclNumErrs to be just the count of
5150  // bad column indices, but lclNumErrs != 0 also
5151  // doubles as a generic indication of error.
5152  lclNumErrs = badLclRows.size ();
5153  }
5154 
5155  errStrm << "(Process " << myRank << ") When converting column "
5156  "indices from global to local, we (also) encountered the "
5157  "following local rows lclRow on this process for which "
5158  "h_numRowEnt[lclRow] > gblInds2D_[lclRow].size(). This "
5159  "likely indicates a bug in Tpetra." << endl
5160  << "(Process " << myRank << ") [";
5161  const size_t numBad = badLclRows.size ();
5162  for (size_t k = 0; k < numBad; ++k) {
5163  const LO lclRow = badLclRows[k];
5164  errStrm << "{lclRow: " << lclRow
5165  << "h_numRowEnt[lclRow]: " << h_numRowEnt[lclRow]
5166  << "gblInds2D_[lclRow].size(): "
5167  << gblInds2D_[lclRow].size () << "}";
5168  if (k + size_t (1) < numBad) {
5169  errStrm << ", ";
5170  }
5171  }
5172  errStrm << "]" << endl;
5173  }
5174  }
5175  }
5176 
5177  this->gblInds2D_ = Teuchos::null;
5178  }
5179  } // globallyIndexed() && lclNumRows > 0
5180 
5181  this->lclGraph_ = local_graph_type (this->k_lclInds1D_, this->k_rowPtrs_);
5182  this->indicesAreLocal_ = true;
5183  this->indicesAreGlobal_ = false;
5184  this->checkInternalState ();
5185 
5186  return std::make_pair (lclNumErrs, errStrm.str ());
5187  }
5188 
5189 
5190  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5191  void
5193  makeColMap (Teuchos::Array<int>& remotePIDs)
5194  {
5195  using ::Tpetra::Details::ProfilingRegion;
5196  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
5197  const bool debug = ::Tpetra::Details::Behavior::debug ();
5198 
5199  // this->colMap_ should be null at this point, but we accept the
5200  // future possibility that it might not be (esp. if we decide
5201  // later to support graph structure changes after first
5202  // fillComplete, which CrsGraph does not currently (as of 12 Feb
5203  // 2017) support).
5204  Teuchos::RCP<const map_type> colMap = this->colMap_;
5205  const bool sortEachProcsGids =
5206  this->sortGhostsAssociatedWithEachProcessor_;
5207 
5208  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
5209  // per-process error code. If an error does occur on a process,
5210  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
5211  // notice that error. This is the caller's responsibility. For
5212  // now, we only propagate (to all processes) and report the error
5213  // in debug mode. In the future, we need to add the local/global
5214  // error handling scheme used in BlockCrsMatrix to this class.
5215  if (debug) {
5216  using Teuchos::outArg;
5217  using Teuchos::REDUCE_MIN;
5218  using Teuchos::reduceAll;
5219  const char tfecfFuncName[] = "makeColMap: ";
5220 
5221  std::ostringstream errStrm;
5222  const int lclErrCode =
5223  ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5224  *this, sortEachProcsGids, &errStrm);
5225  auto comm = this->getComm ();
5226  if (! comm.is_null ()) {
5227  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
5228  int gblSuccess = 0; // output argument
5229  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
5230  outArg (gblSuccess));
5231  if (gblSuccess != 1) {
5232  std::ostringstream os;
5233  Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
5234  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5235  (true, std::runtime_error, "An error happened on at least one "
5236  "(MPI) process in the CrsGraph's communicator. Here are all "
5237  "processes' error messages:" << std::endl << os.str ());
5238  }
5239  }
5240  }
5241  else {
5242  (void) ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5243  *this, sortEachProcsGids, NULL);
5244  }
5245  // See above. We want to admit the possibility of makeColMap
5246  // actually revising an existing column Map, even though that
5247  // doesn't currently (as of 10 May 2017) happen.
5248  this->colMap_ = colMap;
5249 
5250  checkInternalState ();
5251  }
5252 
5253 
5254  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5255  void
5257  sortAndMergeAllIndices (const bool sorted, const bool merged)
5258  {
5259  using ::Tpetra::Details::ProfilingRegion;
5260  typedef LocalOrdinal LO;
5261  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5262  host_execution_space;
5263  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5264  const char tfecfFuncName[] = "sortAndMergeAllIndices: ";
5265  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::sortAndMergeAllIndices");
5266 
5267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5268  (this->isGloballyIndexed (), std::logic_error,
5269  "This method may only be called after makeIndicesLocal." );
5270 
5271  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5272  (! merged && this->isStorageOptimized (), std::logic_error,
5273  "The graph is already storage optimized, so we shouldn't be merging any "
5274  "indices. Please report this bug to the Tpetra developers.");
5275 
5276  if (! sorted || ! merged) {
5277  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
5278  size_t totalNumDups = 0;
5279  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
5280  Kokkos::parallel_reduce (range_type (0, lclNumRows),
5281  [this, sorted, merged] (const LO& lclRow, size_t& numDups) {
5282  const RowInfo rowInfo = this->getRowInfo (lclRow);
5283  numDups += this->sortAndMergeRowIndices (rowInfo, sorted, merged);
5284  }, totalNumDups);
5285  this->indicesAreSorted_ = true; // we just sorted every row
5286  this->noRedundancies_ = true; // we just merged every row
5287  }
5288  }
5289 
5290 
5291  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5292  void
5293  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5294  makeImportExport (Teuchos::Array<int>& remotePIDs,
5295  const bool useRemotePIDs)
5296  {
5297  using ::Tpetra::Details::ProfilingRegion;
5298  using Teuchos::ParameterList;
5299  using Teuchos::RCP;
5300  using Teuchos::rcp;
5301  const char tfecfFuncName[] = "makeImportExport: ";
5302  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
5303 
5304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5305  (! this->hasColMap (), std::logic_error,
5306  "This method may not be called unless the graph has a column Map.");
5307  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5308 
5309  // Don't do any checks to see if we need to create the Import, if
5310  // it exists already.
5311  //
5312  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
5313  // change CrsGraph in the future to allow changing the column
5314  // Map after fillComplete. For now, the column Map is fixed
5315  // after the first fillComplete call.
5316  if (importer_.is_null ()) {
5317  // Create the Import instance if necessary.
5318  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
5319  if (params.is_null () || ! params->isSublist ("Import")) {
5320  if (useRemotePIDs) {
5321  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
5322  }
5323  else {
5324  importer_ = rcp (new import_type (domainMap_, colMap_));
5325  }
5326  }
5327  else {
5328  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5329  if (useRemotePIDs) {
5330  RCP<import_type> newImp =
5331  rcp (new import_type (domainMap_, colMap_, remotePIDs));
5332  newImp->setParameterList (importSublist); // nonconst method
5333  importer_ = newImp; // assign nonconst to const
5334  }
5335  else {
5336  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
5337  }
5338  }
5339  }
5340  }
5341 
5342  // Don't do any checks to see if we need to create the Export, if
5343  // it exists already.
5344  if (exporter_.is_null ()) {
5345  // Create the Export instance if necessary.
5346  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
5347  if (params.is_null () || ! params->isSublist ("Export")) {
5348  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
5349  }
5350  else {
5351  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5352  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
5353  }
5354  }
5355  }
5356  }
5357 
5358 
5359  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5360  std::string
5363  {
5364  std::ostringstream oss;
5365  oss << dist_object_type::description ();
5366  if (isFillComplete ()) {
5367  oss << "{status = fill complete"
5368  << ", global rows = " << getGlobalNumRows()
5369  << ", global cols = " << getGlobalNumCols()
5370  << ", global num entries = " << getGlobalNumEntries()
5371  << "}";
5372  }
5373  else {
5374  oss << "{status = fill not complete"
5375  << ", global rows = " << getGlobalNumRows()
5376  << "}";
5377  }
5378  return oss.str();
5379  }
5380 
5381 
5382  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5383  void
5385  describe (Teuchos::FancyOStream &out,
5386  const Teuchos::EVerbosityLevel verbLevel) const
5387  {
5388  using Teuchos::ArrayView;
5389  using Teuchos::Comm;
5390  using Teuchos::RCP;
5391  using Teuchos::VERB_DEFAULT;
5392  using Teuchos::VERB_NONE;
5393  using Teuchos::VERB_LOW;
5394  using Teuchos::VERB_MEDIUM;
5395  using Teuchos::VERB_HIGH;
5396  using Teuchos::VERB_EXTREME;
5397  using std::endl;
5398  using std::setw;
5399 
5400  Teuchos::EVerbosityLevel vl = verbLevel;
5401  if (vl == VERB_DEFAULT) vl = VERB_LOW;
5402  RCP<const Comm<int> > comm = this->getComm();
5403  const int myImageID = comm->getRank(),
5404  numImages = comm->getSize();
5405  size_t width = 1;
5406  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5407  ++width;
5408  }
5409  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5410  Teuchos::OSTab tab (out);
5411  // none: print nothing
5412  // low: print O(1) info from node 0
5413  // medium: print O(P) info, num entries per node
5414  // high: print O(N) info, num entries per row
5415  // extreme: print O(NNZ) info: print graph indices
5416  //
5417  // for medium and higher, print constituent objects at specified verbLevel
5418  if (vl != VERB_NONE) {
5419  if (myImageID == 0) out << this->description() << std::endl;
5420  // O(1) globals, minus what was already printed by description()
5421  if (isFillComplete() && myImageID == 0) {
5422  out << "Global number of diagonals = " << globalNumDiags_ << std::endl;
5423  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5424  }
5425  // constituent objects
5426  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5427  if (myImageID == 0) out << "\nRow map: " << std::endl;
5428  rowMap_->describe(out,vl);
5429  if (colMap_ != Teuchos::null) {
5430  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5431  colMap_->describe(out,vl);
5432  }
5433  if (domainMap_ != Teuchos::null) {
5434  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5435  domainMap_->describe(out,vl);
5436  }
5437  if (rangeMap_ != Teuchos::null) {
5438  if (myImageID == 0) out << "\nRange map: " << std::endl;
5439  rangeMap_->describe(out,vl);
5440  }
5441  }
5442  // O(P) data
5443  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5444  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5445  if (myImageID == imageCtr) {
5446  out << "Node ID = " << imageCtr << std::endl
5447  << "Node number of entries = " << this->getNodeNumEntries () << std::endl
5448  << "Node number of diagonals = " << nodeNumDiags_ << std::endl
5449  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5450  if (! indicesAreAllocated ()) {
5451  out << "Indices are not allocated." << std::endl;
5452  }
5453  }
5454  comm->barrier();
5455  comm->barrier();
5456  comm->barrier();
5457  }
5458  }
5459  // O(N) and O(NNZ) data
5460  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5461  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5462  if (myImageID == imageCtr) {
5463  out << std::setw(width) << "Node ID"
5464  << std::setw(width) << "Global Row"
5465  << std::setw(width) << "Num Entries";
5466  if (vl == VERB_EXTREME) {
5467  out << " Entries";
5468  }
5469  out << std::endl;
5470  const LocalOrdinal lclNumRows =
5471  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5472  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5473  const RowInfo rowinfo = this->getRowInfo (r);
5474  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5475  out << std::setw(width) << myImageID
5476  << std::setw(width) << gid
5477  << std::setw(width) << rowinfo.numEntries;
5478  if (vl == VERB_EXTREME) {
5479  out << " ";
5480  if (isGloballyIndexed()) {
5481  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
5482  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
5483  }
5484  else if (isLocallyIndexed()) {
5485  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
5486  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
5487  }
5488  }
5489  out << std::endl;
5490  }
5491  }
5492  comm->barrier();
5493  comm->barrier();
5494  comm->barrier();
5495  }
5496  }
5497  }
5498  }
5499 
5500 
5501  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5502  bool
5504  checkSizes (const SrcDistObject& /* source */)
5505  {
5506  // It's not clear what kind of compatibility checks on sizes can
5507  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5508  // compatibility.
5509  return true;
5510  }
5511 
5512 
5513  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5514  void
5517  size_t numSameIDs,
5518  const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
5519  const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
5520  {
5521  using Teuchos::Array;
5522  using Teuchos::ArrayView;
5523  typedef LocalOrdinal LO;
5524  typedef GlobalOrdinal GO;
5525  const char tfecfFuncName[] = "copyAndPermute";
5527  typedef RowGraph<LO, GO, node_type> row_graph_type;
5528 
5529  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5530  permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
5531  ": permuteToLIDs and permuteFromLIDs must have the same size.");
5532  // Make sure that the source object has the right type. We only
5533  // actually need it to be a RowGraph, with matching first three
5534  // template parameters. If it's a CrsGraph, we can use view mode
5535  // instead of copy mode to get each row's data.
5536  //
5537  // FIXME (mfh 07 Jul 2013) It should not be necessary for any of
5538  // the template parameters but GO to match. GO has to match
5539  // because the graph has to send indices as global ordinals, if
5540  // the source and target graphs do not have the same column Map.
5541  // If LO doesn't match, the graphs could communicate using global
5542  // indices. It could be possible that Node affects the graph's
5543  // storage format, but packAndPrepare should assume a common
5544  // communication format in any case.
5545  const row_graph_type* srcRowGraph = dynamic_cast<const row_graph_type*> (&source);
5546  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5547  srcRowGraph == NULL, std::invalid_argument,
5548  ": The source object must be a RowGraph with matching first three "
5549  "template parameters.");
5550 
5551  // If the source object is actually a CrsGraph, we can use view
5552  // mode instead of copy mode to access the entries in each row,
5553  // if the graph is not fill complete.
5554  const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source);
5555 
5556  const map_type& srcRowMap = * (srcRowGraph->getRowMap ());
5557  const map_type& tgtRowMap = * (this->getRowMap ());
5558  const bool src_filled = srcRowGraph->isFillComplete ();
5559  Array<GO> row_copy;
5560  LO myid = 0;
5561 
5562  //
5563  // "Copy" part of "copy and permute."
5564  //
5565  if (src_filled || srcCrsGraph == NULL) {
5566  // If the source graph is fill complete, we can't use view mode,
5567  // because the data might be stored in a different format not
5568  // compatible with the expectations of view mode. Also, if the
5569  // source graph is not a CrsGraph, we can't use view mode,
5570  // because RowGraph only provides copy mode access to the data.
5571  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5572  const GO gid = srcRowMap.getGlobalElement (myid);
5573  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (gid);
5574  row_copy.resize (row_length);
5575  size_t check_row_length = 0;
5576  srcRowGraph->getGlobalRowCopy (gid, row_copy (), check_row_length);
5577  this->insertGlobalIndices (gid, row_copy ());
5578  }
5579  } else {
5580  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5581  const GO gid = srcRowMap.getGlobalElement (myid);
5582  ArrayView<const GO> row;
5583  srcCrsGraph->getGlobalRowView (gid, row);
5584  this->insertGlobalIndices (gid, row);
5585  }
5586  }
5587 
5588  //
5589  // "Permute" part of "copy and permute."
5590  //
5591  if (src_filled || srcCrsGraph == NULL) {
5592  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5593  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5594  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5595  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (srcgid);
5596  row_copy.resize (row_length);
5597  size_t check_row_length = 0;
5598  srcRowGraph->getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5599  this->insertGlobalIndices (mygid, row_copy ());
5600  }
5601  } else {
5602  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5603  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5604  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5605  ArrayView<const GO> row;
5606  srcCrsGraph->getGlobalRowView (srcgid, row);
5607  this->insertGlobalIndices (mygid, row);
5608  }
5609  }
5610  }
5611 
5612 
5613  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5614  void
5616  packAndPrepare (const SrcDistObject& source,
5617  const Teuchos::ArrayView<const LocalOrdinal> &exportLIDs,
5618  Teuchos::Array<GlobalOrdinal> &exports,
5619  const Teuchos::ArrayView<size_t> & numPacketsPerLID,
5620  size_t& constantNumPackets,
5621  Distributor& distor)
5622  {
5624  typedef RowGraph<LocalOrdinal, GlobalOrdinal, node_type> row_graph_type;
5625  const char tfecfFuncName[] = "packAndPrepare: ";
5626  ProfilingRegion regionPackAndPrepare ("Tpetra::CrsGraph::packAndPrepare");
5627 
5628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5629  (exportLIDs.size () != numPacketsPerLID.size (), std::runtime_error,
5630  "exportLIDs.size() = " << exportLIDs.size ()
5631  << " != numPacketsPerLID.size() = " << numPacketsPerLID.size () << ".");
5632  const row_graph_type& srcGraph = dynamic_cast<const row_graph_type&> (source);
5633 
5634  // We don't check whether src_graph has had fillComplete called,
5635  // because it doesn't matter whether the *source* graph has been
5636  // fillComplete'd. The target graph can not be fillComplete'd yet.
5637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5638  (this->isFillComplete (), std::runtime_error,
5639  "The target graph of an Import or Export must not be fill complete.");
5640  srcGraph.pack (exportLIDs, exports, numPacketsPerLID,
5641  constantNumPackets, distor);
5642  }
5643 
5644 
5645  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5646  void
5648  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5649  Teuchos::Array<GlobalOrdinal>& exports,
5650  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5651  size_t& constantNumPackets,
5652  Distributor& /* distor */) const
5653  {
5654  typedef LocalOrdinal LO;
5655  typedef GlobalOrdinal GO;
5656  typedef typename Kokkos::View<size_t*,
5657  device_type>::HostMirror::execution_space host_execution_space;
5658  typedef typename device_type::execution_space device_execution_space;
5659  const char tfecfFuncName[] = "pack: ";
5660  constexpr bool debug = false;
5661  const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0;
5662 
5663  const auto numExportLIDs = exportLIDs.size ();
5664  if (debug) {
5665  std::ostringstream os;
5666  os << "Proc " << myRank << ": CrsGraph::pack: numExportLIDs = "
5667  << numExportLIDs << std::endl;
5668  std::cerr << os.str ();
5669  }
5670  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5671  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5672  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5673  " = " << numPacketsPerLID.size () << ".");
5674 
5675  // We may be accessing UVM data on host below, so ensure that the
5676  // device is done accessing it.
5677  device_execution_space::fence ();
5678 
5679  const map_type& rowMap = * (this->getRowMap ());
5680  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5681  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5682  (this->isLocallyIndexed () && colMapPtr == NULL, std::logic_error,
5683  "This graph claims to be locally indexed, but its column Map is NULL. "
5684  "This should never happen. Please report this bug to the Tpetra "
5685  "developers.");
5686 
5687  // We may pack different amounts of data for different rows.
5688  constantNumPackets = 0;
5689 
5690  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5691  // it might be now, but we might as well be safe).
5692  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5693  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5694 
5695  // Count the total number of packets (column indices, in the case
5696  // of a CrsGraph) to pack. While doing so, set
5697  // numPacketsPerLID[i] to the number of entries owned by the
5698  // calling process in (local) row exportLIDs[i] of the graph, that
5699  // the caller wants us to send out.
5700  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5701  size_t totalNumPackets = 0;
5702  size_t errCount = 0;
5703  // lambdas turn what they capture const, so we can't
5704  // atomic_add(&errCount,1). Instead, we need a View to modify.
5705  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5706  host_device_type;
5707  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5708  constexpr size_t ONE = 1;
5709 
5710  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5711  inputRange,
5712  [=] (const LO& i, size_t& curTotalNumPackets) {
5713  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5714  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5715  Kokkos::atomic_add (&errCountView(), ONE);
5716  numPacketsPerLID_raw[i] = 0;
5717  }
5718  else {
5719  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5720  numPacketsPerLID_raw[i] = numEnt;
5721  curTotalNumPackets += numEnt;
5722  }
5723  },
5724  totalNumPackets);
5725 
5726  if (debug) {
5727  std::ostringstream os;
5728  os << "Proc " << myRank << ": CrsGraph::pack: "
5729  << "totalNumPackets = " << totalNumPackets << std::endl;
5730  std::cerr << os.str ();
5731  }
5732  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5733  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5734  "one or more errors! errCount = " << errCount
5735  << ", totalNumPackets = " << totalNumPackets << ".");
5736  errCount = 0;
5737 
5738  // Allocate space for all the column indices to pack.
5739  exports.resize (totalNumPackets);
5740 
5741  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5742  (! this->supportsRowViews (), std::logic_error,
5743  "this->supportsRowViews() returns false; this should never happen. "
5744  "Please report this bug to the Tpetra developers.");
5745 
5746  // Loop again over the rows to export, and pack rows of indices
5747  // into the output buffer.
5748 
5749  if (debug) {
5750  std::ostringstream os;
5751  os << "Proc " << myRank << ": CrsGraph::pack: pack into exports" << std::endl;
5752  std::cerr << os.str ();
5753  }
5754 
5755  // Teuchos::ArrayView may not be thread safe, or may not be
5756  // efficiently thread safe. Better to use the raw pointer.
5757  GO* const exports_raw = exports.getRawPtr ();
5758  errCount = 0;
5759  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5760  inputRange,
5761  [=] (const LO& i, size_t& exportsOffset, const bool final) {
5762  const size_t curOffset = exportsOffset;
5763  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5764  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5765 
5766  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
5767  if (debug) {
5768  std::ostringstream os;
5769  os << "Proc " << myRank << ": INVALID rowInfo: "
5770  << "i = " << i << ", lclRow = " << exportLIDs_raw[i] << std::endl;
5771  std::cerr << os.str ();
5772  }
5773  Kokkos::atomic_add (&errCountView(), ONE);
5774  }
5775  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5776  if (debug) {
5777  std::ostringstream os;
5778  os << "Proc " << myRank << ": UH OH! For i=" << i << ", lclRow="
5779  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5780  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5781  << ") > totalNumPackets (= " << totalNumPackets << ")."
5782  << std::endl;
5783  std::cerr << os.str ();
5784  }
5785  Kokkos::atomic_add (&errCountView(), ONE);
5786  }
5787  else {
5788  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5789  if (this->isLocallyIndexed ()) {
5790  const LO* lclColInds = NULL;
5791  LO capacity = 0;
5792  const LO errCode =
5793  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
5794  if (errCode == 0) {
5795  if (final) {
5796  for (LO k = 0; k < numEnt; ++k) {
5797  const LO lclColInd = lclColInds[k];
5798  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5799  // Pack it, even if it's wrong. Let the receiving
5800  // process deal with it. Otherwise, we'll miss out
5801  // on any correct data.
5802  exports_raw[curOffset + k] = gblColInd;
5803  } // for each entry in the row
5804  } // final pass?
5805  exportsOffset = curOffset + numEnt;
5806  }
5807  else { // error in getting local row view
5808  Kokkos::atomic_add (&errCountView(), ONE);
5809  }
5810  }
5811  else if (this->isGloballyIndexed ()) {
5812  const GO* gblColInds = NULL;
5813  LO capacity = 0;
5814  const LO errCode =
5815  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
5816  if (errCode == 0) {
5817  if (final) {
5818  for (LO k = 0; k < numEnt; ++k) {
5819  const GO gblColInd = gblColInds[k];
5820  // Pack it, even if it's wrong. Let the receiving
5821  // process deal with it. Otherwise, we'll miss out
5822  // on any correct data.
5823  exports_raw[curOffset + k] = gblColInd;
5824  } // for each entry in the row
5825  } // final pass?
5826  exportsOffset = curOffset + numEnt;
5827  }
5828  else { // error in getting global row view
5829  Kokkos::atomic_add (&errCountView(), ONE);
5830  }
5831  }
5832  // If neither globally nor locally indexed, then the graph
5833  // has no entries in this row (or indeed, in any row on this
5834  // process) to pack.
5835  }
5836  });
5837 
5838  // We may have accessed UVM data on host above, so ensure that the
5839  // device sees these changes.
5840  device_execution_space::fence ();
5841 
5842  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5843  (errCount != 0, std::logic_error, "Packing encountered "
5844  "one or more errors! errCount = " << errCount
5845  << ", totalNumPackets = " << totalNumPackets << ".");
5846  }
5847 
5848 
5849  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5850  void
5852  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
5853  const Teuchos::ArrayView<const GlobalOrdinal> &imports,
5854  const Teuchos::ArrayView<size_t> &numPacketsPerLID,
5855  size_t constantNumPackets,
5856  Distributor& /* distor */,
5857  CombineMode /* CM */)
5858  {
5859  typedef LocalOrdinal LO;
5860  typedef GlobalOrdinal GO;
5861 
5862  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5863  // reasonable meaning, whether or not the matrix is fill complete.
5864  // It's just more work to implement.
5865 
5866  // We are not checking the value of the CombineMode input
5867  // argument. For CrsGraph, we only support import/export
5868  // operations if fillComplete has not yet been called. Any
5869  // incoming column-indices are inserted into the target graph. In
5870  // this context, CombineMode values of ADD vs INSERT are
5871  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5872  // duplicate column-index is inserted, it will be compressed out
5873  // when fillComplete is called.
5874  //
5875  // Note: I think REPLACE means that an existing row is replaced by
5876  // the imported row, i.e., the existing indices are cleared. CGB,
5877  // 6/17/2010
5878 
5879  const char tfecfFuncName[] = "unpackAndCombine: ";
5880  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5881  importLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5882  "importLIDs and numPacketsPerLID must have the same size.");
5883  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5884  isFillComplete (), std::runtime_error,
5885  "Import or Export operations are not allowed on the destination "
5886  "CrsGraph if it is fill complete.");
5887 
5888  const map_type& rowMap = * (this->rowMap_);
5889  const size_t numImportLIDs = static_cast<size_t> (importLIDs.size ());
5890  size_t importsOffset = 0;
5891  for (size_t i = 0; i < numImportLIDs; ++i) {
5892  const LO lclRow = importLIDs[i];
5893  const GO gblRow = rowMap.getGlobalElement (lclRow);
5894  const LO numEnt = numPacketsPerLID[i];
5895  const GO* const gblColInds = (numEnt == 0) ? NULL : &imports[importsOffset];
5896  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5897  // This row is not in the row Map on the calling process.
5898  this->insertGlobalIndicesIntoNonownedRows (gblRow, gblColInds, numEnt);
5899  }
5900  else {
5901  this->insertGlobalIndicesFiltered (lclRow, gblColInds, numEnt);
5902  }
5903  importsOffset += numEnt;
5904  }
5905  }
5906 
5907 
5908  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5909  void
5911  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
5912  {
5913  using Teuchos::Comm;
5914  using Teuchos::null;
5915  using Teuchos::ParameterList;
5916  using Teuchos::RCP;
5917 
5918  // We'll set all the state "transactionally," so that this method
5919  // satisfies the strong exception guarantee. This object's state
5920  // won't be modified until the end of this method.
5921  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
5922  RCP<import_type> importer;
5923  RCP<export_type> exporter;
5924 
5925  rowMap = newMap;
5926  RCP<const Comm<int> > newComm =
5927  (newMap.is_null ()) ? null : newMap->getComm ();
5928 
5929  if (! domainMap_.is_null ()) {
5930  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5931  // Common case: original domain and row Maps are identical.
5932  // In that case, we need only replace the original domain Map
5933  // with the new Map. This ensures that the new domain and row
5934  // Maps _stay_ identical.
5935  domainMap = newMap;
5936  } else {
5937  domainMap = domainMap_->replaceCommWithSubset (newComm);
5938  }
5939  }
5940  if (! rangeMap_.is_null ()) {
5941  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5942  // Common case: original range and row Maps are identical. In
5943  // that case, we need only replace the original range Map with
5944  // the new Map. This ensures that the new range and row Maps
5945  // _stay_ identical.
5946  rangeMap = newMap;
5947  } else {
5948  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
5949  }
5950  }
5951  if (! colMap.is_null ()) {
5952  colMap = colMap_->replaceCommWithSubset (newComm);
5953  }
5954 
5955  // (Re)create the Export and / or Import if necessary.
5956  if (! newComm.is_null ()) {
5957  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5958  //
5959  // The operations below are collective on the new communicator.
5960  //
5961  // (Re)create the Export object if necessary. If I haven't
5962  // called fillComplete yet, I don't have a rangeMap, so I must
5963  // first check if the _original_ rangeMap is not null. Ditto
5964  // for the Import object and the domain Map.
5965  if (! rangeMap_.is_null () &&
5966  rangeMap != rowMap &&
5967  ! rangeMap->isSameAs (*rowMap)) {
5968  if (params.is_null () || ! params->isSublist ("Export")) {
5969  exporter = rcp (new export_type (rowMap, rangeMap));
5970  }
5971  else {
5972  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5973  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
5974  }
5975  }
5976  // (Re)create the Import object if necessary.
5977  if (! domainMap_.is_null () &&
5978  domainMap != colMap &&
5979  ! domainMap->isSameAs (*colMap)) {
5980  if (params.is_null () || ! params->isSublist ("Import")) {
5981  importer = rcp (new import_type (domainMap, colMap));
5982  } else {
5983  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5984  importer = rcp (new import_type (domainMap, colMap, importSublist));
5985  }
5986  }
5987  } // if newComm is not null
5988 
5989  // Defer side effects until the end. If no destructors throw
5990  // exceptions (they shouldn't anyway), then this method satisfies
5991  // the strong exception guarantee.
5992  exporter_ = exporter;
5993  importer_ = importer;
5994  rowMap_ = rowMap;
5995  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5996  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5997  // the same object. We might want to get rid of this redundant
5998  // pointer sometime, but for now, we'll leave it alone and just
5999  // set map_ to the same object.
6000  this->map_ = rowMap;
6001  domainMap_ = domainMap;
6002  rangeMap_ = rangeMap;
6003  colMap_ = colMap;
6004  }
6005 
6006  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6007  void
6009  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6010  {
6011  typedef LocalOrdinal LO;
6012  typedef GlobalOrdinal GO;
6013  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6014 
6015  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6016  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6017  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6018  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6019  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6020  std::invalid_argument, "offsets.extent(0) = " <<
6021  offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6022 
6023  const map_type& rowMap = * (this->getRowMap ());
6024  const map_type& colMap = * (this->getColMap ());
6025 
6026 #ifdef HAVE_TPETRA_DEBUG
6027  bool allRowMapDiagEntriesInColMap = true;
6028  bool allDiagEntriesFound = true;
6029  bool allOffsetsCorrect = true;
6030  bool noOtherWeirdness = true;
6031  std::vector<std::pair<LO, size_t> > wrongOffsets;
6032 #endif // HAVE_TPETRA_DEBUG
6033 
6034  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6035  // the subset of Map functionality that we need below.
6036  auto lclRowMap = rowMap.getLocalMap ();
6037  auto lclColMap = colMap.getLocalMap ();
6038 
6039  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6040  // setup, at least on the host. For CUDA, we have to use LocalMap
6041  // (that comes from each of the two Maps).
6042 
6043  const bool sorted = this->isSorted ();
6044  if (isFillComplete ()) {
6045  auto lclGraph = this->getLocalGraph ();
6046  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6047  lclGraph.row_map,
6048  lclGraph.entries, sorted);
6049  }
6050  else {
6051  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6052  // since the graph is not fill complete. The previous version
6053  // of this code assumed UVM; this version does not.
6054  auto offsets_h = Kokkos::create_mirror_view (offsets);
6055 
6056  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6057  // Find the diagonal entry. Since the row Map and column Map
6058  // may differ, we have to compare global row and column
6059  // indices, not local.
6060  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6061  const GO gblColInd = gblRowInd;
6062  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6063 
6064  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6065 #ifdef HAVE_TPETRA_DEBUG
6066  allRowMapDiagEntriesInColMap = false;
6067 #endif // HAVE_TPETRA_DEBUG
6068  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6069  }
6070  else {
6071  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6072  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6073  rowInfo.numEntries > 0) {
6074 
6075  auto colInds = this->getLocalKokkosRowView (rowInfo);
6076  const size_t hint = 0; // not needed for this algorithm
6077  const size_t offset =
6078  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6079  lclColInd, hint, sorted);
6080  offsets_h(lclRowInd) = offset;
6081 
6082 #ifdef HAVE_TPETRA_DEBUG
6083  // Now that we have what we think is an offset, make sure
6084  // that it really does point to the diagonal entry. Offsets
6085  // are _relative_ to each row, not absolute (for the whole
6086  // (local) graph).
6087  Teuchos::ArrayView<const LO> lclColInds;
6088  try {
6089  this->getLocalRowView (lclRowInd, lclColInds);
6090  }
6091  catch (...) {
6092  noOtherWeirdness = false;
6093  }
6094  // Don't continue with error checking if the above failed.
6095  if (noOtherWeirdness) {
6096  const size_t numEnt = lclColInds.size ();
6097  if (offset >= numEnt) {
6098  // Offsets are relative to each row, so this means that
6099  // the offset is out of bounds.
6100  allOffsetsCorrect = false;
6101  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6102  } else {
6103  const LO actualLclColInd = lclColInds[offset];
6104  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6105  if (actualGblColInd != gblColInd) {
6106  allOffsetsCorrect = false;
6107  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6108  }
6109  }
6110  }
6111 #endif // HAVE_TPETRA_DEBUG
6112  }
6113  else { // either row is empty, or something went wrong w/ getRowInfo()
6114  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6115 #ifdef HAVE_TPETRA_DEBUG
6116  allDiagEntriesFound = false;
6117 #endif // HAVE_TPETRA_DEBUG
6118  }
6119  } // whether lclColInd is a valid local column index
6120  } // for each local row
6121 
6122  Kokkos::deep_copy (offsets, offsets_h);
6123  } // whether the graph is fill complete
6124 
6125 #ifdef HAVE_TPETRA_DEBUG
6126  if (wrongOffsets.size () != 0) {
6127  std::ostringstream os;
6128  os << "Proc " << this->getComm ()->getRank () << ": Wrong offsets: [";
6129  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6130  os << "(" << wrongOffsets[k].first << ","
6131  << wrongOffsets[k].second << ")";
6132  if (k + 1 < wrongOffsets.size ()) {
6133  os << ", ";
6134  }
6135  }
6136  os << "]" << std::endl;
6137  std::cerr << os.str ();
6138  }
6139 #endif // HAVE_TPETRA_DEBUG
6140 
6141 #ifdef HAVE_TPETRA_DEBUG
6142  using Teuchos::reduceAll;
6143  using std::endl;
6144  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6145  const bool localSuccess =
6146  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6147  const int numResults = 5;
6148  int lclResults[5];
6149  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6150  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6151  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6152  lclResults[3] = noOtherWeirdness ? 1 : 0;
6153  // min-all-reduce will compute least rank of all the processes
6154  // that didn't succeed.
6155  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6156 
6157  int gblResults[5];
6158  gblResults[0] = 0;
6159  gblResults[1] = 0;
6160  gblResults[2] = 0;
6161  gblResults[3] = 0;
6162  gblResults[4] = 0;
6163  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6164  numResults, lclResults, gblResults);
6165 
6166  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6167  || gblResults[3] != 1) {
6168  std::ostringstream os; // build error message
6169  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6170  "possibly among others): " << endl;
6171  if (gblResults[0] == 0) {
6172  os << " - The column Map does not contain at least one diagonal entry "
6173  "of the graph." << endl;
6174  }
6175  if (gblResults[1] == 0) {
6176  os << " - On one or more processes, some row does not contain a "
6177  "diagonal entry." << endl;
6178  }
6179  if (gblResults[2] == 0) {
6180  os << " - On one or more processes, some offsets are incorrect."
6181  << endl;
6182  }
6183  if (gblResults[3] == 0) {
6184  os << " - One or more processes had some other error."
6185  << endl;
6186  }
6187  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6188  }
6189 #endif // HAVE_TPETRA_DEBUG
6190  }
6191 
6192  namespace { // (anonymous)
6193 
6194  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6195  // below). The point is to avoid the deep copy between the input
6196  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6197  // can't use UVM to avoid the deep copy with CUDA, because the
6198  // ArrayRCP is a host pointer, while the input to the graph's
6199  // getLocalDiagOffsets method is a device pointer. Assigning a
6200  // host pointer to a device pointer is incorrect unless the host
6201  // pointer points to host pinned memory. The goal is to get rid
6202  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6203  // copy for backwards compatibility.
6204  //
6205  // We have to use template magic because
6206  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6207  // if device_type::memory_space is not Kokkos::HostSpace (as is
6208  // the case with CUDA).
6209 
6210  template<class DeviceType,
6211  const bool memSpaceIsHostSpace =
6212  std::is_same<typename DeviceType::memory_space,
6213  Kokkos::HostSpace>::value>
6214  struct HelpGetLocalDiagOffsets {};
6215 
6216  template<class DeviceType>
6217  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6218  typedef DeviceType device_type;
6219  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6220  Kokkos::MemoryUnmanaged> device_offsets_type;
6221  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6222  Kokkos::MemoryUnmanaged> host_offsets_type;
6223 
6224  static device_offsets_type
6225  getDeviceOffsets (const host_offsets_type& hostOffsets)
6226  {
6227  // Host and device are the same; no need to allocate a
6228  // temporary device View.
6229  return hostOffsets;
6230  }
6231 
6232  static void
6233  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6234  const device_offsets_type& /* deviceOffsets */)
6235  { /* copy back not needed; host and device are the same */ }
6236  };
6237 
6238  template<class DeviceType>
6239  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6240  typedef DeviceType device_type;
6241  // We have to do a deep copy, since host memory space != device
6242  // memory space. Thus, the device View is managed (we need to
6243  // allocate a temporary device View).
6244  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6245  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6246  Kokkos::MemoryUnmanaged> host_offsets_type;
6247 
6248  static device_offsets_type
6249  getDeviceOffsets (const host_offsets_type& hostOffsets)
6250  {
6251  // Host memory space != device memory space, so we must
6252  // allocate a temporary device View for the graph.
6253  return device_offsets_type ("offsets", hostOffsets.extent (0));
6254  }
6255 
6256  static void
6257  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6258  const device_offsets_type& deviceOffsets)
6259  {
6260  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6261  }
6262  };
6263  } // namespace (anonymous)
6264 
6265 
6266  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6267  void
6268  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6269  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6270  {
6271  typedef LocalOrdinal LO;
6272  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6273  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6274  (! this->hasColMap (), std::runtime_error,
6275  "The graph does not yet have a column Map.");
6276  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
6277  if (static_cast<LO> (offsets.size ()) != myNumRows) {
6278  // NOTE (mfh 21 Jan 2016) This means that the method does not
6279  // satisfy the strong exception guarantee (no side effects
6280  // unless successful).
6281  offsets.resize (myNumRows);
6282  }
6283 
6284  // mfh 21 Jan 2016: This method unfortunately takes a
6285  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6286  // device pointer. We can't access host memory from the device;
6287  // that's the wrong direction for UVM. (It's the right direction
6288  // for inefficient host pinned memory, but we don't want to use
6289  // that here.) Thus, if device memory space != host memory space,
6290  // we allocate and use a temporary device View to get the offsets.
6291  // If the two spaces are equal, the template magic makes the deep
6292  // copy go away.
6293  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6294  typedef typename helper_type::host_offsets_type host_offsets_type;
6295  // Unmanaged host View that views the output array.
6296  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6297  // Allocate temp device View if host != device, else reuse host array.
6298  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6299  // NOT recursion; this calls the overload that takes a device View.
6300  this->getLocalDiagOffsets (deviceOffsets);
6301  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6302  }
6303 
6304  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6305  bool
6308  return true;
6309  }
6310 
6311  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6312  void
6315  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6316  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6317  const Teuchos::RCP<const map_type>& domainMap,
6318  const Teuchos::RCP<const map_type>& rangeMap,
6319  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6320  {
6325  using Teuchos::ArrayRCP;
6326  using Teuchos::ArrayView;
6327  using Teuchos::Comm;
6328  using Teuchos::ParameterList;
6329  using Teuchos::rcp;
6330  using Teuchos::RCP;
6331 #ifdef HAVE_TPETRA_MMM_TIMINGS
6332  using std::string;
6333  using Teuchos::TimeMonitor;
6334 #endif
6335 
6336  using LO = LocalOrdinal;
6337  using GO = GlobalOrdinal;
6338  using NT = node_type;
6339  using this_type = CrsGraph<LO, GO, NT>;
6340  using ivector_type = Vector<int, LO, GO, NT>;
6341  using packet_type = typename this_type::packet_type;
6342 
6343  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6344 
6345 #ifdef HAVE_TPETRA_MMM_TIMINGS
6346  string label;
6347  if(!params.is_null()) label = params->get("Timer Label", label);
6348  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6349  RCP<TimeMonitor> MM =
6350  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6351 #endif
6352 
6353  // Make sure that the input argument rowTransfer is either an
6354  // Import or an Export. Import and Export are the only two
6355  // subclasses of Transfer that we defined, but users might
6356  // (unwisely, for now at least) decide to implement their own
6357  // subclasses. Exclude this possibility.
6358  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6359  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6360  TEUCHOS_TEST_FOR_EXCEPTION(
6361  xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument,
6362  prefix << "The 'rowTransfer' input argument must be either an Import or "
6363  "an Export, and its template parameters must match the corresponding "
6364  "template parameters of the CrsGraph.");
6365 
6366  // Make sure that the input argument domainTransfer is either an
6367  // Import or an Export. Import and Export are the only two
6368  // subclasses of Transfer that we defined, but users might
6369  // (unwisely, for now at least) decide to implement their own
6370  // subclasses. Exclude this possibility.
6371  Teuchos::RCP<const import_type> xferDomainAsImport =
6372  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6373  Teuchos::RCP<const export_type> xferDomainAsExport =
6374  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6375 
6376  if(! domainTransfer.is_null()) {
6377 
6378  TEUCHOS_TEST_FOR_EXCEPTION(
6379  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6380  prefix << "The 'domainTransfer' input argument must be either an "
6381  "Import or an Export, and its template parameters must match the "
6382  "corresponding template parameters of the CrsGraph.");
6383 
6384  TEUCHOS_TEST_FOR_EXCEPTION(
6385  ( xferAsImport != NULL || ! xferDomainAsImport.is_null() ) &&
6386  (( xferAsImport != NULL && xferDomainAsImport.is_null() ) ||
6387  ( xferAsImport == NULL && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6388  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6389  "must be of the same type (either Import or Export).");
6390 
6391  TEUCHOS_TEST_FOR_EXCEPTION(
6392  ( xferAsExport != NULL || ! xferDomainAsExport.is_null() ) &&
6393  (( xferAsExport != NULL && xferDomainAsExport.is_null() ) ||
6394  ( xferAsExport == NULL && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6395  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6396  "must be of the same type (either Import or Export).");
6397 
6398  } // domainTransfer != null
6399 
6400 
6401  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6402  // if the source Map is not distributed but the target Map is?
6403  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6404 
6405  //
6406  // Get the caller's parameters
6407  //
6408 
6409  bool reverseMode = false; // Are we in reverse mode?
6410  bool restrictComm = false; // Do we need to restrict the communicator?
6411  RCP<ParameterList> graphparams; // parameters for the destination graph
6412  if (! params.is_null()) {
6413  reverseMode = params->get("Reverse Mode", reverseMode);
6414  restrictComm = params->get("Restrict Communicator", restrictComm);
6415  graphparams = sublist(params, "CrsGraph");
6416  }
6417 
6418  // Get the new domain and range Maps. We need some of them for error
6419  // checking, now that we have the reverseMode parameter.
6420  RCP<const map_type> MyRowMap = reverseMode ?
6421  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6422  RCP<const map_type> MyColMap; // create this below
6423  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6424  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
6425  RCP<const map_type> BaseRowMap = MyRowMap;
6426  RCP<const map_type> BaseDomainMap = MyDomainMap;
6427 
6428  // If the user gave us a nonnull destGraph, then check whether it's
6429  // "pristine." That means that it has no entries.
6430  //
6431  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6432  // then this exception test may hang. It would be better to
6433  // forward an error flag to the next communication phase.
6434  if (! destGraph.is_null()) {
6435  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6436  // whether a graph or matrix has no entries on the calling
6437  // process, is that it is neither locally nor globally indexed.
6438  // This may change eventually with the Kokkos refactor version
6439  // of Tpetra, so it would be better just to check the quantity
6440  // of interest directly. Note that with the Kokkos refactor
6441  // version of Tpetra, asking for the total number of entries in
6442  // a graph or matrix that is not fill complete might require
6443  // computation (kernel launch), since it is not thread scalable
6444  // to update a count every time an entry is inserted.
6445  const bool NewFlag =
6446  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6447  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6448  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6449  "if its graph is empty (neither locally nor globally indexed).");
6450 
6451  // FIXME (mfh 15 May 2014) At some point, we want to change
6452  // graphs and matrices so that their DistObject Map
6453  // (this->getMap()) may differ from their row Map. This will
6454  // make redistribution for 2-D distributions more efficient. I
6455  // hesitate to change this check, because I'm not sure how much
6456  // the code here depends on getMap() and getRowMap() being the
6457  // same.
6458  TEUCHOS_TEST_FOR_EXCEPTION(
6459  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6460  prefix << "The (row) Map of the input argument 'destGraph' is not the "
6461  "same as the (row) Map specified by the input argument 'rowTransfer'.");
6462 
6463  TEUCHOS_TEST_FOR_EXCEPTION(
6464  ! destGraph->checkSizes(*this), std::invalid_argument,
6465  prefix << "You provided a nonnull destination graph, but checkSizes() "
6466  "indicates that it is not a legal legal target for redistribution from "
6467  "the source graph (*this). This may mean that they do not have the "
6468  "same dimensions.");
6469  }
6470 
6471  // If forward mode (the default), then *this's (row) Map must be
6472  // the same as the source Map of the Transfer. If reverse mode,
6473  // then *this's (row) Map must be the same as the target Map of
6474  // the Transfer.
6475  //
6476  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6477  // and matrices so that their DistObject Map (this->getMap()) may
6478  // differ from their row Map. This will make redistribution for
6479  // 2-D distributions more efficient. I hesitate to change this
6480  // check, because I'm not sure how much the code here depends on
6481  // getMap() and getRowMap() being the same.
6482  TEUCHOS_TEST_FOR_EXCEPTION(
6483  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6484  std::invalid_argument, prefix <<
6485  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6486 
6487  TEUCHOS_TEST_FOR_EXCEPTION(
6488  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6489  std::invalid_argument, prefix <<
6490  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6491 
6492  // checks for domainTransfer
6493  TEUCHOS_TEST_FOR_EXCEPTION(
6494  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6495  std::invalid_argument,
6496  prefix << "The target map of the 'domainTransfer' input argument must be "
6497  "the same as the rebalanced domain map 'domainMap'");
6498 
6499  TEUCHOS_TEST_FOR_EXCEPTION(
6500  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6501  std::invalid_argument,
6502  prefix << "The source map of the 'domainTransfer' input argument must be "
6503  "the same as the rebalanced domain map 'domainMap'");
6504 
6505  // The basic algorithm here is:
6506  //
6507  // 1. Call the moral equivalent of "distor.do" to handle the import.
6508  // 2. Copy all the Imported and Copy/Permuted data into the raw
6509  // CrsGraph pointers, still using GIDs.
6510  // 3. Call an optimized version of MakeColMap that avoids the
6511  // Directory lookups (since the importer knows who owns all the
6512  // GIDs) AND reindexes to LIDs.
6513  // 4. Call expertStaticFillComplete()
6514 
6515  // Get information from the Importer
6516  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6517  ArrayView<const LO> ExportLIDs = reverseMode ?
6518  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6519  ArrayView<const LO> RemoteLIDs = reverseMode ?
6520  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6521  ArrayView<const LO> PermuteToLIDs = reverseMode ?
6522  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6523  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6524  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6525  Distributor& Distor = rowTransfer.getDistributor();
6526 
6527  // Owning PIDs
6528  Teuchos::Array<int> SourcePids;
6529  Teuchos::Array<int> TargetPids;
6530  int MyPID = getComm()->getRank();
6531 
6532  // Temp variables for sub-communicators
6533  RCP<const map_type> ReducedRowMap, ReducedColMap,
6534  ReducedDomainMap, ReducedRangeMap;
6535  RCP<const Comm<int> > ReducedComm;
6536 
6537  // If the user gave us a null destGraph, then construct the new
6538  // destination graph. We will replace its column Map later.
6539  if (destGraph.is_null()) {
6540  destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
6541  }
6542 
6543  /***************************************************/
6544  /***** 1) First communicator restriction phase ****/
6545  /***************************************************/
6546  if (restrictComm) {
6547  ReducedRowMap = MyRowMap->removeEmptyProcesses();
6548  ReducedComm = ReducedRowMap.is_null() ?
6549  Teuchos::null :
6550  ReducedRowMap->getComm();
6551  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6552 
6553  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
6554  ReducedRowMap :
6555  MyDomainMap->replaceCommWithSubset(ReducedComm);
6556  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
6557  ReducedRowMap :
6558  MyRangeMap->replaceCommWithSubset(ReducedComm);
6559 
6560  // Reset the "my" maps
6561  MyRowMap = ReducedRowMap;
6562  MyDomainMap = ReducedDomainMap;
6563  MyRangeMap = ReducedRangeMap;
6564 
6565  // Update my PID, if we've restricted the communicator
6566  if (! ReducedComm.is_null()) {
6567  MyPID = ReducedComm->getRank();
6568  }
6569  else {
6570  MyPID = -2; // For debugging
6571  }
6572  }
6573  else {
6574  ReducedComm = MyRowMap->getComm();
6575  }
6576 
6577  /***************************************************/
6578  /***** 2) From Tpetra::DistObject::doTransfer() ****/
6579  /***************************************************/
6580 #ifdef HAVE_TPETRA_MMM_TIMINGS
6581  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
6582 #endif
6583  // Get the owning PIDs
6584  RCP<const import_type> MyImporter = getImporter();
6585 
6586  // Check whether the source graph's domain Map and the base domain Map are the same
6587  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6588 
6589  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
6590  // Same domain map as source graph
6591  //
6592  // NOTE: This won't work for restrictComm (because the Import
6593  // doesn't know the restricted PIDs), though writing an
6594  // optimized version for that case would be easy (Import an
6595  // IntVector of the new PIDs). Might want to add this later.
6596  Import_Util::getPids(*MyImporter, SourcePids, false);
6597  }
6598  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
6599  // Same domain map as source graph (restricted communicator)
6600  // We need one import from the domain to the column map
6601  ivector_type SourceDomain_pids(getDomainMap(),true);
6602  ivector_type SourceCol_pids(getColMap());
6603  // SourceDomain_pids contains the restricted pids
6604  SourceDomain_pids.putScalar(MyPID);
6605 
6606  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6607  SourcePids.resize(getColMap()->getNodeNumElements());
6608  SourceCol_pids.get1dCopy(SourcePids());
6609  }
6610  else if (MyImporter.is_null() && bSameDomainMap) {
6611  // Graph has no off-process entries
6612  SourcePids.resize(getColMap()->getNodeNumElements());
6613  SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
6614  }
6615  else if ( ! MyImporter.is_null() &&
6616  ! domainTransfer.is_null() ) {
6617  // general implementation for rectangular matrices with
6618  // domain map different than SourceGraph domain map.
6619  // User has to provide a DomainTransfer object. We need
6620  // two communications (import/export)
6621 
6622  // TargetDomain_pids lives on the rebalanced new domain map
6623  ivector_type TargetDomain_pids(domainMap);
6624  TargetDomain_pids.putScalar(MyPID);
6625 
6626  // SourceDomain_pids lives on the non-rebalanced old domain map
6627  ivector_type SourceDomain_pids(getDomainMap());
6628 
6629  // SourceCol_pids lives on the non-rebalanced old column map
6630  ivector_type SourceCol_pids(getColMap());
6631 
6632  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6633  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6634  }
6635  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6636  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6637  }
6638  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6639  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6640  }
6641  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6642  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6643  }
6644  else {
6645  TEUCHOS_TEST_FOR_EXCEPTION(
6646  true, std::logic_error,
6647  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6648  }
6649  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6650  SourcePids.resize(getColMap()->getNodeNumElements());
6651  SourceCol_pids.get1dCopy(SourcePids());
6652  }
6653  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6654  getDomainMap()->isSameAs(*getRowMap())) {
6655  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6656  ivector_type TargetRow_pids(domainMap);
6657  ivector_type SourceRow_pids(getRowMap());
6658  ivector_type SourceCol_pids(getColMap());
6659 
6660  TargetRow_pids.putScalar(MyPID);
6661  if (! reverseMode && xferAsImport != NULL) {
6662  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6663  }
6664  else if (reverseMode && xferAsExport != NULL) {
6665  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6666  }
6667  else if (! reverseMode && xferAsExport != NULL) {
6668  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
6669  }
6670  else if (reverseMode && xferAsImport != NULL) {
6671  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
6672  }
6673  else {
6674  TEUCHOS_TEST_FOR_EXCEPTION(
6675  true, std::logic_error,
6676  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6677  }
6678  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
6679  SourcePids.resize(getColMap()->getNodeNumElements());
6680  SourceCol_pids.get1dCopy(SourcePids());
6681  }
6682  else {
6683  TEUCHOS_TEST_FOR_EXCEPTION(
6684  true, std::invalid_argument,
6685  prefix << "This method only allows either domainMap == getDomainMap(), "
6686  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6687  }
6688 
6689  // Tpetra-specific stuff
6690  size_t constantNumPackets = destGraph->constantNumberOfPackets();
6691  if (constantNumPackets == 0) {
6692  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6693  RemoteLIDs.size());
6694  }
6695  else {
6696  // There are a constant number of packets per element. We
6697  // already know (from the number of "remote" (incoming)
6698  // elements) how many incoming elements we expect, so we can
6699  // resize the buffer accordingly.
6700  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6701  destGraph->reallocImportsIfNeeded(rbufLen);
6702  }
6703 
6704  {
6705  // packAndPrepare* methods modify numExportPacketsPerLID_.
6706  destGraph->numExportPacketsPerLID_.template modify<Kokkos::HostSpace>();
6707  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6708  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6709 
6710  // Pack & Prepare w/ owning PIDs
6711  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
6712  numExportPacketsPerLID, ExportLIDs,
6713  SourcePids, constantNumPackets, Distor);
6714  }
6715 
6716  // Do the exchange of remote data.
6717 #ifdef HAVE_TPETRA_MMM_TIMINGS
6718  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
6719 #endif
6720 
6721  if (communication_needed) {
6722  if (reverseMode) {
6723  if (constantNumPackets == 0) { // variable number of packets per LID
6724  // Make sure that host has the latest version, since we're
6725  // using the version on host. If host has the latest
6726  // version, syncing to host does nothing.
6727  destGraph->numExportPacketsPerLID_.template sync<Kokkos::HostSpace>();
6728  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6729  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6730  destGraph->numImportPacketsPerLID_.template sync<Kokkos::HostSpace>();
6731  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6732  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6733  Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
6734  numImportPacketsPerLID);
6735  size_t totalImportPackets = 0;
6736  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6737  totalImportPackets += numImportPacketsPerLID[i];
6738  }
6739 
6740  // Reallocation MUST go before setting the modified flag,
6741  // because it may clear out the flags.
6742  destGraph->reallocImportsIfNeeded(totalImportPackets);
6743  destGraph->imports_.template modify<Kokkos::HostSpace>();
6744  Teuchos::ArrayView<packet_type> hostImports =
6745  getArrayViewFromDualView(destGraph->imports_);
6746  // This is a legacy host pack/unpack path, so use the host
6747  // version of exports_.
6748  destGraph->exports_.template sync<Kokkos::HostSpace>();
6749  Teuchos::ArrayView<const packet_type> hostExports =
6750  getArrayViewFromDualView(destGraph->exports_);
6751  Distor.doReversePostsAndWaits(hostExports,
6752  numExportPacketsPerLID,
6753  hostImports,
6754  numImportPacketsPerLID);
6755  }
6756  else { // constant number of packets per LI
6757  destGraph->imports_.template modify<Kokkos::HostSpace>();
6758  Teuchos::ArrayView<packet_type> hostImports =
6759  getArrayViewFromDualView(destGraph->imports_);
6760  // This is a legacy host pack/unpack path, so use the host
6761  // version of exports_.
6762  destGraph->exports_.template sync<Kokkos::HostSpace>();
6763  Teuchos::ArrayView<const packet_type> hostExports =
6764  getArrayViewFromDualView(destGraph->exports_);
6765  Distor.doReversePostsAndWaits(hostExports,
6766  constantNumPackets,
6767  hostImports);
6768  }
6769  }
6770  else { // forward mode (the default)
6771  if (constantNumPackets == 0) { // variable number of packets per LID
6772  // Make sure that host has the latest version, since we're
6773  // using the version on host. If host has the latest
6774  // version, syncing to host does nothing.
6775  destGraph->numExportPacketsPerLID_.template sync<Kokkos::HostSpace>();
6776  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6777  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6778  destGraph->numImportPacketsPerLID_.template sync<Kokkos::HostSpace>();
6779  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6780  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6781  Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
6782  numImportPacketsPerLID);
6783  size_t totalImportPackets = 0;
6784  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6785  totalImportPackets += numImportPacketsPerLID[i];
6786  }
6787 
6788  // Reallocation MUST go before setting the modified flag,
6789  // because it may clear out the flags.
6790  destGraph->reallocImportsIfNeeded(totalImportPackets);
6791  destGraph->imports_.template modify<Kokkos::HostSpace>();
6792  Teuchos::ArrayView<packet_type> hostImports =
6793  getArrayViewFromDualView(destGraph->imports_);
6794  // This is a legacy host pack/unpack path, so use the host
6795  // version of exports_.
6796  destGraph->exports_.template sync<Kokkos::HostSpace>();
6797  Teuchos::ArrayView<const packet_type> hostExports =
6798  getArrayViewFromDualView(destGraph->exports_);
6799  Distor.doPostsAndWaits(hostExports,
6800  numExportPacketsPerLID,
6801  hostImports,
6802  numImportPacketsPerLID);
6803  }
6804  else { // constant number of packets per LID
6805  destGraph->imports_.template modify<Kokkos::HostSpace>();
6806  Teuchos::ArrayView<packet_type> hostImports =
6807  getArrayViewFromDualView(destGraph->imports_);
6808  // This is a legacy host pack/unpack path, so use the host
6809  // version of exports_.
6810  destGraph->exports_.template sync<Kokkos::HostSpace>();
6811  Teuchos::ArrayView<const packet_type> hostExports =
6812  getArrayViewFromDualView(destGraph->exports_);
6813  Distor.doPostsAndWaits(hostExports,
6814  constantNumPackets,
6815  hostImports);
6816  }
6817  }
6818  }
6819 
6820  /*********************************************************************/
6821  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
6822  /*********************************************************************/
6823 
6824 #ifdef HAVE_TPETRA_MMM_TIMINGS
6825  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
6826 #endif
6827 
6828  // Backwards compatibility measure. We'll use this again below.
6829  destGraph->numImportPacketsPerLID_.template sync<Kokkos::HostSpace>();
6830  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6831  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6832  destGraph->imports_.template sync<Kokkos::HostSpace>();
6833  Teuchos::ArrayView<const packet_type> hostImports =
6834  getArrayViewFromDualView(destGraph->imports_);
6835  size_t mynnz =
6836  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
6837  numImportPacketsPerLID,
6838  constantNumPackets, Distor, INSERT,
6839  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
6840  size_t N = BaseRowMap->getNodeNumElements();
6841 
6842  // Allocations
6843  ArrayRCP<size_t> CSR_rowptr(N+1);
6844  ArrayRCP<GO> CSR_colind_GID;
6845  ArrayRCP<LO> CSR_colind_LID;
6846  CSR_colind_GID.resize(mynnz);
6847 
6848  // If LO and GO are the same, we can reuse memory when
6849  // converting the column indices from global to local indices.
6850  if (typeid(LO) == typeid(GO)) {
6851  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
6852  }
6853  else {
6854  CSR_colind_LID.resize(mynnz);
6855  }
6856 
6857  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
6858  // unpackAndCombine method on a "CrsArrays" object? This passing
6859  // in a huge list of arrays is icky. Can't we have a bit of an
6860  // abstraction? Implementing a concrete DistObject subclass only
6861  // takes five methods.
6862  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
6863  numImportPacketsPerLID, constantNumPackets,
6864  Distor, INSERT, NumSameIDs, PermuteToLIDs,
6865  PermuteFromLIDs, N, mynnz, MyPID,
6866  CSR_rowptr(), CSR_colind_GID(),
6867  SourcePids(), TargetPids);
6868 
6869  /**************************************************************/
6870  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
6871  /**************************************************************/
6872 #ifdef HAVE_TPETRA_MMM_TIMINGS
6873  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
6874 #endif
6875  // Call an optimized version of makeColMap that avoids the
6876  // Directory lookups (since the Import object knows who owns all
6877  // the GIDs).
6878  Teuchos::Array<int> RemotePids;
6879  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
6880  CSR_colind_LID(),
6881  CSR_colind_GID(),
6882  BaseDomainMap,
6883  TargetPids, RemotePids,
6884  MyColMap);
6885 
6886  /*******************************************************/
6887  /**** 4) Second communicator restriction phase ****/
6888  /*******************************************************/
6889  if (restrictComm) {
6890  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
6891  ReducedRowMap :
6892  MyColMap->replaceCommWithSubset(ReducedComm);
6893  MyColMap = ReducedColMap; // Reset the "my" maps
6894  }
6895 
6896  // Replace the col map
6897  destGraph->replaceColMap(MyColMap);
6898 
6899  // Short circuit if the processor is no longer in the communicator
6900  //
6901  // NOTE: Epetra modifies all "removed" processes so that they
6902  // have a dummy (serial) Map that doesn't touch the original
6903  // communicator. Duplicating that here might be a good idea.
6904  if (ReducedComm.is_null()) {
6905  return;
6906  }
6907 
6908  /***************************************************/
6909  /**** 5) Sort ****/
6910  /***************************************************/
6911  if ((! reverseMode && xferAsImport != NULL) ||
6912  (reverseMode && xferAsExport != NULL)) {
6913  Import_Util::sortCrsEntries(CSR_rowptr(),
6914  CSR_colind_LID());
6915  }
6916  else if ((! reverseMode && xferAsExport != NULL) ||
6917  (reverseMode && xferAsImport != NULL)) {
6918  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
6919  CSR_colind_LID());
6920  if (CSR_rowptr[N] != mynnz) {
6921  CSR_colind_LID.resize(CSR_rowptr[N]);
6922  }
6923  }
6924  else {
6925  TEUCHOS_TEST_FOR_EXCEPTION(
6926  true, std::logic_error,
6927  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6928  }
6929  /***************************************************/
6930  /**** 6) Reset the colmap and the arrays ****/
6931  /***************************************************/
6932 
6933  // Call constructor for the new graph (restricted as needed)
6934  //
6935  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
6936 
6937  /***************************************************/
6938  /**** 7) Build Importer & Call ESFC ****/
6939  /***************************************************/
6940  // Pre-build the importer using the existing PIDs
6941  Teuchos::ParameterList esfc_params;
6942 #ifdef HAVE_TPETRA_MMM_TIMINGS
6943  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
6944 #endif
6945  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
6946 #ifdef HAVE_TPETRA_MMM_TIMINGS
6947  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
6948 
6949  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
6950 #endif
6951  if(!params.is_null())
6952  esfc_params.set("compute global constants",params->get("compute global constants",true));
6953 
6954  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
6955  MyImport, Teuchos::null, rcp(&esfc_params,false));
6956 
6957  }
6958 
6959  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6960  void
6961  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6962  importAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
6963  const import_type& importer,
6964  const Teuchos::RCP<const map_type>& domainMap,
6965  const Teuchos::RCP<const map_type>& rangeMap,
6966  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6967  {
      // Single-Import convenience overload.  All of the work is done by
      // transferAndFillComplete(): 'importer' is used as the row
      // transfer, and Teuchos::null is passed as the domain transfer
      // because no separate domain Import is supplied.  destGraph,
      // domainMap, rangeMap, and params are forwarded unchanged.
6968  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
6969  }
6970 
6971  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6972  void
6973  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6974  importAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
6975  const import_type& rowImporter,
6976  const import_type& domainImporter,
6977  const Teuchos::RCP<const map_type>& domainMap,
6978  const Teuchos::RCP<const map_type>& rangeMap,
6979  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6980  {
      // Two-Import convenience overload.  'rowImporter' is used as the
      // row transfer and 'domainImporter' as the domain transfer of
      // transferAndFillComplete(), which does all of the work.  Both
      // transfers are Imports, so the callee's requirement that the row
      // and domain transfers be of the same kind is met by construction.
      //
      // The callee takes the domain transfer as an RCP, so the caller's
      // reference is wrapped with Teuchos::rcpFromRef (a non-owning
      // RCP); 'domainImporter' only has to outlive this call.
      //
      // NOTE(review): when a domain transfer is given, the callee
      // compares its target Map against *domainMap, so 'domainMap' is
      // expected to be nonnull here -- confirm against callers.
6981  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
6982  }
6983 
6984  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6985  void
6986  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6987  exportAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
6988  const export_type& exporter,
6989  const Teuchos::RCP<const map_type>& domainMap,
6990  const Teuchos::RCP<const map_type>& rangeMap,
6991  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6992  {
      // Single-Export convenience overload.  All of the work is done by
      // transferAndFillComplete(): 'exporter' is used as the row
      // transfer, and Teuchos::null is passed as the domain transfer
      // because no separate domain Export is supplied.  destGraph,
      // domainMap, rangeMap, and params are forwarded unchanged.
6993  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
6994  }
6995 
6996  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6997  void
6998  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6999  exportAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7000  const export_type& rowExporter,
7001  const export_type& domainExporter,
7002  const Teuchos::RCP<const map_type>& domainMap,
7003  const Teuchos::RCP<const map_type>& rangeMap,
7004  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7005  {
      // Two-Export convenience overload.  'rowExporter' is used as the
      // row transfer and 'domainExporter' as the domain transfer of
      // transferAndFillComplete(), which does all of the work.  Both
      // transfers are Exports, so the callee's requirement that the row
      // and domain transfers be of the same kind is met by construction.
      //
      // The callee takes the domain transfer as an RCP, so the caller's
      // reference is wrapped with Teuchos::rcpFromRef (a non-owning
      // RCP); 'domainExporter' only has to outlive this call.
      //
      // NOTE(review): when a domain transfer is given, the callee
      // compares its source Map against *domainMap, so 'domainMap' is
      // expected to be nonnull here -- confirm against callers.
7006  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7007  }
7008 
7009 } // namespace Classes
7010 } // namespace Tpetra
7011 
7012 //
7013 // Explicit instantiation macros
7014 //
7015 // Must be expanded from within the Tpetra namespace!
7016 //
// TPETRA_CRSGRAPH_GRAPH_INSTANT(LO,GO,NODE) reopens the nested
// namespace Classes and explicitly instantiates the class template
// CrsGraph<LO,GO,NODE> there.  Because it names 'Classes' unqualified,
// the expansion only resolves correctly inside namespace Tpetra.
7017 #define TPETRA_CRSGRAPH_GRAPH_INSTANT(LO,GO,NODE) \
7018  namespace Classes { template class CrsGraph< LO , GO , NODE >; }
7019 
// TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) expands
// to a 'template<>' declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the form
// that takes the source graph, a single Import ('importer'), the
// domain and range Maps, and a ParameterList.  The Import and Map
// template arguments are spelled via the graph's own
// local_ordinal_type / global_ordinal_type / node_type typedefs.
// NOTE(review): syntactically this is an explicit specialization
// declaration (no body), not an explicit instantiation -- confirm
// where the matching definition lives.
7020 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7021  template<> \
7022  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7023  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7024  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7025  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7026  CrsGraph<LO,GO,NODE>::node_type>& importer, \
7027  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7028  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7029  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7030  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7031  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7032  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7033  const Teuchos::RCP<Teuchos::ParameterList>& params);
7034 
// TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
// expands to a 'template<>' declaration of the nonmember function
// importAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the form
// that takes two Imports ('rowImporter' and 'domainImporter') in
// addition to the source graph, the domain and range Maps, and a
// ParameterList.
// NOTE(review): syntactically this is an explicit specialization
// declaration (no body), not an explicit instantiation -- confirm
// where the matching definition lives.
7035 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7036  template<> \
7037  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7038  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7039  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7040  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7041  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7042  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7043  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7044  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7045  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7046  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7047  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7048  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7049  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7050  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7051  const Teuchos::RCP<Teuchos::ParameterList>& params);
7052 
7053 
// TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) expands
// to a 'template<>' declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the form
// that takes the source graph, a single Export ('exporter'), the
// domain and range Maps, and a ParameterList.
// NOTE(review): syntactically this is an explicit specialization
// declaration (no body), not an explicit instantiation -- confirm
// where the matching definition lives.
7054 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7055  template<> \
7056  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7057  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7058  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7059  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7060  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7061  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7062  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7063  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7064  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7065  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7066  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7067  const Teuchos::RCP<Teuchos::ParameterList>& params);
7068 
// TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
// expands to a 'template<>' declaration of the nonmember function
// exportAndFillCompleteCrsGraph for CrsGraph<LO,GO,NODE>, in the form
// that takes two Exports ('rowExporter' and 'domainExporter') in
// addition to the source graph, the domain and range Maps, and a
// ParameterList.
// NOTE(review): syntactically this is an explicit specialization
// declaration (no body), not an explicit instantiation -- confirm
// where the matching definition lives.
7069 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7070  template<> \
7071  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7072  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7073  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7074  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7075  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7076  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7077  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7078  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7079  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7080  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7081  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7082  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7083  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7084  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7085  const Teuchos::RCP<Teuchos::ParameterList>& params);
7086 
7087 
7088 // WARNING: These macros exist only for backwards compatibility.
7089 // We will remove them at some point.
// Each of the four macros below has an empty replacement list, so
// invoking it expands to nothing; all four arguments are ignored.
7090 #define TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
7091 #define TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
7092 #define TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE)
7093 #define TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE)
7094 
// TPETRA_CRSGRAPH_INSTANT(S,LO,GO,NODE) is the umbrella macro: it
// expands the four legacy (empty) macros followed by the four
// import/export ..._AND_FILL_COMPLETE_INSTANT[_TWO] macros.  'S' is
// only forwarded to the legacy macros, which discard it.  It does not
// expand TPETRA_CRSGRAPH_GRAPH_INSTANT, so the CrsGraph class itself
// is not instantiated by this macro.
7095 #define TPETRA_CRSGRAPH_INSTANT(S,LO,GO,NODE) \
7096  TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
7097  TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
7098  TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE) \
7099  TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE) \
7100  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7101  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7102  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7103  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7104 
7105 
7106 #endif // TPETRA_CRSGRAPH_DEF_HPP
Tpetra_Import_Util.hpp
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Tpetra::ProfileType
ProfileType
Definition: Tpetra_ConfigDefs.hpp:130
Tpetra::Details::computeOffsetsFromCounts
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Definition: Tpetra_Details_computeOffsets.hpp:284
Tpetra::StaticProfile
Definition: Tpetra_ConfigDefs.hpp:131
Tpetra::Details::unpackAndCombineIntoCrsArrays
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Tpetra::Details::computeOffsetsFromConstantCount
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType &count)
Compute offsets from a constant count.
Definition: Tpetra_Details_computeOffsets.hpp:415
Tpetra::Classes::CrsGraph< LO, GO, node_type >::device_type
node_type ::device_type device_type
This class' Kokkos device type.
Definition: Tpetra_CrsGraph_decl.hpp:285
Tpetra::Classes::Map::getLocalMap
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Definition: Tpetra_Map_def.hpp:1165
Tpetra::Classes::Map::getGlobalElement
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Definition: Tpetra_Map_def.hpp:1114
Tpetra::Details::determineLocalTriangularStructure
LocalTriangularStructureResult< typename LocalMapType::local_ordinal_type > determineLocalTriangularStructure(const LocalGraphType &G, const LocalMapType &rowMap, const LocalMapType &colMap, const bool ignoreMapsForTriangularStructure)
Count the local number of diagonal entries in a local sparse graph, and determine whether the local p...
Definition: Tpetra_Details_determineLocalTriangularStructure.hpp:241
Tpetra::RowInfo
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
Definition: Tpetra_CrsGraph_decl.hpp:112
Tpetra_Details_copyOffsets.hpp
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Tpetra_Details_Behavior.hpp
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Tpetra::Details::makeColMap
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Definition: Tpetra_Details_makeColMap_def.hpp:67
Tpetra::Details::ProfilingRegion
Profile the given scope.
Definition: Tpetra_Details_Profiling.hpp:100
Tpetra::Details::getArrayViewFromDualView
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Definition: Tpetra_Util.hpp:878
Details
Implementation details of Tpetra.
Tpetra::Details::gathervPrint
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Definition: Tpetra_Details_gathervPrint.cpp:52
Tpetra::Details::Behavior::debug
static bool debug()
Whether Tpetra is in debug mode.
Definition: Tpetra_Details_Behavior.cpp:245
Tpetra::Classes::RowGraph< LO, GO, node_type >
Tpetra_Import_Util2.hpp
Utility functions for packing and unpacking sparse matrix entries.
Tpetra::DynamicProfile
Definition: Tpetra_ConfigDefs.hpp:132
Tpetra::Details::packCrsGraphWithOwningPIDs
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Definition: Tpetra_Details_packCrsGraph_def.hpp:913
Tpetra::Classes::Map::getLocalElement
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Definition: Tpetra_Map_def.hpp:1091
Tpetra::Classes::DistObject< GO, LO, GO, node_type >
Tpetra_Details_Profiling.hpp
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Tpetra::Details::convertColumnIndicesFromGlobalToLocal
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a (StaticProfile) CrsGraph's global column indices into local column indices.
Definition: Tpetra_CrsGraph_def.hpp:163
Tpetra::Details::getGlobalNumDiags
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
Definition: Tpetra_Details_getNumDiags.hpp:406
Tpetra::Classes::CrsGraph::getIndexBase
GlobalOrdinal getIndexBase() const override
Returns the index base for global indices for this graph.
Definition: Tpetra_CrsGraph_def.hpp:1228
Tpetra::Classes::Import
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Definition: Tpetra_Import_decl.hpp:115
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:188
Tpetra::Details::Classes::LocalMap
"Local" part of Map suitable for Kokkos kernels.
Definition: Tpetra_Details_LocalMap.hpp:72
Tpetra::createOneToOne
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
Tpetra::Classes::CrsGraph::CrsGraph
friend class CrsGraph
Alias for Tpetra::Classes::CrsGraph.
Definition: Tpetra_CrsGraph_decl.hpp:271
Tpetra::Classes::CrsGraph::getNode
Teuchos::RCP< node_type > getNode() const override
Returns the underlying node.
Definition: Tpetra_CrsGraph_def.hpp:895
Tpetra::Classes::CrsGraph< LO, GO, node_type >::execution_space
device_type::execution_space execution_space
This class' Kokkos execution space.
Definition: Tpetra_CrsGraph_decl.hpp:287
Tpetra_Details_getEntryOnHost.hpp
Declaration and definition of Tpetra::Details::getEntryOnHost.
Tpetra::Classes::CrsGraph< LO, GO, node_type >::local_ordinal_type
LO local_ordinal_type
This class' first template parameter; the type of local indices.
Definition: Tpetra_CrsGraph_decl.hpp:278
Tpetra::removeEmptyProcessesInPlace
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
Definition: Tpetra_DistObject_def.hpp:1643
Tpetra::Classes::Map
A parallel distribution of indices over processes.
Definition: Tpetra_Map_decl.hpp:247
Tpetra::Classes::CrsGraph::getComm
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
Definition: Tpetra_CrsGraph_def.hpp:1219
Tpetra::Classes::Map::isNodeGlobalElement
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Definition: Tpetra_Map_def.hpp:1146
Tpetra::Classes::CrsGraph< LO, GO, node_type >::t_GlobalOrdinal_1D
Kokkos::View< GO *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
Definition: Tpetra_CrsGraph_decl.hpp:2177
Tpetra_Details_computeOffsets.hpp
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Tpetra::Classes::CrsGraph< LO, GO, node_type >::local_graph_type
Kokkos::StaticCrsGraph< LO, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Definition: Tpetra_CrsGraph_decl.hpp:292
Tpetra::Details::unpackAndCombineWithOwningPIDsCount
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Tpetra::global_size_t
size_t global_size_t
Global size_t object.
Definition: Tpetra_ConfigDefs.hpp:109
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Classes::CrsGraph< LO, GO, node_type >::node_type
node_type node_type
This class' Kokkos Node type.
Definition: Tpetra_CrsGraph_decl.hpp:282
Tpetra::Classes::Map::isNodeLocalElement
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
Definition: Tpetra_Map_def.hpp:1134
Tpetra::Classes::Export
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Definition: Tpetra_Export_decl.hpp:124
Tpetra::SrcDistObject
Abstract base class for objects that can be the source of an Import or Export operation.
Definition: Tpetra_SrcDistObject.hpp:89
Tpetra_Details_determineLocalTriangularStructure.hpp
Declaration and definition of Tpetra::Details::determineLocalTriangularStructure.
Tpetra::Details::copyOffsets
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
Definition: Tpetra_Details_copyOffsets.hpp:407
Tpetra::INSERT
Insert new values that don't currently exist.
Definition: Tpetra_CombineMode.hpp:96
Tpetra::Classes::CrsGraph
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Definition: Tpetra_CrsGraph_decl.hpp:259
Tpetra::CrsGraph
Classes::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > CrsGraph
Alias for Tpetra::Classes::CrsGraph.
Definition: Tpetra_CrsGraph_fwd.hpp:71
Tpetra::CombineMode
CombineMode
Rule for combining data in an Import or Export.
Definition: Tpetra_CombineMode.hpp:94
Tpetra::Classes::Vector
A distributed dense vector.
Definition: Tpetra_Vector_decl.hpp:82