Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIX_DEF_HPP
43 #define TPETRA_CRSMATRIX_DEF_HPP
44 
52 
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_Import_Util.hpp"
55 #include "Tpetra_Import_Util2.hpp"
61 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
62 #include "Tpetra_Details_gathervPrint.hpp"
63 #include "Tpetra_Details_leftScaleLocalCrsMatrix.hpp"
65 #include "Tpetra_Details_rightScaleLocalCrsMatrix.hpp"
66 
67 //#include "Tpetra_Util.hpp" // comes in from Tpetra_CrsGraph_decl.hpp
68 #include "Teuchos_SerialDenseMatrix.hpp"
69 #include "KokkosSparse_getDiagCopy.hpp"
70 #include "Tpetra_Details_copyConvert.hpp"
72 #include "Tpetra_Details_packCrsMatrix.hpp"
73 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
74 #include <memory>
75 #include <sstream>
76 #include <typeinfo>
77 #include <vector>
78 
79 namespace Tpetra {
80 
81 namespace { // (anonymous)
82 
// Atomically update *dest to f(*dest, inputVal) using a
// compare-and-exchange retry loop.
//
// dest: address of the value to update.
// inputVal: passed as the second argument to f on every attempt.
// f: binary function, invoked as f (assume, inputVal). It may be
// invoked more than once, because the loop repeats whenever the
// exchange result differs from the value the attempt was based on.
//
// Returns the last result of Kokkos::atomic_compare_exchange
// (presumably the value *dest held just before the successful
// exchange -- confirm against the Kokkos documentation).
83  template<class T, class BinaryFunction>
84  T atomic_binary_function_update (volatile T* const dest,
85  const T& inputVal,
86  BinaryFunction f)
87  {
88  T oldVal = *dest;
89  T assume;
90 
91  // NOTE (mfh 30 Nov 2015) I do NOT need a fence here for IBM
92  // POWER architectures, because 'newVal' depends on 'assume',
93  // which depends on 'oldVal', which depends on '*dest'. This
94  // sets up a chain of read dependencies that should ensure
95  // correct behavior given a sane memory model.
96  do {
// Recompute the candidate from the most recently observed value,
// then try to install it; retry if the observed value changed.
97  assume = oldVal;
98  T newVal = f (assume, inputVal);
99  oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
100  } while (assume != oldVal);
101 
102  return oldVal;
103  }
104 } // namespace (anonymous)
105 
106 //
107 // Users must never rely on anything in the Details namespace.
108 //
109 namespace Details {
110 
// Binary functor that returns the larger of the magnitudes of its two
// arguments, where magnitude is Teuchos::ScalarTraits<Scalar>::magnitude.
// The result of std::max is converted to Scalar on return.
120 template<class Scalar>
121 struct AbsMax {
// NOTE(review): operator() is not const-qualified even though it reads
// no member state, so it cannot be called on a const AbsMax.
123  Scalar operator() (const Scalar& x, const Scalar& y) {
124  typedef Teuchos::ScalarTraits<Scalar> STS;
125  return std::max (STS::magnitude (x), STS::magnitude (y));
126  }
127 };
128 
129 } // namespace Details
130 } // namespace Tpetra
131 
132 namespace Tpetra {
133 namespace Classes {
134 
// Constructor taking a row Map, a single size_t maxNumEntriesPerRow,
// a ProfileType, and a parameter list.
//
// Initial state: storageStatus_ is STORAGE_1D_UNPACKED when pftype is
// StaticProfile and STORAGE_2D otherwise; fillComplete_ is false;
// frobNorm_ is -STM::one ().
//
// A new crs_graph_type is built from the same arguments and stored in
// both myGraph_ and staticGraph_. A std::exception from that
// constructor is reported via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as
// std::runtime_error, with the original what() text appended.
// Finishes by calling resumeFill (params) and checkInternalState ().
135  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
137  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
138  size_t maxNumEntriesPerRow,
139  ProfileType pftype,
140  const Teuchos::RCP<Teuchos::ParameterList>& params) :
141  dist_object_type (rowMap),
142  storageStatus_ (pftype == StaticProfile ?
143  ::Tpetra::Details::STORAGE_1D_UNPACKED :
144  ::Tpetra::Details::STORAGE_2D),
145  fillComplete_ (false),
146  frobNorm_ (-STM::one ())
147  {
148  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, size_t, "
149  "ProfileType[, RCP<ParameterList>]): ";
150  Teuchos::RCP<crs_graph_type> graph;
151  try {
152  graph = Teuchos::rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
153  pftype, params));
154  }
155  catch (std::exception& e) {
156  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
157  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
158  "size_t, ProfileType[, RCP<ParameterList>]) threw an exception: "
159  << e.what ());
160  }
161  // myGraph_ not null means that the matrix owns the graph. That's
162  // different than the const CrsGraph constructor, where the matrix
163  // does _not_ own the graph.
164  myGraph_ = graph;
165  staticGraph_ = myGraph_;
166  resumeFill (params);
167  checkInternalState ();
168  }
169 
// Constructor taking a row Map, an ArrayRCP<const size_t>
// (NumEntriesPerRowToAlloc), a ProfileType, and a parameter list.
//
// Initial state: storageStatus_ is STORAGE_1D_UNPACKED when pftype is
// StaticProfile and STORAGE_2D otherwise; fillComplete_ is false;
// frobNorm_ is -STM::one ().
//
// A new crs_graph_type is built from the same arguments and stored in
// both myGraph_ and staticGraph_. A std::exception from that
// constructor is reported via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as
// std::runtime_error, with the original what() text appended.
// Finishes by calling resumeFill (params) and checkInternalState ().
170  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
172  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
173  const Teuchos::ArrayRCP<const size_t>& NumEntriesPerRowToAlloc,
174  ProfileType pftype,
175  const Teuchos::RCP<Teuchos::ParameterList>& params) :
176  dist_object_type (rowMap),
177  storageStatus_ (pftype == StaticProfile ?
178  ::Tpetra::Details::STORAGE_1D_UNPACKED :
179  ::Tpetra::Details::STORAGE_2D),
180  fillComplete_ (false),
181  frobNorm_ (-STM::one ())
182  {
183  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
184  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
185  Teuchos::RCP<crs_graph_type> graph;
186  try {
187  graph = Teuchos::rcp (new crs_graph_type (rowMap, NumEntriesPerRowToAlloc,
188  pftype, params));
189  }
190  catch (std::exception &e) {
191  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
192  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
193  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]) threw "
194  "an exception: " << e.what ());
195  }
196  // myGraph_ not null means that the matrix owns the graph. That's
197  // different than the const CrsGraph constructor, where the matrix
198  // does _not_ own the graph.
199  myGraph_ = graph;
200  staticGraph_ = graph;
201  resumeFill (params);
202  checkInternalState ();
203  }
204 
// Constructor taking a row Map, a column Map, a single size_t
// maxNumEntriesPerRow, a ProfileType, and a parameter list.
//
// Initial state: storageStatus_ is STORAGE_1D_UNPACKED when pftype is
// StaticProfile and STORAGE_2D otherwise; fillComplete_ is false;
// frobNorm_ is -STM::one ().
//
// With HAVE_TPETRA_DEBUG defined, first checks that staticGraph_ and
// myGraph_ are still null (std::logic_error otherwise).
//
// A new crs_graph_type is built from the same arguments and stored in
// both myGraph_ and staticGraph_. A std::exception from that
// constructor is reported via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as
// std::runtime_error, with the original what() text appended.
// Finishes by calling resumeFill (params) and checkInternalState ().
205  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
207  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
208  const Teuchos::RCP<const map_type>& colMap,
209  size_t maxNumEntriesPerRow,
210  ProfileType pftype,
211  const Teuchos::RCP<Teuchos::ParameterList>& params) :
212  dist_object_type (rowMap),
213  storageStatus_ (pftype == StaticProfile ?
214  ::Tpetra::Details::STORAGE_1D_UNPACKED :
215  ::Tpetra::Details::STORAGE_2D),
216  fillComplete_ (false),
217  frobNorm_ (-STM::one ())
218  {
219  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
220  "size_t, ProfileType[, RCP<ParameterList>]): ";
221 
222 #ifdef HAVE_TPETRA_DEBUG
223  // An artifact of debugging something a while back.
224  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
225  (! staticGraph_.is_null (), std::logic_error,
226  "staticGraph_ is not null at the beginning of the constructor. "
227  "Please report this bug to the Tpetra developers.");
228  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
229  (! myGraph_.is_null (), std::logic_error,
230  "myGraph_ is not null at the beginning of the constructor. "
231  "Please report this bug to the Tpetra developers.");
232 #endif // HAVE_TPETRA_DEBUG
233 
234  Teuchos::RCP<crs_graph_type> graph;
235  try {
236  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
237  maxNumEntriesPerRow,
238  pftype, params));
239  }
240  catch (std::exception &e) {
241  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
242  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
243  "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]) threw an "
244  "exception: " << e.what ());
245  }
246  // myGraph_ not null means that the matrix owns the graph. That's
247  // different than the const CrsGraph constructor, where the matrix
248  // does _not_ own the graph.
249  myGraph_ = graph;
250  staticGraph_ = myGraph_;
251  resumeFill (params);
252  checkInternalState ();
253  }
254 
// Constructor taking a row Map, a column Map, an
// ArrayRCP<const size_t> (numEntPerRow), a ProfileType, and a
// parameter list.
//
// Initial state: storageStatus_ is STORAGE_1D_UNPACKED when pftype is
// StaticProfile and STORAGE_2D otherwise; fillComplete_ is false;
// frobNorm_ is -STM::one ().
//
// A new crs_graph_type is built from the same arguments and stored in
// both myGraph_ and staticGraph_. A std::exception from that
// constructor is reported via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as
// std::runtime_error, with the original what() text appended.
// Finishes by calling resumeFill (params) and checkInternalState ().
255  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
257  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
258  const Teuchos::RCP<const map_type>& colMap,
259  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
260  ProfileType pftype,
261  const Teuchos::RCP<Teuchos::ParameterList>& params) :
262  dist_object_type (rowMap),
263  storageStatus_ (pftype == StaticProfile ?
264  ::Tpetra::Details::STORAGE_1D_UNPACKED :
265  ::Tpetra::Details::STORAGE_2D),
266  fillComplete_ (false),
267  frobNorm_ (-STM::one ())
268  {
269  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
270  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
271  Teuchos::RCP<crs_graph_type> graph;
272  try {
273  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, numEntPerRow,
274  pftype, params));
275  }
276  catch (std::exception &e) {
277  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
278  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
279  "RCP<const Map>, ArrayRCP<const size_t>, ProfileType[, "
280  "RCP<ParameterList>]) threw an exception: " << e.what ());
281  }
282  // myGraph_ not null means that the matrix owns the graph. That's
283  // different than the const CrsGraph constructor, where the matrix
284  // does _not_ own the graph.
285  myGraph_ = graph;
286  staticGraph_ = graph;
287  resumeFill (params);
288  checkInternalState ();
289  }
290 
// Constructor taking an existing const CrsGraph. The second
// (parameter list) argument is unnamed and unused.
//
// Only staticGraph_ is set here; myGraph_ is not assigned in this
// constructor. storageStatus_ starts as STORAGE_1D_PACKED,
// fillComplete_ as false, and frobNorm_ as -STM::one ().
//
// Requires a non-null, fill-complete graph (std::runtime_error
// otherwise). Allocates a values array with one entry per entry of
// the graph's local graph, builds lclMatrix_ from it, and aliases
// k_values1D_ to lclMatrix_.values.
//
// NOTE(review): the initializer list evaluates graph->getRowMap ()
// before the body's graph.is_null () check runs, so a null graph is
// dereferenced before the "Input graph is null." error can be
// reported. Confirm how Teuchos::RCP::operator-> behaves on null in
// the build configurations of interest.
291  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
293  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
294  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
295  dist_object_type (graph->getRowMap ()),
296  staticGraph_ (graph),
297  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
298  fillComplete_ (false),
299  frobNorm_ (-STM::one ())
300  {
301  typedef typename local_matrix_type::values_type values_type;
302  const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>[, "
303  "RCP<ParameterList>]): ";
304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
305  (graph.is_null (), std::runtime_error, "Input graph is null.");
306  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
307  (! graph->isFillComplete (), std::runtime_error, "Input graph is not "
308  "fill complete. You must call fillComplete on the graph before using "
309  "it to construct a CrsMatrix. Note that calling resumeFill on the "
310  "graph makes it not fill complete, even if you had previously called "
311  "fillComplete. In that case, you must call fillComplete on the graph "
312  "again.");
313 
314  // The graph is fill complete, so it is locally indexed and has a
315  // fixed structure. This means we can allocate the (1-D) array of
316  // values and build the local matrix right now. Note that the
317  // local matrix's number of columns comes from the column Map, not
318  // the domain Map.
319 
320  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
321  auto lclGraph = graph->getLocalGraph ();
322  const size_t numEnt = lclGraph.entries.extent (0);
323  values_type val ("Tpetra::CrsMatrix::val", numEnt);
324 
325  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
326  numCols, val, lclGraph);
327  // FIXME (22 Jun 2016) I would very much like to get rid of
328  // k_values1D_ at some point. I find it confusing to have all
329  // these extra references lying around.
330  this->k_values1D_ = this->lclMatrix_.values;
331 
332  checkInternalState ();
333  }
334 
// Constructor taking a row Map, a column Map, and the three local
// sparse-matrix arrays (rowPointers, columnIndices, values) typed via
// local_matrix_type / local_graph_type, plus a parameter list.
//
// Initial state: storageStatus_ is STORAGE_1D_PACKED; fillComplete_
// is false; frobNorm_ is -STM::one ().
//
// Checks performed, in order:
//  - values and columnIndices must have equal extent(0)
//    (std::invalid_argument otherwise);
//  - with HAVE_TPETRA_DEBUG, if rowPointers is nonempty, its last
//    entry must equal both of those extents (std::invalid_argument);
//  - after building the crs_graph_type, its local graph's dimensions
//    must match the inputs (std::logic_error);
//  - after building lclMatrix_, its values extent must match the
//    input values extent (std::logic_error).
//
// The new graph is stored in both myGraph_ and staticGraph_, and
// k_values1D_ is set to lclMatrix_.values. A std::exception from the
// CrsGraph constructor is reported as std::runtime_error with the
// original what() text appended.
335  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
337  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
338  const Teuchos::RCP<const map_type>& colMap,
339  const typename local_matrix_type::row_map_type& rowPointers,
340  const typename local_graph_type::entries_type::non_const_type& columnIndices,
341  const typename local_matrix_type::values_type& values,
342  const Teuchos::RCP<Teuchos::ParameterList>& params) :
343  dist_object_type (rowMap),
344  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
345  fillComplete_ (false),
346  frobNorm_ (-STM::one ())
347  {
348  using Teuchos::RCP;
349  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
350  "RCP<const Map>, ptr, ind, val[, params]): ";
351  const char suffix[] = ". Please report this bug to the Tpetra developers.";
352 
353  // Check the user's input. Note that this might throw only on
354  // some processes but not others, causing deadlock. We prefer
355  // deadlock due to exceptions to segfaults, because users can
356  // catch exceptions.
357  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
358  (values.extent (0) != columnIndices.extent (0),
359  std::invalid_argument, "Input arrays don't have matching dimensions. "
360  "values.extent(0) = " << values.extent (0) << " != "
361  "columnIndices.extent(0) = " << columnIndices.extent (0) << ".");
362 #ifdef HAVE_TPETRA_DEBUG
363  if (rowPointers.extent (0) != 0) {
// The last entry of rowPointers is read via getEntryOnHost and
// compared against the lengths of the other two arrays.
364  const size_t numEnt =
365  ::Tpetra::Details::getEntryOnHost (rowPointers, rowPointers.extent (0) - 1);
366  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
367  (numEnt != static_cast<size_t> (columnIndices.extent (0)) ||
368  numEnt != static_cast<size_t> (values.extent (0)),
369  std::invalid_argument, "Last entry of rowPointers says that the matrix"
370  " has " << numEnt << " entr" << (numEnt != 1 ? "ies" : "y") << ", but "
371  "the dimensions of columnIndices and values don't match this. "
372  "columnIndices.extent(0) = " << columnIndices.extent (0) <<
373  " and values.extent(0) = " << values.extent (0) << ".");
374  }
375 #endif // HAVE_TPETRA_DEBUG
376 
377  RCP<crs_graph_type> graph;
378  try {
379  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, rowPointers,
380  columnIndices, params));
381  }
382  catch (std::exception& e) {
383  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
384  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
385  "RCP<const Map>, ptr, ind[, params]) threw an exception: "
386  << e.what ());
387  }
388  // The newly created CrsGraph _must_ have a local graph at this
389  // point. We don't really care whether CrsGraph's constructor
390  // deep-copies or shallow-copies the input, but the dimensions
391  // have to be right. That's how we tell whether the CrsGraph has
392  // a local graph.
393  auto lclGraph = graph->getLocalGraph ();
394  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
395  (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
396  lclGraph.entries.extent (0) != columnIndices.extent (0),
397  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
398  "ind[, params]) did not set the local graph correctly." << suffix);
399  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
400  (lclGraph.entries.extent (0) != values.extent (0),
401  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
402  "params]) did not set the local graph correctly. "
403  "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
404  << " != values.extent(0) = " << values.extent (0) << suffix);
405 
406  // myGraph_ not null means that the matrix owns the graph. This
407  // is true because the column indices come in as nonconst,
408  // implying shared ownership.
409  myGraph_ = graph;
410  staticGraph_ = graph;
411 
412  // The graph may not be fill complete yet. However, it is locally
413  // indexed (since we have a column Map) and has a fixed structure
414  // (due to the input arrays). This means we can allocate the
415  // (1-D) array of values and build the local matrix right now.
416  // Note that the local matrix's number of columns comes from the
417  // column Map, not the domain Map.
418 
419  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
420  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
421  numCols, values, lclGraph);
422  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
423  (lclMatrix_.values.extent (0) != values.extent (0),
424  std::logic_error, "Local matrix's constructor did not set the values "
425  "correctly. lclMatrix_.values.extent(0) = " <<
426  lclMatrix_.values.extent (0) << " != values.extent(0) = " <<
427  values.extent (0) << suffix);
428 
429  // FIXME (22 Jun 2016) I would very much like to get rid of
430  // k_values1D_ at some point. I find it confusing to have all
431  // these extra references lying around.
432  this->k_values1D_ = this->lclMatrix_.values;
433 
434  checkInternalState ();
435  }
436 
// Constructor taking a row Map, a column Map, and the three sparse
// arrays as Teuchos::ArrayRCP (ptr, ind, val), plus a parameter list.
//
// Initial state: storageStatus_ is STORAGE_1D_PACKED; fillComplete_
// is false; frobNorm_ is -STM::one ().
//
// Builds a crs_graph_type from (rowMap, colMap, ptr, ind, params) and
// stores it in both myGraph_ and staticGraph_. A std::exception from
// that constructor is reported as std::runtime_error with the
// original what() text appended. The local graph's dimensions must
// then match ptr.size () and ind.size () (std::logic_error otherwise).
//
// val is reinterpreted as impl_scalar_type and passed through
// getKokkosViewDeepCopy<device_type> to obtain the values array for
// lclMatrix_ (presumably a deep copy, as the name suggests --
// confirm). k_values1D_ is then set to lclMatrix_.values.
437  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
439  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
440  const Teuchos::RCP<const map_type>& colMap,
441  const Teuchos::ArrayRCP<size_t>& ptr,
442  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
443  const Teuchos::ArrayRCP<Scalar>& val,
444  const Teuchos::RCP<Teuchos::ParameterList>& params) :
445  dist_object_type (rowMap),
446  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
447  fillComplete_ (false),
448  frobNorm_ (-STM::one ())
449  {
450  using Kokkos::Compat::getKokkosViewDeepCopy;
451  using Teuchos::av_reinterpret_cast;
452  using Teuchos::RCP;
453  typedef typename local_matrix_type::values_type values_type;
454  typedef impl_scalar_type IST;
455  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
456  "RCP<const Map>, ptr, ind, val[, params]): ";
457 
458  RCP<crs_graph_type> graph;
459  try {
460  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, ptr,
461  ind, params));
462  }
463  catch (std::exception& e) {
464  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
465  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
466  "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
467  "RCP<ParameterList>]) threw an exception: " << e.what ());
468  }
469  // myGraph_ not null means that the matrix owns the graph. This
470  // is true because the column indices come in as nonconst,
471  // implying shared ownership.
472  myGraph_ = graph;
473  staticGraph_ = graph;
474 
475  // The graph may not be fill complete yet. However, it is locally
476  // indexed (since we have a column Map) and has a fixed structure
477  // (due to the input arrays). This means we can allocate the
478  // (1-D) array of values and build the local matrix right now.
479  // Note that the local matrix's number of columns comes from the
480  // column Map, not the domain Map.
481 
482  // The graph _must_ have a local graph at this point. We don't
483  // really care whether CrsGraph's constructor deep-copies or
484  // shallow-copies the input, but the dimensions have to be right.
485  // That's how we tell whether the CrsGraph has a local graph.
486  auto lclGraph = staticGraph_->getLocalGraph ();
487  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
488  (static_cast<size_t> (lclGraph.row_map.extent (0)) != static_cast<size_t> (ptr.size ()) ||
489  static_cast<size_t> (lclGraph.entries.extent (0)) != static_cast<size_t> (ind.size ()),
490  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
491  "ind[, params]) did not set the local graph correctly. Please "
492  "report this bug to the Tpetra developers.");
493 
494  const size_t numCols = staticGraph_->getColMap ()->getNodeNumElements ();
495  values_type valIn = getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
496  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
497  numCols, valIn, lclGraph);
498  // FIXME (22 Jun 2016) I would very much like to get rid of
499  // k_values1D_ at some point. I find it confusing to have all
500  // these extra references lying around.
501  this->k_values1D_ = this->lclMatrix_.values;
502 
503  checkInternalState ();
504  }
505 
// Constructor taking a row Map, a column Map, an already-built
// local_matrix_type, and a parameter list.
//
// Initial state: lclMatrix_ is a copy of lclMatrix and k_values1D_ is
// lclMatrix.values; storageStatus_ is STORAGE_1D_PACKED;
// fillComplete_ is true; frobNorm_ is -STM::one ().
//
// Builds a crs_graph_type from (rowMap, colMap, lclMatrix.graph,
// params) and stores it in both myGraph_ and staticGraph_. A
// std::exception from that constructor is reported as
// std::runtime_error; a graph that is not fill complete afterwards is
// reported as std::logic_error.
//
// computeGlobalConstants () is called unless params is non-null and
// its "compute global constants" entry (default true) is false.
//
// NOTE(review): the debug sanity-check messages say "at the end of
// fillComplete()" although this code is a constructor.
506  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
508  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
509  const Teuchos::RCP<const map_type>& colMap,
510  const local_matrix_type& lclMatrix,
511  const Teuchos::RCP<Teuchos::ParameterList>& params) :
512  dist_object_type (rowMap),
513  lclMatrix_ (lclMatrix),
514  k_values1D_ (lclMatrix.values),
515  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
516  fillComplete_ (true),
517  frobNorm_ (-STM::one ())
518  {
519  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
520  "RCP<const Map>, local_matrix_type[, RCP<ParameterList>]): ";
521  Teuchos::RCP<crs_graph_type> graph;
522  try {
523  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
524  lclMatrix.graph, params));
525  }
526  catch (std::exception& e) {
527  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
528  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
529  "RCP<const Map>, local_graph_type[, RCP<ParameterList>]) threw an "
530  "exception: " << e.what ());
531  }
532  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
533  (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
534  "<const Map>, RCP<const Map>, local_graph_type[, RCP<ParameterList>]) "
535  "did not produce a fill-complete graph. Please report this bug to the "
536  "Tpetra developers.");
537  // myGraph_ not null means that the matrix owns the graph. This
538  // is true because the column indices come in as nonconst through
539  // the matrix, implying shared ownership.
540  myGraph_ = graph;
541  staticGraph_ = graph;
542 
543  const bool callComputeGlobalConstants = params.get () == nullptr ||
544  params->get ("compute global constants", true);
545  if (callComputeGlobalConstants) {
546  this->computeGlobalConstants ();
547  }
548 
549  // Sanity checks at the end.
550 #ifdef HAVE_TPETRA_DEBUG
551  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
552  "We're at the end of fillComplete(), but isFillActive() is true. "
553  "Please report this bug to the Tpetra developers.");
554  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
555  "We're at the end of fillComplete(), but isFillComplete() is false. "
556  "Please report this bug to the Tpetra developers.");
557 #endif // HAVE_TPETRA_DEBUG
558  checkInternalState ();
559  }
560 
// Constructor taking an already-built local_matrix_type followed by
// row, column, domain, and range Maps, and a parameter list.
//
// Initial state: lclMatrix_ is a copy of lclMatrix and k_values1D_ is
// lclMatrix.values; storageStatus_ is STORAGE_1D_PACKED;
// fillComplete_ is true; frobNorm_ is -STM::one ().
//
// Builds a crs_graph_type from (lclMatrix.graph, rowMap, colMap,
// domainMap, rangeMap, params) and stores it in both myGraph_ and
// staticGraph_. A std::exception from that constructor is reported
// as std::runtime_error; a graph that is not fill complete afterwards
// is reported as std::logic_error.
//
// computeGlobalConstants () is called unless params is non-null and
// its "compute global constants" entry (default true) is false.
//
// NOTE(review): tfecfFuncName and the error messages list the Maps
// before local_matrix_type, whereas the actual signature takes
// lclMatrix first. The debug sanity-check messages also say "at the
// end of fillComplete()" although this code is a constructor.
561  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
563  CrsMatrix (const local_matrix_type& lclMatrix,
564  const Teuchos::RCP<const map_type>& rowMap,
565  const Teuchos::RCP<const map_type>& colMap,
566  const Teuchos::RCP<const map_type>& domainMap,
567  const Teuchos::RCP<const map_type>& rangeMap,
568  const Teuchos::RCP<Teuchos::ParameterList>& params) :
569  dist_object_type (rowMap),
570  lclMatrix_ (lclMatrix),
571  k_values1D_ (lclMatrix.values),
572  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
573  fillComplete_ (true),
574  frobNorm_ (-STM::one ())
575  {
576  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
577  "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_matrix_type[, "
578  "RCP<ParameterList>]): ";
579  Teuchos::RCP<crs_graph_type> graph;
580  try {
581  graph = Teuchos::rcp (new crs_graph_type (lclMatrix.graph, rowMap, colMap,
582  domainMap, rangeMap, params));
583  }
584  catch (std::exception& e) {
585  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
586  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
587  "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_type[, "
588  "RCP<ParameterList>]) threw an exception: " << e.what ());
589  }
590  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
591  (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
592  "<const Map>, RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_type[, "
593  "RCP<ParameterList>]) did not produce a fill-complete graph. Please report this "
594  "bug to the Tpetra developers.");
595  // myGraph_ not null means that the matrix owns the graph. This
596  // is true because the column indices come in as nonconst through
597  // the matrix, implying shared ownership.
598  myGraph_ = graph;
599  staticGraph_ = graph;
600 
601  const bool callComputeGlobalConstants = params.get () == nullptr ||
602  params->get ("compute global constants", true);
603  if (callComputeGlobalConstants) {
604  this->computeGlobalConstants ();
605  }
606 
607  // Sanity checks at the end.
608 #ifdef HAVE_TPETRA_DEBUG
609  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
610  "We're at the end of fillComplete(), but isFillActive() is true. "
611  "Please report this bug to the Tpetra developers.");
612  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
613  "We're at the end of fillComplete(), but isFillComplete() is false. "
614  "Please report this bug to the Tpetra developers.");
615 #endif // HAVE_TPETRA_DEBUG
616  checkInternalState ();
617  }
618 
619  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
622  {}
623 
// Returns the communicator reported by the matrix's graph
// (forwards to getCrsGraphRef ().getComm ()).
624  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
625  Teuchos::RCP<const Teuchos::Comm<int> >
627  getComm () const {
628  return getCrsGraphRef ().getComm ();
629  }
630 
// Returns the Node instance reported by the matrix's graph
// (forwards to getCrsGraphRef ().getNode ()).
631  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
632  Teuchos::RCP<Node>
634  getNode () const {
635  return getCrsGraphRef ().getNode ();
636  }
637 
// Returns the graph's profile type (forwards to
// getCrsGraphRef ().getProfileType ()). The return-type line is not
// present in this listing.
638  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
641  getProfileType () const {
642  return this->getCrsGraphRef ().getProfileType ();
643  }
644 
// Returns the matrix's own fillComplete_ flag.
645  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
646  bool
648  isFillComplete () const {
649  return fillComplete_;
650  }
651 
// Returns the negation of fillComplete_, so it is always the exact
// opposite of isFillComplete ().
652  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
653  bool
655  isFillActive () const {
656  return ! fillComplete_;
657  }
658 
659  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
660  bool
663  return this->getCrsGraphRef ().isStorageOptimized ();
664  }
665 
666  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
667  bool
670  return getCrsGraphRef ().isLocallyIndexed ();
671  }
672 
673  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
674  bool
677  return getCrsGraphRef ().isGloballyIndexed ();
678  }
679 
// Returns whether the matrix's graph has a column Map
// (forwards to getCrsGraphRef ().hasColMap ()).
680  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
681  bool
683  hasColMap () const {
684  return getCrsGraphRef ().hasColMap ();
685  }
686 
687  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
691  return getCrsGraphRef ().getGlobalNumEntries ();
692  }
693 
694  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
695  size_t
698  return getCrsGraphRef ().getNodeNumEntries ();
699  }
700 
701  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
705  return getCrsGraphRef ().getGlobalNumRows ();
706  }
707 
708  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
712  return getCrsGraphRef ().getGlobalNumCols ();
713  }
714 
// Returns the graph's getNodeNumRows () value (presumably the number
// of rows on the calling process -- confirm against CrsGraph).
715  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
716  size_t
718  getNodeNumRows () const {
719  return getCrsGraphRef ().getNodeNumRows ();
720  }
721 
// Returns the graph's getNodeNumCols () value (presumably the number
// of columns on the calling process -- confirm against CrsGraph).
722  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
723  size_t
725  getNodeNumCols () const {
726  return getCrsGraphRef ().getNodeNumCols ();
727  }
728 
729  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
733  const crs_graph_type& G = this->getCrsGraphRef ();
735  return dynamic_cast<const HDM&> (G).getGlobalNumDiagsImpl ();
736  }
737 
738  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
742  return this->getGlobalNumDiagsImpl ();
743  }
744 
745  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
746  size_t
749  const crs_graph_type& G = this->getCrsGraphRef ();
751  return dynamic_cast<const HDM&> (G).getNodeNumDiagsImpl ();
752  }
753 
754  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
755  size_t
758  return this->getNodeNumDiagsImpl ();
759  }
760 
// Returns the graph's entry count for the row with global index
// globalRow (forwards to getCrsGraphRef ().getNumEntriesInGlobalRow).
// Behavior for a row not owned by the caller is defined by CrsGraph,
// not here.
761  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
762  size_t
764  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
765  return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
766  }
767 
// Returns the graph's entry count for the row with local index
// localRow (forwards to getCrsGraphRef ().getNumEntriesInLocalRow).
// Behavior for an invalid local row is defined by CrsGraph, not here.
768  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
769  size_t
771  getNumEntriesInLocalRow (LocalOrdinal localRow) const {
772  return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
773  }
774 
775  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
776  size_t
779  return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
780  }
781 
782  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
783  size_t
786  return getCrsGraphRef ().getNodeMaxNumRowEntries ();
787  }
788 
// Returns the index base of the row Map
// (getRowMap ()->getIndexBase ()).
789  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
790  GlobalOrdinal
792  getIndexBase () const {
793  return getRowMap ()->getIndexBase ();
794  }
795 
// Returns the row Map held by the matrix's graph
// (forwards to getCrsGraphRef ().getRowMap ()).
796  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
797  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
799  getRowMap () const {
800  return getCrsGraphRef ().getRowMap ();
801  }
802 
// Returns the column Map held by the matrix's graph
// (forwards to getCrsGraphRef ().getColMap ()).
803  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
804  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
806  getColMap () const {
807  return getCrsGraphRef ().getColMap ();
808  }
809 
// Returns the domain Map held by the matrix's graph
// (forwards to getCrsGraphRef ().getDomainMap ()).
810  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
811  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
813  getDomainMap () const {
814  return getCrsGraphRef ().getDomainMap ();
815  }
816 
// Returns the range Map held by the matrix's graph
// (forwards to getCrsGraphRef ().getRangeMap ()).
817  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
818  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
820  getRangeMap () const {
821  return getCrsGraphRef ().getRangeMap ();
822  }
823 
// Returns the matrix's graph as a RowGraph pointer: staticGraph_ when
// it is non-null, otherwise myGraph_. The result may therefore be
// null if both members are null.
824  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
825  Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
827  getGraph () const {
828  if (staticGraph_ != Teuchos::null) {
829  return staticGraph_;
830  }
831  return myGraph_;
832  }
833 
// Returns the matrix's graph as a CrsGraph pointer: staticGraph_ when
// it is non-null, otherwise myGraph_. The result may therefore be
// null if both members are null.
834  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
835  Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
837  getCrsGraph () const {
838  if (staticGraph_ != Teuchos::null) {
839  return staticGraph_;
840  }
841  return myGraph_;
842  }
843 
// Returns a reference to the matrix's graph: *staticGraph_ when it is
// non-null, otherwise *myGraph_. The return-type line is not present
// in this listing.
//
// With HAVE_TPETRA_DEBUG defined, a null myGraph_ in the fallback
// branch is reported as std::logic_error.
// NOTE(review): without HAVE_TPETRA_DEBUG the fallback dereferences
// myGraph_ with no check in this function.
844  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
847  getCrsGraphRef () const {
848  if (! this->staticGraph_.is_null ()) {
849  return * (this->staticGraph_);
850  }
851  else {
852 #ifdef HAVE_TPETRA_DEBUG
853  const char tfecfFuncName[] = "getCrsGraphRef: ";
854  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
855  (this->myGraph_.is_null (), std::logic_error,
856  "Both staticGraph_ and myGraph_ are null. "
857  "Please report this bug to the Tpetra developers.");
858 #endif // HAVE_TPETRA_DEBUG
859  return * (this->myGraph_);
860  }
861  }
862 
// Returns the graph's isLowerTriangularImpl () result, reached by
// casting the graph reference to const HDM&.
// NOTE(review): the declaration of HDM is not present in this
// listing, so its exact type cannot be confirmed from here. Because
// this is a reference dynamic_cast, a failed cast would throw
// std::bad_cast.
863  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
864  bool
865  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
866  isLowerTriangularImpl () const {
867  const crs_graph_type& G = this->getCrsGraphRef ();
869  return dynamic_cast<const HDM&> (G).isLowerTriangularImpl ();
870  }
871 
// Thin forwarder: returns the result of isLowerTriangularImpl().
// NOTE(review): listing lines 874-875 (qualifier and function name) are
// absent from this view; presumably this is the public isLowerTriangular()
// -- confirm against the class declaration.
872  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
873  bool
876  return this->isLowerTriangularImpl ();
877  }
878 
// Answers the query by asking the matrix's graph: the graph reference is
// cast to `const HDM&` and its isUpperTriangularImpl() result is returned.
// NOTE(review): listing lines 881-882 (qualifier and function name) and 884
// (presumably the HDM typedef) are absent from this view; presumably this is
// isUpperTriangularImpl() -- confirm.
879  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
880  bool
883  const crs_graph_type& G = this->getCrsGraphRef ();
885  return dynamic_cast<const HDM&> (G).isUpperTriangularImpl ();
886  }
887 
// Thin forwarder: returns the result of isUpperTriangularImpl().
// NOTE(review): listing lines 890-891 (qualifier and function name) are
// absent from this view; presumably this is the public isUpperTriangular()
// -- confirm against the class declaration.
888  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
889  bool
892  return this->isUpperTriangularImpl ();
893  }
894 
// Reports whether the matrix has a "static" graph, defined here as
// myGraph_ being null (i.e. there is no matrix-owned, modifiable graph).
// NOTE(review): listing line 897 is absent from this view; presumably it is
// the `CrsMatrix<...>::` qualifier seen on other definitions -- confirm.
895  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
896  bool
898  isStaticGraph () const {
899  return myGraph_.is_null ();
900  }
901 
// Capability query that unconditionally returns true.
// NOTE(review): listing lines 904-905 (qualifier and function name) are
// absent from this view, so which query this is cannot be told from here
// -- confirm against the class declaration.
902  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
903  bool
906  return true;
907  }
908 
// Capability query that unconditionally returns true.
// NOTE(review): listing lines 911-912 (qualifier and function name) are
// absent from this view, so which query this is cannot be told from here
// -- confirm against the class declaration.
909  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
910  bool
913  return true;
914  }
915 
// Builds and returns the 2-D ("array of arrays") value storage: one inner
// array per local row of the graph, each resized to the length of that
// row's inner array in the graph's 2-D index storage.
//
// The row lengths come from graph.lclInds2D_ when it is non-null, else from
// graph.gblInds2D_ when that is non-null; if both are null, the inner
// arrays are left as default-constructed.
//
// Both precondition checks name std::runtime_error: the graph's indices
// must already be allocated, and the graph's profile must be DynamicProfile.
// NOTE(review): listing lines 918-919 (qualifier and function declarator)
// are absent from this view; the name is taken from tfecfFuncName below.
916  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
917  Teuchos::ArrayRCP<Teuchos::Array<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type> >
920  {
921  using Teuchos::arcp;
922  using Teuchos::Array;
923  using Teuchos::ArrayRCP;
924  typedef impl_scalar_type IST;
925  typedef LocalOrdinal LO;
926  const char tfecfFuncName[] = "allocateValues2D: ";
927 
928  const crs_graph_type& graph = this->getCrsGraphRef ();
929  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
930  (! graph.indicesAreAllocated (), std::runtime_error,
931  "Graph indices must be allocated before values.");
932  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
933  (graph.getProfileType () != DynamicProfile, std::runtime_error,
934  "Graph indices must be allocated in a dynamic profile.");
935 
936  const LO lclNumRows = graph.getNodeNumRows ();
// Outer array: one (initially empty) inner array per local row.
937  Teuchos::ArrayRCP<Teuchos::Array<IST> > values2D (lclNumRows);
// Mirror the per-row lengths of whichever 2-D index array the graph holds.
938  if (! graph.lclInds2D_.is_null ()) {
939  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
940  values2D[lclRow].resize (graph.lclInds2D_[lclRow].size ());
941  }
942  }
943  else if (! graph.gblInds2D_.is_null ()) {
944  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
945  values2D[lclRow].resize (graph.gblInds2D_[lclRow].size ());
946  }
947  }
948  return values2D;
949  }
950 
951  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
952  void
953  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
954  allocateValues (ELocalGlobal lg, GraphAllocationStatus gas)
955  {
956  using ::Tpetra::Details::ProfilingRegion;
957  const char tfecfFuncName[] = "allocateValues: ";
958  ProfilingRegion regionAllocateValues ("Tpetra::CrsMatrix::allocateValues");
959 
960 #ifdef HAVE_TPETRA_DEBUG
961  const char suffix[] = " Please report this bug to the Tpetra developers.";
962 
963  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
964  (this->staticGraph_.is_null (), std::logic_error,
965  "staticGraph_ is null." << suffix);
966 
967  // If the graph indices are already allocated, then gas should be
968  // GraphAlreadyAllocated. Otherwise, gas should be
969  // GraphNotYetAllocated.
970  if ((gas == GraphAlreadyAllocated) != this->staticGraph_->indicesAreAllocated ()) {
971  const char err1[] = "The caller has asserted that the graph is ";
972  const char err2[] = "already allocated, but the static graph says "
973  "that its indices are ";
974  const char err3[] = "already allocated. Please report this bug to "
975  "the Tpetra developers.";
976  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
977  (gas == GraphAlreadyAllocated && ! this->staticGraph_->indicesAreAllocated (),
978  std::logic_error, err1 << err2 << "not " << err3);
979  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
980  (gas != GraphAlreadyAllocated && this->staticGraph_->indicesAreAllocated (),
981  std::logic_error, err1 << "not " << err2 << err3);
982  }
983 
984  // If the graph is unallocated, then it had better be a
985  // matrix-owned graph. ("Matrix-owned graph" means that the
986  // matrix gets to define the graph structure. If the CrsMatrix
987  // constructor that takes an RCP<const CrsGraph> was used, then
988  // the matrix does _not_ own the graph.)
989  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
990  (! this->staticGraph_->indicesAreAllocated () &&
991  this->myGraph_.is_null (), std::logic_error,
992  "The static graph says that its indices are not allocated, "
993  "but the graph is not owned by the matrix." << suffix);
994 #endif // HAVE_TPETRA_DEBUG
995 
996  if (gas == GraphNotYetAllocated) {
997 #ifdef HAVE_TPETRA_DEBUG
998  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
999  (this->myGraph_.is_null (), std::logic_error,
1000  "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1001 #endif // HAVE_TPETRA_DEBUG
1002  try {
1003  this->myGraph_->allocateIndices (lg);
1004  }
1005  catch (std::exception& e) {
1006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1007  (true, std::runtime_error, "CrsGraph::allocateIndices "
1008  "threw an exception: " << e.what ());
1009  }
1010  catch (...) {
1011  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1012  (true, std::runtime_error, "CrsGraph::allocateIndices "
1013  "threw an exception not a subclass of std::exception.");
1014  }
1015  }
1016 
1017  // Allocate matrix values.
1018  if (this->getProfileType () == StaticProfile) {
1019  // "Static profile" means that the number of matrix entries in
1020  // each row was fixed at the time the CrsMatrix constructor was
1021  // called. This lets us use 1-D storage for the matrix's
1022  // values. ("1-D storage" means the same as that used by the
1023  // three arrays in the compressed sparse row storage format.)
1024 
1025 #ifdef HAVE_TPETRA_DEBUG
1026  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1027  (this->staticGraph_.is_null (), std::logic_error,
1028  "this->getProfileType() == StaticProfile, but staticGraph_ is null."
1029  << suffix);
1030 #endif // HAVE_TPETRA_DEBUG
1031 
1032  const size_t lclNumRows = this->staticGraph_->getNodeNumRows ();
1033  typename Graph::local_graph_type::row_map_type k_ptrs =
1034  this->staticGraph_->k_rowPtrs_;
1035  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1036  (k_ptrs.extent (0) != lclNumRows+1, std::logic_error,
1037  "With StaticProfile, row offsets array has length "
1038  << k_ptrs.extent (0) << " != (lclNumRows+1) = "
1039  << (lclNumRows+1) << ".");
1040 
1041  const size_t lclTotalNumEntries =
1042  ::Tpetra::Details::getEntryOnHost (k_ptrs, lclNumRows);
1043 
1044  // Allocate array of (packed???) matrix values.
1045  typedef typename local_matrix_type::values_type values_type;
1046  this->k_values1D_ =
1047  values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1048  }
1049  else {
1050  // "Dynamic profile" means the number of matrix entries in each
1051  // row is not fixed and may expand. Thus, we store the matrix's
1052  // values in "2-D storage," meaning an array of arrays. The
1053  // outer array has as many inner arrays as there are rows in the
1054  // matrix, and each inner array stores the values in that row.
1055  this->values2D_ = this->allocateValues2D ();
1056  }
1057  }
1058 
// Hands out the matrix's three 1-D storage arrays through the output
// arguments:
//   rowPointers   <- getNodeRowPtrs() of the graph from getCrsGraph()
//   columnIndices <- getNodePackedIndices() of that graph
//   values        <- k_values1D_, viewed as an array of const Scalar
//
// The checks name std::runtime_error: when getCrsGraph() is null, and when
// either graph accessor throws something derived from std::exception
// (other exception types are not caught here).
//
// NOTE(review): the first check compares the sizes of columnIndices and
// values as passed in, before either is overwritten, so it constrains the
// caller's incoming arguments rather than the results -- confirm that this
// is intended.
// NOTE(review): listing line 1061 is absent from this view; presumably it
// is the `CrsMatrix<...>::` qualifier seen on other definitions -- confirm.
1059  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1060  void
1062  getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
1063  Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
1064  Teuchos::ArrayRCP<const Scalar>& values) const
1065  {
1066  using Teuchos::RCP;
1067  const char tfecfFuncName[] = "getAllValues: ";
1068  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1069  columnIndices.size () != values.size (), std::runtime_error,
1070  "Requires that columnIndices and values are the same size.");
1071 
1072  RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
1073  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1074  relevantGraph.is_null (), std::runtime_error,
1075  "Requires that getCrsGraph() is not null.");
1076  try {
1077  rowPointers = relevantGraph->getNodeRowPtrs ();
1078  }
1079  catch (std::exception &e) {
1080  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1081  true, std::runtime_error,
1082  "Caught exception while calling graph->getNodeRowPtrs(): "
1083  << e.what ());
1084  }
1085  try {
1086  columnIndices = relevantGraph->getNodePackedIndices ();
1087  }
1088  catch (std::exception &e) {
1089  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1090  true, std::runtime_error,
1091  "Caught exception while calling graph->getNodePackedIndices(): "
1092  << e.what ());
1093  }
// The values are stored as impl_scalar_type; expose them to the caller as
// Scalar via a reinterpreting cast of the array handle.
1094  Teuchos::ArrayRCP<const impl_scalar_type> vals =
1095  Kokkos::Compat::persistingView (k_values1D_);
1096  values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
1097  }
1098 
// Builds the compressed-sparse-row arrays (row offsets, local column
// indices, values) from the current storage of the matrix and of the graph
// it owns (myGraph_), then constructs myGraph_->lclGraph_ and lclMatrix_
// from those arrays.
//
// Where the arrays come from depends on the current storage:
//  - DynamicProfile: the 2-D arrays myGraph_->lclInds2D_ and values2D_ are
//    packed on host into freshly allocated 1-D arrays, which are then
//    deep-copied into their device Views.
//  - StaticProfile with getNodeNumEntries() != getNodeAllocationSize():
//    packed row offsets are computed from myGraph_->k_numRowEntries_, and
//    the existing 1-D arrays are packed into new ones by pack_functor.
//  - StaticProfile otherwise: the existing k_rowPtrs_, k_lclInds1D_ and
//    k_values1D_ are used as they are.
//
// params may be null; the only entry consulted here is the bool
// "Optimize Storage".  When the resulting request is true, the old 2-D /
// unpacked allocations are released, the packed arrays are stored back into
// the graph and the matrix, and the graph is marked StaticProfile with
// STORAGE_1D_PACKED status.
//
// All TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC checks in this function are
// compiled only when HAVE_TPETRA_DEBUG is defined.
//
// NOTE(review): listing line 1104 is absent from this view; presumably it
// is the using-declaration that brings the unqualified
// computeOffsetsFromCounts used below into scope -- confirm.
1099  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1100  void
1101  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1102  fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1103  {
1105  using ::Tpetra::Details::ProfilingRegion;
1106  using Kokkos::create_mirror_view;
1107  using Teuchos::arcp_const_cast;
1108  using Teuchos::Array;
1109  using Teuchos::ArrayRCP;
1110  using Teuchos::null;
1111  using Teuchos::RCP;
1112  using Teuchos::rcp;
1113  typedef typename local_matrix_type::row_map_type row_map_type;
1114  typedef typename Graph::local_graph_type::entries_type::non_const_type lclinds_1d_type;
1115  typedef typename local_matrix_type::values_type values_type;
// NOTE(review): the region label says "CrsGraph" although this is a
// CrsMatrix method -- confirm whether that is deliberate.
1116  ProfilingRegion regionFLGAM ("Tpetra::CrsGraph::fillLocalGraphAndMatrix");
1117 
1118 #ifdef HAVE_TPETRA_DEBUG
1119  const char tfecfFuncName[] = "fillLocalGraphAndMatrix (called from "
1120  "fillComplete or expertStaticFillComplete): ";
1121 #endif // HAVE_TPETRA_DEBUG
1122 
1123 #ifdef HAVE_TPETRA_DEBUG
1124  // fillComplete() only calls fillLocalGraphAndMatrix() if the
1125  // matrix owns the graph, which means myGraph_ is not null.
1126  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1127  (myGraph_.is_null (), std::logic_error, "The nonconst graph (myGraph_) "
1128  "is null. This means that the matrix has a const (a.k.a. \"static\") "
1129  "graph. fillComplete or expertStaticFillComplete should never call "
1130  "fillLocalGraphAndMatrix in that case. "
1131  "Please report this bug to the Tpetra developers.");
1132 #endif // HAVE_TPETRA_DEBUG
1133 
1134  const size_t lclNumRows = this->getNodeNumRows ();
1135 
1136  // This method's goal is to fill in the three arrays (compressed
1137  // sparse row format) that define the sparse graph's and matrix's
1138  // structure, and the sparse matrix's values.
1139  //
1140  // Use the nonconst version of row_map_type for k_ptrs,
1141  // because row_map_type is const and we need to modify k_ptrs here.
1142  typename row_map_type::non_const_type k_ptrs;
1143  row_map_type k_ptrs_const;
1144  lclinds_1d_type k_inds;
1145  values_type k_vals;
1146 
1147  // Get references to the data in myGraph_, so we can modify them
1148  // as well. Note that we only call fillLocalGraphAndMatrix() if
1149  // the matrix owns the graph, which means myGraph_ is not null.
// NOTE(review): this local is not referenced again in this function; every
// later use goes through myGraph_->k_lclInds1D_ directly.
1150  lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_;
1151 
1152  typedef decltype (myGraph_->k_numRowEntries_) row_entries_type;
1153 
1154  if (getProfileType () == DynamicProfile) {
1155  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1156  //
1157  // DynamicProfile means that the matrix's column indices and
1158  // values are currently stored in a 2-D "unpacked" format, in
1159  // the arrays-of-arrays myGraph_->lclInds2D_ (for column
1160  // indices) and values2D_ (for values). We allocate 1-D storage
1161  // (k_inds resp. k_vals), and then copy from 2-D storage
1162  // (lclInds2D_ resp. values2D_) into 1-D storage (k_inds
1163  // resp. k_vals).
1164 
1165  // We're packing on host. k_numRowEntries_ lives on host,
1166  // and computeOffsetsFromCounts accepts a host View for counts,
1167  // even if offsets is a device View. (Furthermore, the "host"
1168  // View may very well live in CudaUVMSpace, so doing this has no
1169  // penalty, other than requiring synchronization between Cuda
1170  // and host. UVM memory gets grumpy if both device and host
1171  // attempt to access it at the same time without an intervening
1172  // fence.)
1173  typename row_entries_type::const_type numRowEnt_h =
1174  myGraph_->k_numRowEntries_;
1175 #ifdef HAVE_TPETRA_DEBUG
1176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177  (static_cast<size_t> (numRowEnt_h.extent (0)) != lclNumRows,
1178  std::logic_error, "(DynamicProfile branch) numRowEnt_h has the "
1179  "wrong length. numRowEnt_h.extent(0) = "
1180  << numRowEnt_h.extent (0) << " != getNodeNumRows() = "
1181  << lclNumRows << ".");
1182 #endif // HAVE_TPETRA_DEBUG
1183 
1184  // We're packing on host (since we can't read Teuchos data
1185  // structures on device), so let's fill the packed row offsets
1186  // on host first.
1187  k_ptrs = typename row_map_type::non_const_type ("Tpetra::CrsGraph::ptr",
1188  lclNumRows+1);
1189  typename row_map_type::non_const_type::HostMirror h_ptrs =
1190  create_mirror_view (k_ptrs);
1191 
1192  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1193  // the array of valid entry counts per row.
1194  //
1195  // Return value is the total number of entries in the matrix on
1196  // the calling process. It's cheap to compute and useful as a
1197  // sanity check.
1198  const size_t lclTotalNumEntries =
1199  computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
1200 #ifdef HAVE_TPETRA_DEBUG
1201  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1202  (static_cast<size_t> (h_ptrs.extent (0)) != lclNumRows + 1,
1203  std::logic_error, "(DynamicProfile branch) After packing h_ptrs, "
1204  "h_ptrs.extent(0) = " << h_ptrs.extent (0) << " != "
1205  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1206  {
1207  const size_t h_ptrs_lastEnt = h_ptrs(lclNumRows); // it's a host View
1208  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1209  (h_ptrs_lastEnt != lclTotalNumEntries, std::logic_error,
1210  "(DynamicProfile branch) After packing h_ptrs, h_ptrs(lclNumRows="
1211  << lclNumRows << ") = " << h_ptrs_lastEnt << " != total number "
1212  "of entries on the calling process = " << lclTotalNumEntries << ".");
1213  }
1214 #endif // HAVE_TPETRA_DEBUG
1215 
1216  // Allocate the arrays of packed column indices and values.
1217  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1218  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1219 
1220  // We need host views of the above, since 2-D storage lives on host.
1221  typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds);
1222  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1223 
1224  // Pack the column indices and values on the host.
// Only the first numRowEnt_h(row) entries of each 2-D row are copied; they
// land at offset h_ptrs(row) in the packed arrays.
1225  ArrayRCP<Array<LocalOrdinal> > lclInds2D = myGraph_->lclInds2D_;
1226  for (size_t row = 0; row < lclNumRows; ++row) {
1227  const size_t numEnt = numRowEnt_h(row);
1228  std::copy (lclInds2D[row].begin(),
1229  lclInds2D[row].begin() + numEnt,
1230  h_inds.data() + h_ptrs(row));
1231  std::copy (values2D_[row].begin(),
1232  values2D_[row].begin() + numEnt,
1233  h_vals.data() + h_ptrs(row));
1234  }
1235 
1236  // Copy the packed column indices and values to the device.
1237  Kokkos::deep_copy (k_inds, h_inds);
1238  Kokkos::deep_copy (k_vals, h_vals);
1239  // Copy the packed row offsets to the device too.
1240  // We didn't actually need them on device before.
1241  Kokkos::deep_copy (k_ptrs, h_ptrs);
1242  k_ptrs_const = k_ptrs; // const version of k_ptrs
1243 
1244 #ifdef HAVE_TPETRA_DEBUG
1245  // Sanity check of packed row offsets.
1246  if (k_ptrs.extent (0) != 0) {
1247  const size_t numOffsets = static_cast<size_t> (k_ptrs.extent (0));
1248  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1249  (numOffsets != lclNumRows + 1, std::logic_error, "(DynamicProfile "
1250  "branch) After copying into k_ptrs, k_ptrs.extent(0) = " <<
1251  numOffsets << " != (lclNumRows+1) = " << (lclNumRows+1) << ".");
1252 
1253  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, numOffsets-1);
1254  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1255  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1256  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1257  << (numOffsets-1) << ") = " << valToCheck << " != "
1258  "k_vals.extent(0) = " << k_vals.extent (0) << ".");
1259  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1260  (static_cast<size_t> (valToCheck) != k_inds.extent (0),
1261  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1262  << (numOffsets-1) << ") = " << valToCheck << " != "
1263  "k_inds.extent(0) = " << k_inds.extent (0) << ".");
1264  }
1265 #endif // HAVE_TPETRA_DEBUG
1266  }
1267  else if (getProfileType () == StaticProfile) {
1268  // StaticProfile means that the matrix's column indices and
1269  // values are currently stored in a 1-D format, with row offsets
1270  // in k_rowPtrs_ and local column indices in k_lclInds1D_.
1271 
1272  // StaticProfile also means that the graph's array of row
1273  // offsets must already be allocated.
1274  typename Graph::local_graph_type::row_map_type curRowOffsets =
1275  myGraph_->k_rowPtrs_;
1276 
1277 #ifdef HAVE_TPETRA_DEBUG
1278  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1279  (curRowOffsets.extent (0) == 0, std::logic_error,
1280  "(StaticProfile branch) curRowOffsets.extent(0) == 0.");
1281  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1282  (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1283  "(StaticProfile branch) curRowOffsets.extent(0) = "
1284  << curRowOffsets.extent (0) << " != lclNumRows + 1 = "
1285  << (lclNumRows + 1) << ".")
// NOTE(review): the macro invocation above has no trailing ';', unlike the
// other invocations in this function -- presumably harmless, but confirm.
1286  {
1287  const size_t numOffsets = curRowOffsets.extent (0);
1288  const auto valToCheck =
1289  ::Tpetra::Details::getEntryOnHost (curRowOffsets, numOffsets - 1);
// NOTE(review): the message below prints numOffsets as the index, but
// valToCheck was read at index numOffsets - 1.
1290  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1291  (numOffsets != 0 &&
1292  myGraph_->k_lclInds1D_.extent (0) != valToCheck,
1293  std::logic_error, "(StaticProfile branch) numOffsets = " <<
1294  numOffsets << " != 0 and myGraph_->k_lclInds1D_.extent(0) = "
1295  << myGraph_->k_lclInds1D_.extent (0) << " != curRowOffsets("
1296  << numOffsets << ") = " << valToCheck << ".");
1297  }
1298 #endif // HAVE_TPETRA_DEBUG
1299 
1300  if (myGraph_->getNodeNumEntries () != myGraph_->getNodeAllocationSize ()) {
1301  // The matrix's current 1-D storage is "unpacked." This means
1302  // the row offsets may differ from what the final row offsets
1303  // should be. This could happen, for example, if the user
1304  // specified StaticProfile in the constructor and set an upper
1305  // bound on the number of entries per row, but didn't fill all
1306  // those entries.
1307 #ifdef HAVE_TPETRA_DEBUG
1308  if (curRowOffsets.extent (0) != 0) {
1309  const size_t numOffsets =
1310  static_cast<size_t> (curRowOffsets.extent (0));
1311  const auto valToCheck =
1312  ::Tpetra::Details::getEntryOnHost (curRowOffsets, numOffsets-1);
1313  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1314  (static_cast<size_t> (valToCheck) !=
1315  static_cast<size_t> (k_values1D_.extent (0)),
1316  std::logic_error, "(StaticProfile unpacked branch) Before "
1317  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1318  << valToCheck << " != k_values1D_.extent(0)"
1319  " = " << k_values1D_.extent (0) << ".");
1320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1321  (static_cast<size_t> (valToCheck) !=
1322  static_cast<size_t> (myGraph_->k_lclInds1D_.extent (0)),
1323  std::logic_error, "(StaticProfile unpacked branch) Before "
1324  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1325  << valToCheck
1326  << " != myGraph_->k_lclInds1D_.extent(0) = "
1327  << myGraph_->k_lclInds1D_.extent (0) << ".");
1328  }
1329 #endif // HAVE_TPETRA_DEBUG
1330 
1331  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1332  // the array of valid entry counts per row.
1333 
1334  // Total number of entries in the matrix on the calling
1335  // process. We will compute this in the loop below. It's
1336  // cheap to compute and useful as a sanity check.
1337  size_t lclTotalNumEntries = 0;
1338  // This will be a host view of packed row offsets.
// NOTE(review): h_ptrs is declared here but not used again in this branch.
1339  typename row_map_type::non_const_type::HostMirror h_ptrs;
1340  {
1341  // Allocate the packed row offsets array. We use a nonconst
1342  // temporary (packedRowOffsets) here, because k_ptrs is
1343  // const. We will assign packedRowOffsets to k_ptrs below.
1344  typename row_map_type::non_const_type
1345  packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
1346  typename row_entries_type::const_type numRowEnt_h =
1347  myGraph_->k_numRowEntries_;
1348  // We're computing offsets on device. This function can
1349  // handle numRowEnt_h being a host View.
1350  lclTotalNumEntries =
1351  computeOffsetsFromCounts (packedRowOffsets, numRowEnt_h);
1352  // packedRowOffsets is modifiable; k_ptrs isn't, so we have
1353  // to use packedRowOffsets in the loop above and assign here.
1354  k_ptrs = packedRowOffsets;
1355  k_ptrs_const = k_ptrs;
1356  }
1357 
1358 #ifdef HAVE_TPETRA_DEBUG
1359  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1360  (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1361  std::logic_error,
1362  "(StaticProfile unpacked branch) After packing k_ptrs, "
1363  "k_ptrs.extent(0) = " << k_ptrs.extent (0) << " != "
1364  "lclNumRows+1 = " << (lclNumRows+1) << ".");
1365  {
1366  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, lclNumRows);
1367  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1368  (valToCheck != lclTotalNumEntries, std::logic_error,
1369  "(StaticProfile unpacked branch) After filling k_ptrs, "
1370  "k_ptrs(lclNumRows=" << lclNumRows << ") = " << valToCheck
1371  << " != total number of entries on the calling process = "
1372  << lclTotalNumEntries << ".");
1373  }
1374 #endif // HAVE_TPETRA_DEBUG
1375 
1376  // Allocate the arrays of packed column indices and values.
1377  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1378  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1379 
1380  // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_,
1381  // and k_values1D_ are currently unpacked. Pack them, using
1382  // the packed row offsets array k_ptrs that we created above.
1383  //
1384  // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1385  // need to keep around the unpacked row offsets, column
1386  // indices, and values arrays.
1387 
1388  // Pack the column indices from unpacked k_lclInds1D_ into
1389  // packed k_inds. We will replace k_lclInds1D_ below.
1390  typedef pack_functor<typename Graph::local_graph_type::entries_type::non_const_type,
1391  typename Graph::local_graph_type::row_map_type>
1392  inds_packer_type;
1393  inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_,
1394  k_ptrs, curRowOffsets);
1395  typedef typename decltype (k_inds)::execution_space exec_space;
1396  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
1397  Kokkos::parallel_for (range_type (0, lclNumRows), indsPacker);
1398 
1399  // Pack the values from unpacked k_values1D_ into packed
1400  // k_vals. We will replace k_values1D_ below.
1401  typedef pack_functor<values_type, row_map_type> vals_packer_type;
1402  vals_packer_type valsPacker (k_vals, this->k_values1D_,
1403  k_ptrs, curRowOffsets);
1404  Kokkos::parallel_for (range_type (0, lclNumRows), valsPacker);
1405 
1406 #ifdef HAVE_TPETRA_DEBUG
1407  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1408  (k_ptrs.extent (0) == 0, std::logic_error,
1409  "(StaticProfile \"Optimize Storage\" = "
1410  "true branch) After packing, k_ptrs.extent(0) = 0. This "
1411  "probably means that k_rowPtrs_ was never allocated.");
1412  if (k_ptrs.extent (0) != 0) {
1413  const size_t numOffsets = static_cast<size_t> (k_ptrs.extent (0));
1414  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, numOffsets - 1);
1415  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1416  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1417  std::logic_error,
1418  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1419  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1420  " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1422  (static_cast<size_t> (valToCheck) != k_inds.extent (0),
1423  std::logic_error,
1424  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1425  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1426  " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1427  }
1428 #endif // HAVE_TPETRA_DEBUG
1429  }
1430  else { // We don't have to pack, so just set the pointers.
1431  k_ptrs_const = myGraph_->k_rowPtrs_;
1432  k_inds = myGraph_->k_lclInds1D_;
1433  k_vals = this->k_values1D_;
1434 
1435 #ifdef HAVE_TPETRA_DEBUG
1436  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1437  (k_ptrs_const.extent (0) == 0, std::logic_error,
1438  "(StaticProfile \"Optimize Storage\"=false branch) "
1439  "k_ptrs_const.extent(0) = 0. This probably means that "
1440  "k_rowPtrs_ was never allocated.");
1441  if (k_ptrs_const.extent (0) != 0) {
1442  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.extent (0));
1443  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs_const, numOffsets - 1);
1444  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1445  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1446  std::logic_error,
1447  "(StaticProfile \"Optimize Storage\"=false branch) "
1448  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1449  << " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1450  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1451  (static_cast<size_t> (valToCheck) != k_inds.extent (0),
1452  std::logic_error,
1453  "(StaticProfile \"Optimize Storage\" = false branch) "
1454  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1455  << " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1456  }
1457 #endif // HAVE_TPETRA_DEBUG
1458  }
1459  }
1460 
1461 #ifdef HAVE_TPETRA_DEBUG
1462  // Extra sanity checks.
1463  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1464  (static_cast<size_t> (k_ptrs_const.extent (0)) != lclNumRows + 1,
1465  std::logic_error, "After packing, k_ptrs_const.extent(0) = " <<
1466  k_ptrs_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
1467  << ".");
1468  if (k_ptrs_const.extent (0) != 0) {
1469  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.extent (0));
1470  const size_t k_ptrs_const_numOffsetsMinus1 =
1471  ::Tpetra::Details::getEntryOnHost (k_ptrs_const, numOffsets - 1);
1472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1473  (k_ptrs_const_numOffsetsMinus1 != k_vals.extent (0),
1474  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1475  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_vals.extent(0)"
1476  " = " << k_vals.extent (0) << ".");
1477  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1478  (k_ptrs_const_numOffsetsMinus1 != k_inds.extent (0),
1479  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1480  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_inds.extent(0)"
1481  " = " << k_inds.extent (0) << ".");
1482  }
1483 #endif // HAVE_TPETRA_DEBUG
1484 
1485  // May we ditch the old allocations for the packed (and otherwise
1486  // "optimized") allocations, later in this routine? Optimize
1487  // storage if the graph is not static, or if the graph already has
1488  // optimized storage.
1489  const bool defaultOptStorage =
1490  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
// With a null params the default decides; otherwise the "Optimize Storage"
// entry is read, with defaultOptStorage passed as its default value.
1491  const bool requestOptimizedStorage =
1492  (! params.is_null () && params->get ("Optimize Storage", defaultOptStorage)) ||
1493  (params.is_null () && defaultOptStorage);
1494 
1495  // The graph has optimized storage when indices are allocated,
1496  // myGraph_->k_numRowEntries_ is empty, and there are more than
1497  // zero rows on this process. It's impossible for the graph to
1498  // have dynamic profile (getProfileType() == DynamicProfile) and
1499  // be optimized (isStorageOptimized()).
1500  if (requestOptimizedStorage) {
1501  // Free the old, unpacked, unoptimized allocations.
1502  // Change the graph from dynamic to static allocation profile
1503 
1504  // Free graph data structures that are only needed for 2-D or
1505  // unpacked 1-D storage.
1506  myGraph_->lclInds2D_ = null; // legacy KokkosClassic 2-D storage
1507  myGraph_->k_numRowEntries_ = row_entries_type ();
1508 
1509  // Free the matrix's 2-D storage.
1510  this->values2D_ = null;
1511 
1512  // Keep the new 1-D packed allocations.
1513  myGraph_->k_rowPtrs_ = k_ptrs_const;
1514  myGraph_->k_lclInds1D_ = k_inds;
1515  this->k_values1D_ = k_vals;
1516 
1517  // Whatever graph was before, it's StaticProfile now.
1518  myGraph_->pftype_ = StaticProfile;
1519  myGraph_->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
1520  this->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
1521  }
1522 
1523  // Make the local graph, using the arrays of row offsets and
1524  // column indices that we built above. The local graph should be
1525  // null, but we delete it first so that any memory can be freed
1526  // before we allocate the new one.
1527  //
1528  // FIXME (mfh 06,28 Aug 2014) It would make more sense for
1529  // Tpetra::CrsGraph to have a protected method that accepts k_inds
1530  // and k_ptrs, and creates the local graph lclGraph_.
1531  myGraph_->lclGraph_ =
1532  typename Graph::local_graph_type (k_inds, k_ptrs_const);
1533 
1534  // Make the local matrix, using the local graph and vals array.
1535  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1536  getNodeNumCols (), k_vals,
1537  myGraph_->lclGraph_);
1538  }
1539 
1540  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1541  void
1543  fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1544  {
1545  using ::Tpetra::Details::ProfilingRegion;
1546  using Kokkos::create_mirror_view;
1547  using Teuchos::ArrayRCP;
1548  using Teuchos::Array;
1549  using Teuchos::null;
1550  using Teuchos::RCP;
1551  using Teuchos::rcp;
1552  typedef LocalOrdinal LO;
1553  typedef typename Graph::local_graph_type::row_map_type row_map_type;
1554  typedef typename row_map_type::non_const_type non_const_row_map_type;
1555  typedef typename local_matrix_type::values_type values_type;
1556 #ifdef HAVE_TPETRA_DEBUG
1557  const char tfecfFuncName[] = "fillLocalMatrix (called from fillComplete): ";
1558 #endif // HAVE_TPETRA_DEBUG
1559  ProfilingRegion regionFLM ("Tpetra::CrsMatrix::fillLocalMatrix");
1560 
1561  const size_t lclNumRows = getNodeNumRows();
1562  const map_type& rowMap = * (getRowMap ());
1563  RCP<node_type> node = rowMap.getNode ();
1564 
1565  // The goals of this routine are first, to allocate and fill
1566  // packed 1-D storage (see below for an explanation) in the vals
1567  // array, and second, to give vals to the local matrix and
1568  // finalize the local matrix. We only need k_ptrs, the packed 1-D
1569  // row offsets, within the scope of this routine, since we're only
1570  // filling the local matrix here (use fillLocalGraphAndMatrix() to
1571  // fill both the graph and the matrix at the same time).
1572 
1573  // get data from staticGraph_
1574  ArrayRCP<Array<LO> > lclInds2D = staticGraph_->lclInds2D_;
1575  size_t nodeNumEntries = staticGraph_->getNodeNumEntries ();
1576  size_t nodeNumAllocated = staticGraph_->getNodeAllocationSize ();
1577  row_map_type k_rowPtrs_ = staticGraph_->lclGraph_.row_map;
1578 
1579  row_map_type k_ptrs; // "packed" row offsets array
1580  values_type k_vals; // "packed" values array
1581 
1582  // May we ditch the old allocations for the packed (and otherwise
1583  // "optimized") allocations, later in this routine? Request
1584  // optimized storage by default.
1585  bool requestOptimizedStorage = true;
1586  const bool default_OptimizeStorage =
1587  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1588  if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) {
1589  requestOptimizedStorage = false;
1590  }
1591  // If we're not allowed to change a static graph, then we can't
1592  // change the storage of the matrix, either. This means that if
1593  // the graph's storage isn't already optimized, we can't optimize
1594  // the matrix's storage either. Check and give warning, as
1595  // appropriate.
1596  if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) {
1597  TPETRA_ABUSE_WARNING(true, std::runtime_error,
1598  "You requested optimized storage by setting the"
1599  "\"Optimize Storage\" flag to \"true\" in the parameter list, or by virtue"
1600  "of default behavior. However, the associated CrsGraph was filled separately"
1601  "and requested not to optimize storage. Therefore, the CrsMatrix cannot"
1602  "optimize storage.");
1603  requestOptimizedStorage = false;
1604  }
1605 
1606  typedef decltype (staticGraph_->k_numRowEntries_) row_entries_type;
1607 
1608  if (getProfileType() == DynamicProfile) {
1609  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1610  //
1611  // DynamicProfile means that the matrix's values are currently
1612  // stored in a 2-D "unpacked" format, in the array-of-arrays
1613  // values2D_. We allocate 1-D storage and then copy from 2-D
1614  // storage in values2D_ into 1-D storage in k_vals. Since we're
1615  // only allocating the local matrix here, not the local graph,
1616  // we don't need to keep the row offsets array, but we do need
1617  // it here temporarily in order to convert to 1-D storage. (The
1618  // allocStorage() function needs it.) We'll free ptrs later in
1619  // this method.
1620  //
1621  // FIXME (mfh 08 Aug 2014) If we're in this method, then the
1622  // graph should already have packed 1-D storage. Why can't we
1623  // just use the graph's current row offsets array?
1624 
1625  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1626  // the array of valid entry counts per row.
1627  //
1628  // Total number of entries in the matrix on the calling
1629  // process. We will compute this in the loop below. It's
1630  // cheap to compute and useful as a sanity check.
1631  size_t lclTotalNumEntries = 0;
1632  // This will be a host view of packed row offsets.
1633  typename non_const_row_map_type::HostMirror h_ptrs;
1634 
1635  typename row_entries_type::const_type numRowEnt_h =
1636  staticGraph_->k_numRowEntries_;
1637  {
1638  non_const_row_map_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1639  lclNumRows+1);
1640  // NOTE (mfh 27 Jun 2016) We need h_ptrs on host anyway, so
1641  // let's just compute offsets on host.
1642  h_ptrs = create_mirror_view (packedRowOffsets);
1644  lclTotalNumEntries = computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
1645  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1646  k_ptrs = packedRowOffsets;
1647  }
1648 
1649 #ifdef HAVE_TPETRA_DEBUG
1650  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1651  (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1652  std::logic_error, "In DynamicProfile branch, after packing k_ptrs, "
1653  "k_ptrs.extent(0) = " << k_ptrs.extent (0) << " != "
1654  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1655  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1656  (static_cast<size_t> (h_ptrs.extent (0)) != lclNumRows + 1,
1657  std::logic_error, "In DynamicProfile branch, after packing h_ptrs, "
1658  "h_ptrs.extent(0) = " << h_ptrs.extent (0) << " != "
1659  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1660  {
1661  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, lclNumRows);
1662  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1663  (static_cast<size_t> (valToCheck) != lclTotalNumEntries,
1664  std::logic_error, "(DynamicProfile branch) After packing k_ptrs, "
1665  "k_ptrs(lclNumRows = " << lclNumRows << ") = " << valToCheck
1666  << " != total number of entries on the calling process = "
1667  << lclTotalNumEntries << ".");
1668  }
1669 #endif // HAVE_TPETRA_DEBUG
1670 
1671  // Allocate the array of packed values.
1672  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1673  // We need a host view of the above, since 2-D storage lives on host.
1674  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1675  // Pack the values on the host.
1676  for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1677  const size_t numEnt = numRowEnt_h(lclRow);
1678  std::copy (values2D_[lclRow].begin(),
1679  values2D_[lclRow].begin() + numEnt,
1680  h_vals.data() + h_ptrs(lclRow));
1681  }
1682  // Copy the packed values to the device.
1683  Kokkos::deep_copy (k_vals, h_vals);
1684 
1685 #ifdef HAVE_TPETRA_DEBUG
1686  // Sanity check of packed row offsets.
1687  if (k_ptrs.extent (0) != 0) {
1688  const size_t numOffsets = static_cast<size_t> (k_ptrs.extent (0));
1689  const auto valToCheck =
1690  ::Tpetra::Details::getEntryOnHost (k_ptrs, numOffsets - 1);
1691  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1692  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1693  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1694  << (numOffsets-1) << ") = " << valToCheck << " != "
1695  "k_vals.extent(0) = " << k_vals.extent (0) << ".");
1696  }
1697 #endif // HAVE_TPETRA_DEBUG
1698  }
1699  else if (getProfileType () == StaticProfile) {
1700  // StaticProfile means that the matrix's values are currently
1701  // stored in a 1-D format. However, this format is "unpacked";
1702  // it doesn't necessarily have the same row offsets as indicated
1703  // by the ptrs array returned by allocRowPtrs. This could
1704  // happen, for example, if the user specified StaticProfile in
1705  // the constructor and fixed the number of matrix entries in
1706  // each row, but didn't fill all those entries.
1707  //
1708  // As above, we don't need to keep the "packed" row offsets
1709  // array ptrs here, but we do need it here temporarily, so we
1710  // have to allocate it. We'll free ptrs later in this method.
1711  //
1712  // Note that this routine checks whether storage has already
1713  // been packed. This is a common case for solution of nonlinear
1714  // PDEs using the finite element method, as long as the
1715  // structure of the sparse matrix does not change between linear
1716  // solves.
1717  if (nodeNumEntries != nodeNumAllocated) {
1718  // We have to pack the 1-D storage, since the user didn't fill
1719  // up all requested storage.
1720  non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1721  lclNumRows+1);
1722  // Total number of entries in the matrix on the calling
1723  // process. We will compute this in the loop below. It's
1724  // cheap to compute and useful as a sanity check.
1725  size_t lclTotalNumEntries = 0;
1726  k_ptrs = tmpk_ptrs;
1727  {
1728  typename row_entries_type::const_type numRowEnt_d =
1729  staticGraph_->k_numRowEntries_;
1731  // This function can handle the counts being a host View.
1732  lclTotalNumEntries = computeOffsetsFromCounts (tmpk_ptrs, numRowEnt_d);
1733  }
1734 
1735  // Allocate the "packed" values array.
1736  // It has exactly the right number of entries.
1737  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1738 
1739  // Pack k_values1D_ into k_vals. We will replace k_values1D_ below.
1740  typedef pack_functor<values_type, row_map_type> packer_type;
1741  packer_type valsPacker (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_);
1742 
1743  typedef typename decltype (k_vals)::execution_space exec_space;
1744  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
1745  Kokkos::parallel_for (range_type (0, lclNumRows), valsPacker);
1746  }
1747  else { // We don't have to pack, so just set the pointer.
1748  k_vals = k_values1D_;
1749  }
1750  }
1751 
1752  // May we ditch the old allocations for the packed one?
1753  if (requestOptimizedStorage) {
1754  // The user requested optimized storage, so we can dump the
1755  // unpacked 2-D and 1-D storage, and keep the packed storage.
1756  values2D_ = null;
1757  k_values1D_ = k_vals;
1758  this->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
1759  }
1760 
1761  // Build the local sparse matrix object. At this point, the local
1762  // matrix certainly has a column Map. Remember that the local
1763  // matrix's number of columns comes from the column Map, not the
1764  // domain Map.
1765  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1766  getColMap ()->getNodeNumElements (),
1767  k_vals,
1768  staticGraph_->getLocalGraph ());
1769  }
1770 
1771  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1772  void
1774  insertIndicesAndValues (crs_graph_type& graph,
1775  RowInfo& rowInfo,
1776  const typename crs_graph_type::SLocalGlobalViews& newInds,
1777  const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1778  const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1779  const ELocalGlobal lg,
1780  const ELocalGlobal I)
1781  {
1782  const size_t oldNumEnt = rowInfo.numEntries;
1783  const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
1784 
1785  // Use of memcpy here works around an issue with GCC >= 4.9.0,
1786  // that probably relates to scalar_type vs. impl_scalar_type
1787  // aliasing. See history of Tpetra_CrsGraph_def.hpp for
1788  // details; look for GCC_WORKAROUND macro definition.
1789  if (numInserted > 0) {
1790  const size_t startOffset = oldNumEnt;
1791  memcpy (&oldRowVals[startOffset], &newRowVals[0],
1792  numInserted * sizeof (impl_scalar_type));
1793  }
1794  }
1795 
1796  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1797  void
1798  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1799  insertLocalValues (const LocalOrdinal lclRow,
1800  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1801  const Teuchos::ArrayView<const Scalar>& values)
1802  {
1803  using std::endl;
1804  typedef impl_scalar_type IST;
1805  const char tfecfFuncName[] = "insertLocalValues: ";
1806 
1807  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1808  (! this->isFillActive (), std::runtime_error,
1809  "Fill is not active. After calling fillComplete, you must call "
1810  "resumeFill before you may insert entries into the matrix again.");
1811  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1812  (this->isStaticGraph (), std::runtime_error,
1813  "Cannot insert indices with static graph; use replaceLocalValues() "
1814  "instead.");
1815  // At this point, we know that myGraph_ is nonnull.
1816  crs_graph_type& graph = * (this->myGraph_);
1817  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1818  (graph.colMap_.is_null (), std::runtime_error,
1819  "Cannot insert local indices without a column map.");
1820  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1821  (graph.isGloballyIndexed (),
1822  std::runtime_error, "Graph indices are global; use "
1823  "insertGlobalValues().");
1824  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1825  (values.size () != indices.size (), std::runtime_error,
1826  "values.size() = " << values.size ()
1827  << " != indices.size() = " << indices.size () << ".");
1828  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1829  ! graph.rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1830  "Local row index " << lclRow << " does not belong to this process.");
1831 
1832  if (! graph.indicesAreAllocated ()) {
1833  this->allocateValues (LocalIndices, GraphNotYetAllocated);
1834  }
1835 
1836  const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
1837 #ifdef HAVE_TPETRA_DEBUG
1838  // In a debug build, test whether any of the given column indices
1839  // are not in the column Map. Keep track of the invalid column
1840  // indices so we can tell the user about them.
1841  {
1842  using Teuchos::toString;
1843 
1844  const map_type& colMap = * (graph.colMap_);
1845  Teuchos::Array<LocalOrdinal> badColInds;
1846  bool allInColMap = true;
1847  for (size_t k = 0; k < numEntriesToAdd; ++k) {
1848  if (! colMap.isNodeLocalElement (indices[k])) {
1849  allInColMap = false;
1850  badColInds.push_back (indices[k]);
1851  }
1852  }
1853  if (! allInColMap) {
1854  std::ostringstream os;
1855  os << "You attempted to insert entries in owned row " << lclRow
1856  << ", at the following column indices: " << toString (indices)
1857  << "." << endl;
1858  os << "Of those, the following indices are not in the column Map on "
1859  "this process: " << toString (badColInds) << "." << endl << "Since "
1860  "the matrix has a column Map already, it is invalid to insert "
1861  "entries at those locations.";
1862  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1863  (true, std::invalid_argument, os.str ());
1864  }
1865  }
1866 #endif // HAVE_TPETRA_DEBUG
1867 
1868  RowInfo rowInfo = graph.getRowInfo (lclRow);
1869  const size_t curNumEnt = rowInfo.numEntries;
1870  const size_t newNumEnt = curNumEnt + numEntriesToAdd;
1871  if (newNumEnt > rowInfo.allocSize) {
1872  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1873  (this->getProfileType () == StaticProfile, std::runtime_error,
1874  "New indices exceed statically allocated graph structure.");
1875  // This must be a nonconst reference, since we'll reallocate.
1876  Teuchos::Array<IST>& curVals = this->values2D_[lclRow];
1877  // Make space for the new matrix entries.
1878  // Teuchos::ArrayRCP::resize automatically copies over values on
1879  // reallocation.
1880  graph.lclInds2D_[rowInfo.localRow].resize (newNumEnt);
1881  curVals.resize (newNumEnt);
1882  rowInfo.allocSize = newNumEnt; // give rowInfo updated allocSize
1883  }
1884  typename crs_graph_type::SLocalGlobalViews indsView;
1885  indsView.linds = indices;
1886 
1887  Teuchos::ArrayView<IST> valsView = this->getViewNonConst (rowInfo);
1888  Teuchos::ArrayView<const IST> valsIn =
1889  Teuchos::av_reinterpret_cast<const IST> (values);
1890  this->insertIndicesAndValues (graph, rowInfo, indsView, valsView,
1891  valsIn, LocalIndices, LocalIndices);
1892 #ifdef HAVE_TPETRA_DEBUG
1893  const size_t chkNewNumEnt = graph.getNumEntriesInLocalRow (lclRow);
1894  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1895  (chkNewNumEnt != newNumEnt, std::logic_error,
1896  "The row should have " << newNumEnt << " entries after insert, but "
1897  "instead has " << chkNewNumEnt << ". Please report this bug to "
1898  "the Tpetra developers.");
1899 #endif // HAVE_TPETRA_DEBUG
1900  }
1901 
1902  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1903  void
1905  insertLocalValues (const LocalOrdinal localRow,
1906  const LocalOrdinal numEnt,
1907  const Scalar vals[],
1908  const LocalOrdinal cols[])
1909  {
1910  Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1911  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1912  this->insertLocalValues (localRow, colsT, valsT);
1913  }
1914 
1915  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1916  void
1918  insertGlobalValuesImpl (crs_graph_type& graph,
1919  RowInfo& rowInfo,
1920  const GlobalOrdinal gblColInds[],
1921  const impl_scalar_type vals[],
1922  const size_t numInputEnt)
1923  {
1924  typedef impl_scalar_type IST;
1925  typedef GlobalOrdinal GO;
1926  const char tfecfFuncName[] = "insertGlobalValuesImpl: ";
1927 
1928 #ifdef HAVE_TPETRA_DEBUG
1929  const size_t origNumEnt = graph.getNumEntriesInLocalRow (rowInfo.localRow);
1930 #endif // HAVE_TPETRA_DEBUG
1931 
1932  if (! graph.indicesAreAllocated ()) {
1933  this->allocateValues (GlobalIndices, GraphNotYetAllocated);
1934  // mfh 23 Jul 2017: allocateValues invalidates existing
1935  // getRowInfo results. Once we get rid of lazy graph
1936  // allocation, we'll be able to move the getRowInfo call outside
1937  // of this method.
1938  rowInfo = graph.getRowInfo (rowInfo.localRow);
1939  }
1940 
1941  const size_t curNumEnt = rowInfo.numEntries;
1942  const size_t newNumEnt = curNumEnt + numInputEnt;
1943  if (newNumEnt > rowInfo.allocSize) {
1944  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1945  (this->getProfileType () == StaticProfile &&
1946  newNumEnt > rowInfo.allocSize, std::runtime_error,
1947  "New indices exceed statically allocated graph structure. "
1948  "curNumEnt (" << curNumEnt << ") + numInputEnt ("
1949  << numInputEnt << ") > allocSize (" << rowInfo.allocSize
1950  << ").");
1951  // This needs to be a nonconst reference, in case we want to
1952  // reallocate it.
1953  Teuchos::Array<IST>& curVals = this->values2D_[rowInfo.localRow];
1954  // Teuchos::ArrayRCP::resize automatically copies over values on
1955  // reallocation.
1956  graph.gblInds2D_[rowInfo.localRow].resize (newNumEnt);
1957  curVals.resize (newNumEnt);
1958  rowInfo.allocSize = newNumEnt; // reassign for updated allocSize
1959  }
1960 
1961  using Teuchos::ArrayView;
1962  typename crs_graph_type::SLocalGlobalViews inputIndsAV;
1963  inputIndsAV.ginds = ArrayView<const GO> (gblColInds, numInputEnt);
1964  ArrayView<IST> curValsAV = this->getViewNonConst (rowInfo);
1965  ArrayView<const IST> inputValsAV (vals, numInputEnt);
1966 
1967  const ELocalGlobal curIndexingStatus =
1968  this->isGloballyIndexed () ? GlobalIndices : LocalIndices;
1969  // curIndexingStatus == GlobalIndices means the method calls
1970  // getGlobalViewNonConst() and does direct copying, which should
1971  // be reasonably fast. LocalIndices means the method calls the
1972  // Map's getLocalElement() method once per entry to insert. This
1973  // may be slow.
1974  this->insertIndicesAndValues (graph, rowInfo, inputIndsAV, curValsAV,
1975  inputValsAV, GlobalIndices,
1976  curIndexingStatus);
1977 #ifdef HAVE_TPETRA_DEBUG
1978  const size_t chkNewNumEnt =
1979  graph.getNumEntriesInLocalRow (rowInfo.localRow);
1980  if (chkNewNumEnt != newNumEnt) {
1981  std::ostringstream os;
1982  os << std::endl << "newNumEnt = " << newNumEnt
1983  << " != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1984  << ") = " << chkNewNumEnt << "." << std::endl
1985  << "\torigNumEnt: " << origNumEnt << std::endl
1986  << "\tnumInputEnt: " << numInputEnt << std::endl
1987  << "\tgblColInds: [";
1988  for (size_t k = 0; k < numInputEnt; ++k) {
1989  os << gblColInds[k];
1990  if (k + size_t (1) < numInputEnt) {
1991  os << ",";
1992  }
1993  }
1994  os << "]" << std::endl
1995  << "\tvals: [";
1996  for (size_t k = 0; k < numInputEnt; ++k) {
1997  os << vals[k];
1998  if (k + size_t (1) < numInputEnt) {
1999  os << ",";
2000  }
2001  }
2002  os << "]" << std::endl;
2003 
2004  if (this->supportsRowViews ()) {
2005  Teuchos::ArrayView<const Scalar> vals2;
2006  if (this->isGloballyIndexed ()) {
2007  Teuchos::ArrayView<const GlobalOrdinal> gblColInds2;
2008  const GlobalOrdinal gblRow =
2009  graph.rowMap_->getGlobalElement (rowInfo.localRow);
2010  if (gblRow == Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
2011  os << "Local row index " << rowInfo.localRow << " is invalid!" << std::endl;
2012  }
2013  else {
2014  bool getViewThrew = false;
2015  try {
2016  this->getGlobalRowView (gblRow, gblColInds2, vals2);
2017  }
2018  catch (std::exception& e) {
2019  getViewThrew = true;
2020  os << "getGlobalRowView threw exception:" << std::endl
2021  << e.what () << std::endl;
2022  }
2023  if (! getViewThrew) {
2024  os << "\tNew global column indices: "
2025  << Teuchos::toString (gblColInds2) << std::endl
2026  << "\tNew values: " << Teuchos::toString (vals2) << std::endl;
2027  }
2028  }
2029  }
2030  else if (this->isLocallyIndexed ()) {
2031  Teuchos::ArrayView<const LocalOrdinal> lclColInds2;
2032  this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
2033  os << "\tNew local column indices: " << Teuchos::toString (lclColInds2)
2034  << std::endl;
2035  os << "\tNew values: " << Teuchos::toString (vals2) << std::endl;
2036  }
2037  }
2038 
2039  os << "Please report this bug to the Tpetra developers.";
2040  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2041  (true, std::logic_error, os.str ());
2042  }
2043 #endif // HAVE_TPETRA_DEBUG
2044  }
2045 
2046  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2047  void
2048  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2049  insertGlobalValues (const GlobalOrdinal gblRow,
2050  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2051  const Teuchos::ArrayView<const Scalar>& values)
2052  {
2053  using Teuchos::toString;
2054  using std::endl;
2055  typedef impl_scalar_type IST;
2056  typedef LocalOrdinal LO;
2057  typedef GlobalOrdinal GO;
2058  typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2059  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
2060  const char tfecfFuncName[] = "insertGlobalValues: ";
2061 
2062 #ifdef HAVE_TPETRA_DEBUG
2063  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2064  (values.size () != indices.size (), std::runtime_error,
2065  "values.size() = " << values.size () << " != indices.size() = "
2066  << indices.size () << ".");
2067 #endif // HAVE_TPETRA_DEBUG
2068 
2069  // getRowMap() is not thread safe, because it increments RCP's
2070  // reference count. getCrsGraphRef() is thread safe.
2071  const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2072  const LO lclRow = rowMap.getLocalElement (gblRow);
2073 
2074  if (lclRow == OTLO::invalid ()) {
2075  // Input row is _not_ owned by the calling process.
2076  //
2077  // See a note (now deleted) from mfh 14 Dec 2012: If input row
2078  // is not in the row Map, it doesn't matter whether or not the
2079  // graph is static; the data just get stashed for later use by
2080  // globalAssemble().
2081  this->insertNonownedGlobalValues (gblRow, indices, values);
2082  }
2083  else { // Input row _is_ owned by the calling process
2084  if (this->isStaticGraph ()) {
2085  // Uh oh! Not allowed to insert into owned rows in that case.
2086  const int myRank = rowMap.getComm ()->getRank ();
2087  const int numProcs = rowMap.getComm ()->getSize ();
2088  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2089  (true, std::runtime_error,
2090  "The matrix was constructed with a constant (\"static\") graph, "
2091  "yet the given global row index " << gblRow << " is in the row "
2092  "Map on the calling process (with rank " << myRank << ", of " <<
2093  numProcs << " process(es)). In this case, you may not insert "
2094  "new entries into rows owned by the calling process.");
2095  }
2096 
2097  crs_graph_type& graph = * (this->myGraph_);
2098  const IST* const inputVals =
2099  reinterpret_cast<const IST*> (values.getRawPtr ());
2100  const GO* const inputGblColInds = indices.getRawPtr ();
2101  const size_t numInputEnt = indices.size ();
2102  RowInfo rowInfo = graph.getRowInfo (lclRow);
2103 
2104  // If the matrix has a column Map, check at this point whether
2105  // the column indices belong to the column Map.
2106  //
2107  // FIXME (mfh 16 May 2013) We may want to consider deferring the
2108  // test to the CrsGraph method, since it may have to do this
2109  // anyway.
2110  if (! graph.colMap_.is_null ()) {
2111  const map_type& colMap = * (graph.colMap_);
2112  // In a debug build, keep track of the nonowned ("bad") column
2113  // indices, so that we can display them in the exception
2114  // message. In a release build, just ditch the loop early if
2115  // we encounter a nonowned column index.
2116 #ifdef HAVE_TPETRA_DEBUG
2117  Teuchos::Array<GO> badColInds;
2118 #endif // HAVE_TPETRA_DEBUG
2119  const size_type numEntriesToInsert = indices.size ();
2120  bool allInColMap = true;
2121  for (size_type k = 0; k < numEntriesToInsert; ++k) {
2122  if (! colMap.isNodeGlobalElement (indices[k])) {
2123  allInColMap = false;
2124 #ifdef HAVE_TPETRA_DEBUG
2125  badColInds.push_back (indices[k]);
2126 #else
2127  break;
2128 #endif // HAVE_TPETRA_DEBUG
2129  }
2130  }
2131  if (! allInColMap) {
2132  std::ostringstream os;
2133  os << "You attempted to insert entries in owned row " << gblRow
2134  << ", at the following column indices: " << toString (indices)
2135  << "." << endl;
2136 #ifdef HAVE_TPETRA_DEBUG
2137  os << "Of those, the following indices are not in the column Map "
2138  "on this process: " << toString (badColInds) << "." << endl
2139  << "Since the matrix has a column Map already, it is invalid "
2140  "to insert entries at those locations.";
2141 #else
2142  os << "At least one of those indices is not in the column Map "
2143  "on this process." << endl << "It is invalid to insert into "
2144  "columns not in the column Map on the process that owns the "
2145  "row.";
2146 #endif // HAVE_TPETRA_DEBUG
2147  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2148  (true, std::invalid_argument, os.str ());
2149  }
2150  }
2151 
2152  this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2153  inputVals, numInputEnt);
2154  }
2155  }
2156 
2157 
2158  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2159  void
2161  insertGlobalValues (const GlobalOrdinal globalRow,
2162  const LocalOrdinal numEnt,
2163  const Scalar vals[],
2164  const GlobalOrdinal inds[])
2165  {
2166  Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2167  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2168  this->insertGlobalValues (globalRow, indsT, valsT);
2169  }
2170 
2171 
2172  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2173  void
2175  insertGlobalValuesFiltered (const GlobalOrdinal gblRow,
2176  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2177  const Teuchos::ArrayView<const Scalar>& values)
2178  {
2179  typedef impl_scalar_type IST;
2180  typedef LocalOrdinal LO;
2181  typedef GlobalOrdinal GO;
2182  typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2183  const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
2184 
2185 #ifdef HAVE_TPETRA_DEBUG
2186  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2187  (values.size () != indices.size (), std::runtime_error,
2188  "values.size() = " << values.size () << " != indices.size() = "
2189  << indices.size () << ".");
2190 #endif // HAVE_TPETRA_DEBUG
2191 
2192  // getRowMap() is not thread safe, because it increments RCP's
2193  // reference count. getCrsGraphRef() is thread safe.
2194  const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2195  const LO lclRow = rowMap.getLocalElement (gblRow);
2196 
2197  if (lclRow == OTLO::invalid ()) {
2198  // Input row is _not_ owned by the calling process.
2199  //
2200  // See a note (now deleted) from mfh 14 Dec 2012: If input row
2201  // is not in the row Map, it doesn't matter whether or not the
2202  // graph is static; the data just get stashed for later use by
2203  // globalAssemble().
2204  this->insertNonownedGlobalValues (gblRow, indices, values);
2205  }
2206  else { // Input row _is_ owned by the calling process
2207  if (this->isStaticGraph ()) {
2208  // Uh oh! Not allowed to insert into owned rows in that case.
2209  const int myRank = rowMap.getComm ()->getRank ();
2210  const int numProcs = rowMap.getComm ()->getSize ();
2211  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2212  (true, std::runtime_error,
2213  "The matrix was constructed with a constant (\"static\") graph, "
2214  "yet the given global row index " << gblRow << " is in the row "
2215  "Map on the calling process (with rank " << myRank << ", of " <<
2216  numProcs << " process(es)). In this case, you may not insert "
2217  "new entries into rows owned by the calling process.");
2218  }
2219 
2220  crs_graph_type& graph = * (this->myGraph_);
2221  const IST* const inputVals =
2222  reinterpret_cast<const IST*> (values.getRawPtr ());
2223  const GO* const inputGblColInds = indices.getRawPtr ();
2224  const size_t numInputEnt = indices.size ();
2225  RowInfo rowInfo = graph.getRowInfo (lclRow);
2226 
2227  if (! graph.colMap_.is_null ()) { // We have a column Map.
2228  const map_type& colMap = * (graph.colMap_);
2229  size_t curOffset = 0;
2230  while (curOffset < numInputEnt) {
2231  // Find a sequence of input indices that are in the column
2232  // Map on the calling process. Doing a sequence at a time,
2233  // instead of one at a time, amortizes some overhead.
2234  size_t endOffset = curOffset;
2235  for ( ; endOffset < numInputEnt &&
2236  colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2237  ++endOffset)
2238  {}
2239  // curOffset, endOffset: half-exclusive range of indices in
2240  // the column Map on the calling process. If endOffset ==
2241  // curOffset, the range is empty.
2242  const LO numIndInSeq = (endOffset - curOffset);
2243  if (numIndInSeq != 0) {
2244  this->insertGlobalValuesImpl (graph, rowInfo,
2245  inputGblColInds + curOffset,
2246  inputVals + curOffset,
2247  numIndInSeq);
2248  }
2249  // Invariant before the increment line: Either endOffset ==
2250  // numInputEnt, or inputGblColInds[endOffset] is not in the
2251  // column Map on the calling process.
2252 #ifdef HAVE_TPETRA_DEBUG
2253  const bool invariant = endOffset == numInputEnt ||
2254  colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2255  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2256  (! invariant, std::logic_error, std::endl << "Invariant failed!");
2257 #endif // HAVE_TPETRA_DEBUG
2258  curOffset = endOffset + 1;
2259  }
2260  }
2261  else { // we don't have a column Map.
2262  this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2263  inputVals, numInputEnt);
2264  }
2265  }
2266  }
2267 
2268  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2269  LocalOrdinal
2270  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2271  replaceLocalValuesImpl (impl_scalar_type rowVals[],
2272  const crs_graph_type& graph,
2273  const RowInfo& rowInfo,
2274  const LocalOrdinal inds[],
2275  const impl_scalar_type newVals[],
2276  const LocalOrdinal numElts) const
2277  {
2278  typedef LocalOrdinal LO;
2279  typedef GlobalOrdinal GO;
2280 
2281  const bool sorted = graph.isSorted ();
2282 
2283  size_t hint = 0; // Guess for the current index k into rowVals
2284  LO numValid = 0; // number of valid local column indices
2285 
2286  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2287  // accurately, it assumes that the host execution space can
2288  // access data in both InputMemorySpace and ValsMemorySpace.
2289 
2290  if (graph.isLocallyIndexed ()) {
2291  // Get a view of the column indices in the row. This amortizes
2292  // the cost of getting the view over all the entries of inds.
2293  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2294 
2295  for (LO j = 0; j < numElts; ++j) {
2296  const LO lclColInd = inds[j];
2297  const size_t offset =
2298  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2299  lclColInd, hint, sorted);
2300  if (offset != rowInfo.numEntries) {
2301  rowVals[offset] = newVals[j];
2302  hint = offset + 1;
2303  ++numValid;
2304  }
2305  }
2306  }
2307  else if (graph.isGloballyIndexed ()) {
2308  if (graph.colMap_.is_null ()) {
2309  return Teuchos::OrdinalTraits<LO>::invalid ();
2310  }
2311  const map_type colMap = * (graph.colMap_);
2312 
2313  // Get a view of the column indices in the row. This amortizes
2314  // the cost of getting the view over all the entries of inds.
2315  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2316 
2317  for (LO j = 0; j < numElts; ++j) {
2318  const GO gblColInd = colMap.getGlobalElement (inds[j]);
2319  if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2320  const size_t offset =
2321  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2322  gblColInd, hint, sorted);
2323  if (offset != rowInfo.numEntries) {
2324  rowVals[offset] = newVals[j];
2325  hint = offset + 1;
2326  ++numValid;
2327  }
2328  }
2329  }
2330  }
2331  // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
2332  // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
2333  // to be neither locally nor globally indexed on a process.
2334  // This means that the graph or matrix has no entries on that
2335  // process. Epetra also works like this. It's related to lazy
2336  // allocation (on first insertion, not at graph / matrix
2337  // construction). Lazy allocation will go away because it is
2338  // not thread scalable.
2339 
2340  return numValid;
2341  }
2342 
2343  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2344  LocalOrdinal
2345  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2346  replaceLocalValues (const LocalOrdinal localRow,
2347  const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2348  const Teuchos::ArrayView<const Scalar>& vals) const
2349  {
2350  typedef LocalOrdinal LO;
2351 
2352  const LO numInputEnt = static_cast<LO> (lclCols.size ());
2353  if (static_cast<LO> (vals.size ()) != numInputEnt) {
2354  return Teuchos::OrdinalTraits<LO>::invalid ();
2355  }
2356  const LO* const inputInds = lclCols.getRawPtr ();
2357  const Scalar* const inputVals = vals.getRawPtr ();
2358  return this->replaceLocalValues (localRow, numInputEnt,
2359  inputVals, inputInds);
2360  }
2361 
// Replace values in local row `localRow`, taking the new values and their
// local column indices from raw arrays of length `numEnt`.
//
// Returns Teuchos::OrdinalTraits<LocalOrdinal>::invalid() if fill is not
// active or staticGraph_ is null; returns 0 if getRowInfo() reports an
// invalid local row; otherwise returns whatever replaceLocalValuesImpl()
// returns.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 2364) -- presumably lost when the listing
// was extracted; confirm against the original file.
2362  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2363  LocalOrdinal
2365  replaceLocalValues (const LocalOrdinal localRow,
2366  const LocalOrdinal numEnt,
2367  const Scalar inputVals[],
2368  const LocalOrdinal inputCols[]) const
2369  {
2370  typedef impl_scalar_type IST;
2371  typedef LocalOrdinal LO;
2372 
2373  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2374  // Fill must be active and the "nonconst" graph must exist.
2375  return Teuchos::OrdinalTraits<LO>::invalid ();
2376  }
2377  const crs_graph_type& graph = * (this->staticGraph_);
2378  const RowInfo rowInfo = graph.getRowInfo (localRow);
2379 
2380  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2381  // The calling process does not own this row, so it is not
2382  // allowed to modify its values.
2383  return static_cast<LO> (0);
2384  }
2385  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // View the caller's Scalar array as impl_scalar_type (IST).  This
      // assumes the two types are layout compatible -- TODO confirm.
2386  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2387  return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2388  inputCols, inVals, numEnt);
2389  }
2390 
// Overwrite entries of one row's values (rowVals) with newVals, where the
// entries to overwrite are named by global column indices (inds).
//
// For each j in [0, numElts), the method looks up inds[j] among the row's
// column indices.  If it is found, rowVals at that position is set to
// newVals[j] and the input is counted as valid; if not, it is skipped.
//
// Returns the number of input indices that were found in the row.  That is
// 0 if the graph is locally indexed but has a null column Map, or if the
// graph is neither locally nor globally indexed.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 2393) -- presumably lost when the listing
// was extracted; confirm against the original file.
2391  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2392  LocalOrdinal
2394  replaceGlobalValuesImpl (impl_scalar_type rowVals[],
2395  const crs_graph_type& graph,
2396  const RowInfo& rowInfo,
2397  const GlobalOrdinal inds[],
2398  const impl_scalar_type newVals[],
2399  const LocalOrdinal numElts) const
2400  {
2401  typedef LocalOrdinal LO;
2402  typedef GlobalOrdinal GO;
2403 
2404  const bool sorted = graph.isSorted ();
2405 
2406  size_t hint = 0; // guess at the index's relative offset in the row
2407  LO numValid = 0; // number of valid input column indices
2408 
2409  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2410  // accurately, it assumes that the host execution space can
2411  // access data in all the Views.
2412 
2413  if (graph.isLocallyIndexed ()) {
2414  // NOTE (mfh 04 Nov 2015) Dereferencing an RCP or reading its
2415  // pointer does NOT change its reference count. Thus, this
2416  // code is still thread safe.
2417  if (graph.colMap_.is_null ()) {
2418  // NO input column indices are valid in this case, since if
2419  // the column Map is null on the calling process, then the
2420  // calling process owns no graph entries.
2421  return numValid;
2422  }
2423  const map_type& colMap = * (graph.colMap_);
2424 
2425  // Get a view of the column indices in the row. This amortizes
2426  // the cost of getting the view over all the entries of inds.
2427  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2428  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2429  for (LO j = 0; j < numElts; ++j) {
      // The row stores local indices, so convert the input global index
      // first.  Inputs that the column Map cannot convert are skipped.
2430  const LO lclColInd = colMap.getLocalElement (inds[j]);
2431  if (lclColInd != LINV) {
2432  const size_t offset =
2433  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2434  lclColInd, hint, sorted);
      // The code treats offset == rowInfo.numEntries as "not found".
2435  if (offset != rowInfo.numEntries) {
2436  rowVals[offset] = newVals[j];
      // Start the next search just past the entry that was found.
2437  hint = offset + 1;
2438  numValid++;
2439  }
2440  }
2441  }
2442  }
2443  else if (graph.isGloballyIndexed ()) {
2444  // Get a view of the column indices in the row. This amortizes
2445  // the cost of getting the view over all the entries of inds.
2446  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2447 
2448  for (LO j = 0; j < numElts; ++j) {
2449  const GO gblColInd = inds[j];
2450  const size_t offset =
2451  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2452  gblColInd, hint, sorted);
2453  if (offset != rowInfo.numEntries) {
2454  rowVals[offset] = newVals[j];
2455  hint = offset + 1;
2456  numValid++;
2457  }
2458  }
2459  }
2460  // If the graph is neither locally nor globally indexed on the
2461  // calling process, that means the calling process has no graph
2462  // entries. Thus, none of the input column indices are valid.
2463 
2464  return numValid;
2465  }
2466 
2467  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2468  LocalOrdinal
2469  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2470  replaceGlobalValues (const GlobalOrdinal globalRow,
2471  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2472  const Teuchos::ArrayView<const Scalar>& inputVals) const
2473  {
2474  typedef LocalOrdinal LO;
2475 
2476  const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
2477  if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2478  return Teuchos::OrdinalTraits<LO>::invalid ();
2479  }
2480  return this->replaceGlobalValues (globalRow, numInputEnt,
2481  inputVals.getRawPtr (),
2482  inputGblColInds.getRawPtr ());
2483  }
2484 
// Replace values in the row with global index `globalRow`, taking the new
// values and their global column indices from raw arrays of length `numEnt`.
//
// Returns Teuchos::OrdinalTraits<LocalOrdinal>::invalid() if fill is not
// active or staticGraph_ is null; returns 0 if
// getRowInfoFromGlobalRowIndex() reports an invalid local row; otherwise
// returns whatever replaceGlobalValuesImpl() returns.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 2487) -- presumably lost when the listing
// was extracted; confirm against the original file.
2485  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2486  LocalOrdinal
2488  replaceGlobalValues (const GlobalOrdinal globalRow,
2489  const LocalOrdinal numEnt,
2490  const Scalar inputVals[],
2491  const GlobalOrdinal inputGblColInds[]) const
2492  {
2493  typedef impl_scalar_type IST;
2494  typedef LocalOrdinal LO;
2495 
2496  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2497  // Fill must be active and the "nonconst" graph must exist.
2498  return Teuchos::OrdinalTraits<LO>::invalid ();
2499  }
2500  const crs_graph_type& graph = * (this->staticGraph_);
2501 
2502  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (globalRow);
2503  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2504  // The input row is invalid on the calling process, which
2505  // means that the calling process replaced 0 entries.
2506  return static_cast<LO> (0);
2507  }
2508 
2509  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // View the caller's Scalar array as impl_scalar_type (IST).  This
      // assumes the two types are layout compatible -- TODO confirm.
2510  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2511  return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2512  inputGblColInds, inVals, numEnt);
2513  }
2514 
// Add newVals into entries of one row's values (rowVals), where the entries
// to update are named by global column indices (inds).
//
// For each j in [0, numElts), the method looks up inds[j] among the row's
// column indices.  If it is found, newVals[j] is added to rowVals at that
// position and the input is counted as valid; if not, it is skipped.  The
// add uses Kokkos::atomic_add when `atomic` is true and a plain += when it
// is false.
//
// Returns the number of input indices that were found in the row.  That is
// 0 if the graph is locally indexed but has a null column Map, or if the
// graph is neither locally nor globally indexed.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 2517) -- presumably lost when the listing
// was extracted; confirm against the original file.
2515  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2516  LocalOrdinal
2518  sumIntoGlobalValuesImpl (impl_scalar_type rowVals[],
2519  const crs_graph_type& graph,
2520  const RowInfo& rowInfo,
2521  const GlobalOrdinal inds[],
2522  const impl_scalar_type newVals[],
2523  const LocalOrdinal numElts,
2524  const bool atomic) const
2525  {
2526  typedef LocalOrdinal LO;
2527  typedef GlobalOrdinal GO;
2528 
2529  const bool sorted = graph.isSorted ();
2530 
2531  size_t hint = 0; // guess at the index's relative offset in the row
2532  LO numValid = 0; // number of valid input column indices
2533 
2534  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2535  // accurately, it assumes that the host execution space can
2536  // access data in both InputMemorySpace and ValsMemorySpace.
2537 
2538  if (graph.isLocallyIndexed ()) {
2539  // NOTE (mfh 04 Nov 2015) Dereferencing an RCP or reading its
2540  // pointer does NOT change its reference count. Thus, this
2541  // code is still thread safe.
2542  if (graph.colMap_.is_null ()) {
2543  // NO input column indices are valid in this case, since if
2544  // the column Map is null on the calling process, then the
2545  // calling process owns no graph entries.
2546  return numValid;
2547  }
2548  const map_type& colMap = * (graph.colMap_);
2549 
2550  // Get a view of the column indices in the row. This amortizes
2551  // the cost of getting the view over all the entries of inds.
2552  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2553  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2554 
2555  for (LO j = 0; j < numElts; ++j) {
      // The row stores local indices, so convert the input global index
      // first.  Inputs that the column Map cannot convert are skipped.
2556  const LO lclColInd = colMap.getLocalElement (inds[j]);
2557  if (lclColInd != LINV) {
2558  const size_t offset =
2559  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2560  lclColInd, hint, sorted);
      // The code treats offset == rowInfo.numEntries as "not found".
2561  if (offset != rowInfo.numEntries) {
2562  if (atomic) {
2563  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2564  }
2565  else {
2566  rowVals[offset] += newVals[j];
2567  }
      // Start the next search just past the entry that was found.
2568  hint = offset + 1;
2569  numValid++;
2570  }
2571  }
2572  }
2573  }
2574  else if (graph.isGloballyIndexed ()) {
2575  // Get a view of the column indices in the row. This amortizes
2576  // the cost of getting the view over all the entries of inds.
2577  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2578 
2579  for (LO j = 0; j < numElts; ++j) {
2580  const GO gblColInd = inds[j];
2581  const size_t offset =
2582  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2583  gblColInd, hint, sorted);
2584  if (offset != rowInfo.numEntries) {
2585  if (atomic) {
2586  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2587  }
2588  else {
2589  rowVals[offset] += newVals[j];
2590  }
2591  hint = offset + 1;
2592  numValid++;
2593  }
2594  }
2595  }
2596  // If the graph is neither locally nor globally indexed on the
2597  // calling process, that means the calling process has no graph
2598  // entries. Thus, none of the input column indices are valid.
2599 
2600  return numValid;
2601  }
2602 
2603  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2604  LocalOrdinal
2605  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2606  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2607  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2608  const Teuchos::ArrayView<const Scalar>& inputVals,
2609  const bool atomic)
2610  {
2611  typedef LocalOrdinal LO;
2612 
2613  const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
2614  if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2615  return Teuchos::OrdinalTraits<LO>::invalid ();
2616  }
2617  return this->sumIntoGlobalValues (gblRow, numInputEnt,
2618  inputVals.getRawPtr (),
2619  inputGblColInds.getRawPtr (),
2620  atomic);
2621  }
2622 
// Sum values into the row with global index `gblRow`, taking the values and
// their global column indices from raw arrays of length `numInputEnt`.
//
// Returns Teuchos::OrdinalTraits<LocalOrdinal>::invalid() if fill is not
// active or staticGraph_ is null.  If getRowInfoFromGlobalRowIndex()
// reports an invalid local row, the input is handed to
// insertNonownedGlobalValues() and numInputEnt is returned.  Otherwise the
// method returns whatever sumIntoGlobalValuesImpl() returns.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 2625) -- presumably lost when the listing
// was extracted; confirm against the original file.
2623  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2624  LocalOrdinal
2626  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2627  const LocalOrdinal numInputEnt,
2628  const Scalar inputVals[],
2629  const GlobalOrdinal inputGblColInds[],
2630  const bool atomic)
2631  {
2632  typedef impl_scalar_type IST;
2633  typedef LocalOrdinal LO;
2634  typedef GlobalOrdinal GO;
2635 
2636  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2637  // Fill must be active and the "nonconst" graph must exist.
2638  return Teuchos::OrdinalTraits<LO>::invalid ();
2639  }
2640  const crs_graph_type& graph = * (this->staticGraph_);
2641 
2642  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2643  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2644  // mfh 23 Mar 2017, 26 Jul 2017: This branch may not be
2645  // thread safe in a debug build, in part because it uses
2646  // Teuchos::ArrayView, and in part because of the data structure
2647  // used to stash outgoing entries.
2648  using Teuchos::ArrayView;
      // Wrap the raw arrays as ArrayViews, passing NULL rather than the
      // caller's pointer when there are no entries.
2649  ArrayView<const GO> inputGblColInds_av (numInputEnt == 0 ? NULL :
2650  inputGblColInds, numInputEnt);
2651  ArrayView<const Scalar> inputVals_av (numInputEnt == 0 ? NULL :
2652  inputVals, numInputEnt);
2653  // gblRow is not in the row Map on the calling process, so stash
2654  // the given entries away in a separate data structure.
2655  // globalAssemble() (called during fillComplete()) will exchange
2656  // that data and sum it in using sumIntoGlobalValues().
2657  this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2658  inputVals_av);
2659  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2660  // since we won't know whether the given indices were valid
2661  // until globalAssemble (called in fillComplete) is called.
2662  // That's why insertNonownedGlobalValues doesn't return
2663  // anything. Just for consistency, I'll return the number of
2664  // entries that the user gave us.
2665  return numInputEnt;
2666  }
2667  else { // input row is in the row Map on the calling process
2668  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // View the caller's Scalar array as impl_scalar_type (IST).  This
      // assumes the two types are layout compatible -- TODO confirm.
2669  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2670  return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2671  inputGblColInds, inVals,
2672  numInputEnt, atomic);
2673  }
2674  }
2675 
// Apply the binary function `f` to entries of local row `lclRow`, with the
// input columns given as local indices in a raw array of length
// `numInputEnt`.
//
// Returns Teuchos::OrdinalTraits<LocalOrdinal>::invalid() if fill is not
// active or staticGraph_ is null; returns 0 if getRowInfo() reports an
// invalid local row; otherwise returns whatever the row-values overload of
// transformLocalValues returns.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 2678) -- presumably lost when the listing
// was extracted; confirm against the original file.
2676  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2677  LocalOrdinal
2679  transformLocalValues (const LocalOrdinal lclRow,
2680  const LocalOrdinal numInputEnt,
2681  const impl_scalar_type inputVals[],
2682  const LocalOrdinal inputCols[],
2683  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2684  const bool atomic) const
2685  {
2686  using Tpetra::Details::OrdinalTraits;
2687  typedef LocalOrdinal LO;
2688 
2689  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2690  // Fill must be active and the "nonconst" graph must exist.
2691  return Teuchos::OrdinalTraits<LO>::invalid ();
2692  }
2693  const crs_graph_type& graph = * (this->staticGraph_);
2694  const RowInfo rowInfo = graph.getRowInfo (lclRow);
2695 
2696  if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2697  // The calling process does not own this row, so it is not
2698  // allowed to modify its values.
2699  return static_cast<LO> (0);
2700  }
2701  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // Delegate to the overload that works on the row's raw values.
2702  return this->transformLocalValues (curRowVals.data (), graph,
2703  rowInfo, inputCols, inputVals,
2704  numInputEnt, f, atomic);
2705  }
2706 
2707  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2708  LocalOrdinal
2709  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2710  transformGlobalValues (const GlobalOrdinal gblRow,
2711  const LocalOrdinal numInputEnt,
2712  const impl_scalar_type inputVals[],
2713  const GlobalOrdinal inputCols[],
2714  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2715  const bool atomic) const
2716  {
2717  using Tpetra::Details::OrdinalTraits;
2718  typedef LocalOrdinal LO;
2719 
2720  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2721  // Fill must be active and the "nonconst" graph must exist.
2722  return OrdinalTraits<LO>::invalid ();
2723  }
2724  const crs_graph_type& graph = * (this->staticGraph_);
2725  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2726 
2727  if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2728  // The calling process does not own this row, so it is not
2729  // allowed to modify its values.
2730  return static_cast<LO> (0);
2731  }
2732  auto curRowVals = this->getRowViewNonConst (rowInfo);
2733  return this->transformGlobalValues (curRowVals.data (), graph,
2734  rowInfo, inputCols, inputVals,
2735  numInputEnt, f, atomic);
2736  }
2737 
2738  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2739  LocalOrdinal
2740  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2741  transformLocalValues (impl_scalar_type rowVals[],
2742  const crs_graph_type& graph,
2743  const RowInfo& rowInfo,
2744  const LocalOrdinal inds[],
2745  const impl_scalar_type newVals[],
2746  const LocalOrdinal numElts,
2747  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2748  const bool atomic) const
2749  {
2750  typedef impl_scalar_type ST;
2751  typedef LocalOrdinal LO;
2752  typedef GlobalOrdinal GO;
2753 
2754  //if (newVals.extent (0) != inds.extent (0)) {
2755  // The sizes of the input arrays must match.
2756  //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
2757  //}
2758  //const LO numElts = static_cast<LO> (inds.extent (0));
2759  const bool sorted = graph.isSorted ();
2760 
2761  LO numValid = 0; // number of valid input column indices
2762  size_t hint = 0; // Guess for the current index k into rowVals
2763 
2764  if (graph.isLocallyIndexed ()) {
2765  // Get a view of the column indices in the row. This amortizes
2766  // the cost of getting the view over all the entries of inds.
2767  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2768 
2769  for (LO j = 0; j < numElts; ++j) {
2770  const LO lclColInd = inds[j];
2771  const size_t offset =
2772  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2773  lclColInd, hint, sorted);
2774  if (offset != rowInfo.numEntries) {
2775  if (atomic) {
2776  // NOTE (mfh 30 Nov 2015) The commented-out code is
2777  // wrong because another thread may have changed
2778  // rowVals[offset] between those two lines of code.
2779  //
2780  //const ST newVal = f (rowVals[offset], newVals[j]);
2781  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2782 
2783  volatile ST* const dest = &rowVals[offset];
2784  (void) atomic_binary_function_update (dest, newVals[j], f);
2785  }
2786  else {
2787  // use binary function f
2788  rowVals[offset] = f (rowVals[offset], newVals[j]);
2789  }
2790  hint = offset + 1;
2791  ++numValid;
2792  }
2793  }
2794  }
2795  else if (graph.isGloballyIndexed ()) {
2796  // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
2797  // pointer does NOT change its reference count. Thus, this
2798  // code is still thread safe.
2799  if (graph.colMap_.is_null ()) {
2800  // NO input column indices are valid in this case. Either
2801  // the column Map hasn't been set yet (so local indices
2802  // don't exist yet), or the calling process owns no graph
2803  // entries.
2804  return numValid;
2805  }
2806  const map_type& colMap = * (graph.colMap_);
2807  // Get a view of the column indices in the row. This amortizes
2808  // the cost of getting the view over all the entries of inds.
2809  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2810 
2811  const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2812  for (LO j = 0; j < numElts; ++j) {
2813  const GO gblColInd = colMap.getGlobalElement (inds[j]);
2814  if (gblColInd != GINV) {
2815  const size_t offset =
2816  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2817  gblColInd, hint, sorted);
2818  if (offset != rowInfo.numEntries) {
2819  if (atomic) {
2820  // NOTE (mfh 30 Nov 2015) The commented-out code is
2821  // wrong because another thread may have changed
2822  // rowVals[offset] between those two lines of code.
2823  //
2824  //const ST newVal = f (rowVals[offset], newVals[j]);
2825  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2826 
2827  volatile ST* const dest = &rowVals[offset];
2828  (void) atomic_binary_function_update (dest, newVals[j], f);
2829  }
2830  else {
2831  // use binary function f
2832  rowVals[offset] = f (rowVals[offset], newVals[j]);
2833  }
2834  hint = offset + 1;
2835  numValid++;
2836  }
2837  }
2838  }
2839  }
2840  // If the graph is neither locally nor globally indexed on the
2841  // calling process, that means the calling process has no graph
2842  // entries. Thus, none of the input column indices are valid.
2843 
2844  return numValid;
2845  }
2846 
2847  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2848  LocalOrdinal
2849  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2850  transformGlobalValues (impl_scalar_type rowVals[],
2851  const crs_graph_type& graph,
2852  const RowInfo& rowInfo,
2853  const GlobalOrdinal inds[],
2854  const impl_scalar_type newVals[],
2855  const LocalOrdinal numElts,
2856  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2857  const bool atomic) const
2858  {
2859  typedef impl_scalar_type ST;
2860  typedef LocalOrdinal LO;
2861  typedef GlobalOrdinal GO;
2862 
2863  //if (newVals.extent (0) != inds.extent (0)) {
2864  // The sizes of the input arrays must match.
2865  //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
2866  //}
2867  //const LO numElts = static_cast<LO> (inds.extent (0));
2868  const bool sorted = graph.isSorted ();
2869 
2870  LO numValid = 0; // number of valid input column indices
2871  size_t hint = 0; // Guess for the current index k into rowVals
2872 
2873  if (graph.isGloballyIndexed ()) {
2874  // Get a view of the column indices in the row. This amortizes
2875  // the cost of getting the view over all the entries of inds.
2876  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2877 
2878  for (LO j = 0; j < numElts; ++j) {
2879  const GO gblColInd = inds[j];
2880  const size_t offset =
2881  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2882  gblColInd, hint, sorted);
2883  if (offset != rowInfo.numEntries) {
2884  if (atomic) {
2885  // NOTE (mfh 30 Nov 2015) The commented-out code is
2886  // wrong because another thread may have changed
2887  // rowVals[offset] between those two lines of code.
2888  //
2889  //const ST newVal = f (rowVals[offset], newVals[j]);
2890  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2891 
2892  volatile ST* const dest = &rowVals[offset];
2893  (void) atomic_binary_function_update (dest, newVals[j], f);
2894  }
2895  else {
2896  // use binary function f
2897  rowVals[offset] = f (rowVals[offset], newVals[j]);
2898  }
2899  hint = offset + 1;
2900  ++numValid;
2901  }
2902  }
2903  }
2904  else if (graph.isLocallyIndexed ()) {
2905  // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
2906  // pointer does NOT change its reference count. Thus, this
2907  // code is still thread safe.
2908  if (graph.colMap_.is_null ()) {
2909  // NO input column indices are valid in this case. Either the
2910  // column Map hasn't been set yet (so local indices don't
2911  // exist yet), or the calling process owns no graph entries.
2912  return numValid;
2913  }
2914  const map_type& colMap = * (graph.colMap_);
2915  // Get a view of the column indices in the row. This amortizes
2916  // the cost of getting the view over all the entries of inds.
2917  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2918 
2919  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2920  for (LO j = 0; j < numElts; ++j) {
2921  const LO lclColInd = colMap.getLocalElement (inds[j]);
2922  if (lclColInd != LINV) {
2923  const size_t offset =
2924  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2925  lclColInd, hint, sorted);
2926  if (offset != rowInfo.numEntries) {
2927  if (atomic) {
2928  // NOTE (mfh 30 Nov 2015) The commented-out code is
2929  // wrong because another thread may have changed
2930  // rowVals[offset] between those two lines of code.
2931  //
2932  //const ST newVal = f (rowVals[offset], newVals[j]);
2933  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2934 
2935  volatile ST* const dest = &rowVals[offset];
2936  (void) atomic_binary_function_update (dest, newVals[j], f);
2937  }
2938  else {
2939  // use binary function f
2940  rowVals[offset] = f (rowVals[offset], newVals[j]);
2941  }
2942  hint = offset + 1;
2943  numValid++;
2944  }
2945  }
2946  }
2947  }
2948  // If the graph is neither locally nor globally indexed on the
2949  // calling process, that means the calling process has no graph
2950  // entries. Thus, none of the input column indices are valid.
2951 
2952  return numValid;
2953  }
2954 
2955  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2956  LocalOrdinal
2957  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2958  sumIntoLocalValuesImpl (impl_scalar_type rowVals[],
2959  const crs_graph_type& graph,
2960  const RowInfo& rowInfo,
2961  const LocalOrdinal inds[],
2962  const impl_scalar_type newVals[],
2963  const LocalOrdinal numElts,
2964  const bool atomic) const
2965  {
2966  typedef LocalOrdinal LO;
2967  typedef GlobalOrdinal GO;
2968 
2969  const bool sorted = graph.isSorted ();
2970 
2971  size_t hint = 0; // Guess for the current index k into rowVals
2972  LO numValid = 0; // number of valid local column indices
2973 
2974  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2975  // accurately, it assumes that the host execution space can
2976  // access data in both InputMemorySpace and ValsMemorySpace.
2977 
2978  if (graph.isLocallyIndexed ()) {
2979  // Get a view of the column indices in the row. This amortizes
2980  // the cost of getting the view over all the entries of inds.
2981  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2982 
2983  for (LO j = 0; j < numElts; ++j) {
2984  const LO lclColInd = inds[j];
2985  const size_t offset =
2986  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2987  lclColInd, hint, sorted);
2988  if (offset != rowInfo.numEntries) {
2989  if (atomic) {
2990  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2991  }
2992  else {
2993  rowVals[offset] += newVals[j];
2994  }
2995  hint = offset + 1;
2996  ++numValid;
2997  }
2998  }
2999  }
3000  else if (graph.isGloballyIndexed ()) {
3001  if (graph.colMap_.is_null ()) {
3002  return Teuchos::OrdinalTraits<LO>::invalid ();
3003  }
3004  const map_type colMap = * (graph.colMap_);
3005 
3006  // Get a view of the column indices in the row. This amortizes
3007  // the cost of getting the view over all the entries of inds.
3008  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
3009 
3010  for (LO j = 0; j < numElts; ++j) {
3011  const GO gblColInd = colMap.getGlobalElement (inds[j]);
3012  if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3013  const size_t offset =
3014  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3015  gblColInd, hint, sorted);
3016  if (offset != rowInfo.numEntries) {
3017  if (atomic) {
3018  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3019  }
3020  else {
3021  rowVals[offset] += newVals[j];
3022  }
3023  hint = offset + 1;
3024  ++numValid;
3025  }
3026  }
3027  }
3028  }
3029  // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
3030  // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
3031  // to be neither locally nor globally indexed on a process.
3032  // This means that the graph or matrix has no entries on that
3033  // process. Epetra also works like this. It's related to lazy
3034  // allocation (on first insertion, not at graph / matrix
3035  // construction). Lazy allocation will go away because it is
3036  // not thread scalable.
3037 
3038  return numValid;
3039  }
3040 
3041  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3042  LocalOrdinal
3043  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3044  sumIntoLocalValues (const LocalOrdinal localRow,
3045  const Teuchos::ArrayView<const LocalOrdinal>& indices,
3046  const Teuchos::ArrayView<const Scalar>& values,
3047  const bool atomic) const
3048  {
3049  typedef LocalOrdinal LO;
3050 
3051  const LO numInputEnt = static_cast<LO> (indices.size ());
3052  if (static_cast<LO> (values.size ()) != numInputEnt) {
3053  return Teuchos::OrdinalTraits<LO>::invalid ();
3054  }
3055  const LO* const inputInds = indices.getRawPtr ();
3056  const Scalar* const inputVals = values.getRawPtr ();
3057  return this->sumIntoLocalValues (localRow, numInputEnt,
3058  inputVals, inputInds, atomic);
3059  }
3060 
// Sum values into local row `localRow`, taking the values and their local
// column indices from raw arrays of length `numEnt`.
//
// Returns Teuchos::OrdinalTraits<LocalOrdinal>::invalid() if fill is not
// active or staticGraph_ is null; returns 0 if getRowInfo() reports an
// invalid local row; otherwise returns whatever sumIntoLocalValuesImpl()
// returns.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 3063) -- presumably lost when the listing
// was extracted; confirm against the original file.
3061  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3062  LocalOrdinal
3064  sumIntoLocalValues (const LocalOrdinal localRow,
3065  const LocalOrdinal numEnt,
3066  const Scalar vals[],
3067  const LocalOrdinal cols[],
3068  const bool atomic) const
3069  {
3070  typedef impl_scalar_type IST;
3071  typedef LocalOrdinal LO;
3072 
3073  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3074  // Fill must be active and the "nonconst" graph must exist.
3075  return Teuchos::OrdinalTraits<LO>::invalid ();
3076  }
3077  const crs_graph_type& graph = * (this->staticGraph_);
3078  const RowInfo rowInfo = graph.getRowInfo (localRow);
3079 
3080  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3081  // The calling process does not own this row, so it is not
3082  // allowed to modify its values.
3083  return static_cast<LO> (0);
3084  }
3085  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // View the caller's Scalar array as impl_scalar_type (IST).  This
      // assumes the two types are layout compatible -- TODO confirm.
3086  const IST* const inputVals = reinterpret_cast<const IST*> (vals);
3087  return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3088  cols, inputVals, numEnt, atomic);
3089  }
3090 
// Return a read-only Teuchos::ArrayView of the values stored for the row
// described by `rowinfo`.
//
// If k_values1D_ is nonempty and rowinfo.allocSize > 0, the view covers
// entries [offset1D, offset1D + allocSize) of k_values1D_.  Otherwise, if
// values2D_ is not null, the view covers values2D_[rowinfo.localRow].
// Otherwise an empty view is returned.
//
// When HAVE_TPETRA_DEBUG is defined, a 1-D range that extends past the end
// of k_values1D_ is reported through TEUCHOS_TEST_FOR_EXCEPTION with
// std::range_error.
//
// NOTE(review): sibling definitions have a
// "CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::" line between the
// return type and the function name.  That line is absent here (the
// listing's own numbering skips 3093) -- presumably lost when the listing
// was extracted; confirm against the original file.
3091  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3092  Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type>
3094  getView (RowInfo rowinfo) const
3095  {
3096  using Kokkos::MemoryUnmanaged;
3097  using Kokkos::View;
3098  using Teuchos::ArrayView;
3099  typedef impl_scalar_type ST;
3100  typedef std::pair<size_t, size_t> range_type;
3101 
3102  if (k_values1D_.extent (0) != 0 && rowinfo.allocSize > 0) {
3103 #ifdef HAVE_TPETRA_DEBUG
3104  TEUCHOS_TEST_FOR_EXCEPTION(
3105  rowinfo.offset1D + rowinfo.allocSize > k_values1D_.extent (0),
3106  std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
3107  "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
3108  rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
3109  ") > k_values1D_.extent(0) (" << k_values1D_.extent (0) << ").");
3110 #endif // HAVE_TPETRA_DEBUG
3111  range_type range (rowinfo.offset1D, rowinfo.offset1D + rowinfo.allocSize);
3112  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
3113  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3114  // directly, because that first creates a _managed_ subview,
3115  // then returns an unmanaged version of that. That touches the
3116  // reference count, which costs performance in a measurable way.
3117  // Instead, we create a temporary unmanaged view, then create
3118  // the subview from that.
3119  subview_type sv = Kokkos::subview (subview_type (k_values1D_), range);
      // NOTE(review): the enclosing branch already requires
      // rowinfo.allocSize > 0, so the "== 0" case below looks unreachable.
3120  const ST* const sv_raw = (rowinfo.allocSize == 0) ? NULL : sv.data ();
3121  return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
3122  }
3123  else if (values2D_ != Teuchos::null) {
3124  return values2D_[rowinfo.localRow] ();
3125  }
3126  else {
      // No storage is available for the row: return an empty view.
3127  return ArrayView<impl_scalar_type> ();
3128  }
3129  }
3130 
3131 
// Get a raw pointer (written to `vals`) and an entry count (written to
// `numEnt`) for the values of the row described by `rowinfo`.
//
// If k_values1D_ is nonempty and rowinfo.allocSize > 0, `vals` points
// rowinfo.offset1D entries into k_values1D_ and numEnt is
// rowinfo.allocSize.  Otherwise, if values2D_ is not null, they come from
// values2D_[rowinfo.localRow].  Otherwise vals is NULL and numEnt is 0.
//
// Returns 0 on all of those paths.  When HAVE_TPETRA_DEBUG is defined, an
// out-of-range offset or row index instead sets vals = NULL and numEnt = 0
// and returns Teuchos::OrdinalTraits<LocalOrdinal>::invalid().
//
// NOTE(review): the lines holding the class qualifier, the function name
// and the first parameter are absent here (the listing's own numbering
// skips 3134-3135) -- presumably lost when the listing was extracted.  A
// call elsewhere in this file, this->getViewRawConst (valsConst, numEnt,
// rowinfo), suggests that this is getViewRawConst and that the missing
// parameter is the pointer `vals` assigned below; confirm against the
// original file.
3132  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3133  LocalOrdinal
3136  LocalOrdinal& numEnt,
3137  const RowInfo& rowinfo) const
3138  {
3139  if (k_values1D_.extent (0) != 0 && rowinfo.allocSize > 0) {
3140 #ifdef HAVE_TPETRA_DEBUG
3141  if (rowinfo.offset1D + rowinfo.allocSize > k_values1D_.extent (0)) {
3142  vals = NULL;
3143  numEnt = 0;
3144  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
3145  }
3146 #endif // HAVE_TPETRA_DEBUG
3147  vals = k_values1D_.data () + rowinfo.offset1D;
3148  numEnt = rowinfo.allocSize;
3149  }
3150  else if (! values2D_.is_null ()) {
3151 #ifdef HAVE_TPETRA_DEBUG
3152  if (rowinfo.localRow >= static_cast<size_t> (values2D_.size ())) {
3153  vals = NULL;
3154  numEnt = 0;
3155  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
3156  }
3157 #endif // HAVE_TPETRA_DEBUG
3158  // Use const reference so that we don't update ArrayRCP's
3159  // reference count, which is not thread safe.
3160  const auto& curRow = values2D_[rowinfo.localRow];
3161  vals = curRow.getRawPtr ();
3162  numEnt = curRow.size ();
3163  }
3164  else {
3165  vals = NULL;
3166  numEnt = 0;
3167  }
3168 
3169  return static_cast<LocalOrdinal> (0);
3170  }
3171 
3172  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3173  LocalOrdinal
3176  LocalOrdinal& numEnt,
3177  const RowInfo& rowinfo) const
3178  {
3179  const impl_scalar_type* valsConst;
3180  const LocalOrdinal err = this->getViewRawConst (valsConst, numEnt, rowinfo);
3181  vals = const_cast<impl_scalar_type*> (valsConst);
3182  return err;
3183  }
3184 
3185  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3186  Kokkos::View<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type*,
3188  Kokkos::MemoryUnmanaged>
3190  getRowView (const RowInfo& rowInfo) const
3191  {
3192  using Kokkos::MemoryUnmanaged;
3193  using Kokkos::View;
3194  typedef impl_scalar_type ST;
3195  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
3196  typedef std::pair<size_t, size_t> range_type;
3197 
3198  if (k_values1D_.extent (0) != 0 && rowInfo.allocSize > 0) {
3199 #ifdef HAVE_TPETRA_DEBUG
3200  TEUCHOS_TEST_FOR_EXCEPTION
3201  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.extent (0),
3202  std::range_error, "Tpetra::CrsMatrix::getRowView: Invalid access "
3203  "to 1-D storage of values. rowInfo.offset1D ("
3204  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
3205  << ") > this->k_values1D_.extent(0) ("
3206  << this->k_values1D_.extent (0) << ").");
3207 #endif // HAVE_TPETRA_DEBUG
3208  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
3209  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3210  // directly, because that first creates a _managed_ subview,
3211  // then returns an unmanaged version of that. That touches the
3212  // reference count, which costs performance in a measurable way.
3213  // Instead, we create a temporary unmanaged view, then create
3214  // the subview from that.
3215  return Kokkos::subview (subview_type (this->k_values1D_), range);
3216  }
3217  else if (this->values2D_ != Teuchos::null) {
3218  // Use a reference, so that I don't touch the Teuchos::ArrayView
3219  // reference count in a debug build. (It has no reference count
3220  // in a release build.) This ensures thread safety.
3221  auto& rowView = this->values2D_[rowInfo.localRow];
3222  return subview_type (rowView.getRawPtr (), rowView.size ());
3223  }
3224  else {
3225  return subview_type ();
3226  }
3227  }
3228 
3229  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3230  Kokkos::View<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type*,
3231  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::execution_space,
3232  Kokkos::MemoryUnmanaged>
3233  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3234  getRowViewNonConst (const RowInfo& rowInfo) const
3235  {
3236  using Kokkos::MemoryUnmanaged;
3237  using Kokkos::View;
3238  typedef impl_scalar_type ST;
3239  typedef View<ST*, execution_space, MemoryUnmanaged> subview_type;
3240  typedef std::pair<size_t, size_t> range_type;
3241 
3242  if (k_values1D_.extent (0) != 0 && rowInfo.allocSize > 0) {
3243 #ifdef HAVE_TPETRA_DEBUG
3244  TEUCHOS_TEST_FOR_EXCEPTION
3245  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.extent (0),
3246  std::range_error, "Tpetra::CrsMatrix::getRowViewNonConst: Invalid "
3247  "access to 1-D storage of values. rowInfo.offset1D ("
3248  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
3249  << ") > this->k_values1D_.extent(0) ("
3250  << this->k_values1D_.extent (0) << ").");
3251 #endif // HAVE_TPETRA_DEBUG
3252  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
3253  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3254  // directly, because that first creates a _managed_ subview,
3255  // then returns an unmanaged version of that. That touches the
3256  // reference count, which costs performance in a measurable way.
3257  // Instead, we create a temporary unmanaged view, then create
3258  // the subview from that.
3259  return Kokkos::subview (subview_type (this->k_values1D_), range);
3260  }
3261  else if (this->values2D_ != Teuchos::null) {
3262  // Use a reference, so that I don't touch the Teuchos::ArrayView
3263  // reference count in a debug build. (It has no reference count
3264  // in a release build.) This ensures thread safety.
3265  auto& rowView = this->values2D_[rowInfo.localRow];
3266  return subview_type (rowView.getRawPtr (), rowView.size ());
3267  }
3268  else {
3269  return subview_type ();
3270  }
3271  }
3272 
3273  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3274  Teuchos::ArrayView<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type>
3275  CrsMatrix<Scalar, LocalOrdinal,GlobalOrdinal, Node>::
3276  getViewNonConst (const RowInfo& rowinfo) const
3277  {
3278  return Teuchos::av_const_cast<impl_scalar_type> (this->getView (rowinfo));
3279  }
3280 
3281  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3282  void
3284  getLocalRowCopy (LocalOrdinal localRow,
3285  const Teuchos::ArrayView<LocalOrdinal>& indices,
3286  const Teuchos::ArrayView<Scalar>& values,
3287  size_t& numEntries) const
3288  {
3289  using Teuchos::ArrayView;
3290  using Teuchos::av_reinterpret_cast;
3291  const char tfecfFuncName[] = "getLocalRowCopy: ";
3292 
3293  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3294  (! this->hasColMap (), std::runtime_error,
3295  "The matrix does not have a column Map yet. This means we don't have "
3296  "local indices for columns yet, so it doesn't make sense to call this "
3297  "method. If the matrix doesn't have a column Map yet, you should call "
3298  "fillComplete on it first.");
3299 
3300  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3301  const size_t theNumEntries = rowinfo.numEntries;
3302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3303  (static_cast<size_t> (indices.size ()) < theNumEntries ||
3304  static_cast<size_t> (values.size ()) < theNumEntries,
3305  std::runtime_error, "Row with local index " << localRow << " has " <<
3306  theNumEntries << " entry/ies, but indices.size() = " <<
3307  indices.size () << " and values.size() = " << values.size () << ".");
3308  numEntries = theNumEntries; // first side effect
3309 
3310  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3311  if (staticGraph_->isLocallyIndexed ()) {
3312  const LocalOrdinal* curLclInds;
3313  const impl_scalar_type* curVals;
3314  LocalOrdinal numSpots; // includes both current entries and extra space
3315 
3316  // If we got this far, rowinfo should be correct and should
3317  // refer to a valid local row. Thus, these error checks are
3318  // superfluous, but we retain them in a debug build.
3319 #ifdef HAVE_TPETRA_DEBUG
3320  int err =
3321  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3322  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3323  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3324  "staticGraph_->getLocalViewRawConst returned nonzero error code "
3325  << err << ".");
3326  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3327  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3328  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3329  << ".");
3330  const LocalOrdinal numSpotsBefore = numSpots;
3331  err = getViewRawConst (curVals, numSpots, rowinfo);
3332  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3333  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3334  "getViewRaw returned nonzero error code " << err << ".");
3335  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3336  (numSpotsBefore != numSpots, std::logic_error,
3337  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3338  << numSpots << ".");
3339 #else
3340  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3341  (void) getViewRawConst (curVals, numSpots, rowinfo);
3342 #endif // HAVE_TPETRA_DEBUG
3343 
3344  for (size_t j = 0; j < theNumEntries; ++j) {
3345  values[j] = curVals[j];
3346  indices[j] = curLclInds[j];
3347  }
3348  }
3349  else if (staticGraph_->isGloballyIndexed ()) {
3350  // Don't call getColMap(), because it touches RCP's reference count.
3351  const map_type& colMap = * (staticGraph_->colMap_);
3352  const GlobalOrdinal* curGblInds;
3353  const impl_scalar_type* curVals;
3354  LocalOrdinal numSpots; // includes both current entries and extra space
3355 
3356  // If we got this far, rowinfo should be correct and should
3357  // refer to a valid local row. Thus, these error checks are
3358  // superfluous, but we retain them in a debug build.
3359 #ifdef HAVE_TPETRA_DEBUG
3360  int err =
3361  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3362  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3363  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3364  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
3365  << err << ".");
3366  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3367  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3368  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3369  << ".");
3370  const LocalOrdinal numSpotsBefore = numSpots;
3371  err = getViewRawConst (curVals, numSpots, rowinfo);
3372  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3373  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3374  "getViewRawConst returned nonzero error code " << err << ".");
3375  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3376  (numSpotsBefore != numSpots, std::logic_error,
3377  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3378  << numSpots << ".");
3379 #else
3380  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3381  (void) getViewRawConst (curVals, numSpots, rowinfo);
3382 #endif //HAVE_TPETRA_DEBUG
3383 
3384  for (size_t j = 0; j < theNumEntries; ++j) {
3385  values[j] = curVals[j];
3386  indices[j] = colMap.getLocalElement (curGblInds[j]);
3387  }
3388  }
3389  }
3390  }
3391 
3392  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3393  void
3395  getGlobalRowCopy (GlobalOrdinal globalRow,
3396  const Teuchos::ArrayView<GlobalOrdinal>& indices,
3397  const Teuchos::ArrayView<Scalar>& values,
3398  size_t& numEntries) const
3399  {
3400  using Teuchos::ArrayView;
3401  using Teuchos::av_reinterpret_cast;
3402  const char tfecfFuncName[] = "getGlobalRowCopy: ";
3403 
3404  const RowInfo rowinfo =
3405  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3406  const size_t theNumEntries = rowinfo.numEntries;
3407  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3408  static_cast<size_t> (indices.size ()) < theNumEntries ||
3409  static_cast<size_t> (values.size ()) < theNumEntries,
3410  std::runtime_error, "Row with global index " << globalRow << " has "
3411  << theNumEntries << " entry/ies, but indices.size() = " <<
3412  indices.size () << " and values.size() = " << values.size () << ".");
3413  numEntries = theNumEntries; // first side effect
3414 
3415  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3416  if (staticGraph_->isLocallyIndexed ()) {
3417  const map_type& colMap = * (staticGraph_->colMap_);
3418  const LocalOrdinal* curLclInds;
3419  const impl_scalar_type* curVals;
3420  LocalOrdinal numSpots; // includes both current entries and extra space
3421 
3422  // If we got this far, rowinfo should be correct and should
3423  // refer to a valid local row. Thus, these error checks are
3424  // superfluous, but we retain them in a debug build.
3425 #ifdef HAVE_TPETRA_DEBUG
3426  int err =
3427  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3428  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3429  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3430  "staticGraph_->getLocalViewRawConst returned nonzero error code "
3431  << err << ".");
3432  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3433  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3434  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3435  << ".");
3436  const LocalOrdinal numSpotsBefore = numSpots;
3437  err = getViewRawConst (curVals, numSpots, rowinfo);
3438  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3439  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3440  "getViewRaw returned nonzero error code " << err << ".");
3441  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3442  (numSpotsBefore != numSpots, std::logic_error,
3443  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3444  << numSpots << ".");
3445 #else
3446  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3447  (void) getViewRawConst (curVals, numSpots, rowinfo);
3448 #endif //HAVE_TPETRA_DEBUG
3449 
3450  for (size_t j = 0; j < theNumEntries; ++j) {
3451  values[j] = curVals[j];
3452  indices[j] = colMap.getGlobalElement (curLclInds[j]);
3453  }
3454  }
3455  else if (staticGraph_->isGloballyIndexed ()) {
3456  const GlobalOrdinal* curGblInds;
3457  const impl_scalar_type* curVals;
3458  LocalOrdinal numSpots; // includes both current entries and extra space
3459 
3460  // If we got this far, rowinfo should be correct and should
3461  // refer to a valid local row. Thus, these error checks are
3462  // superfluous, but we retain them in a debug build.
3463 #ifdef HAVE_TPETRA_DEBUG
3464  int err =
3465  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3466  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3467  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3468  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
3469  << err << ".");
3470  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3471  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3472  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3473  << ".");
3474  const LocalOrdinal numSpotsBefore = numSpots;
3475  err = getViewRawConst (curVals, numSpots, rowinfo);
3476  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3477  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3478  "getViewRawConst returned nonzero error code " << err << ".");
3479  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3480  (numSpotsBefore != numSpots, std::logic_error,
3481  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3482  << numSpots << ".");
3483 #else
3484  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3485  (void) getViewRawConst (curVals, numSpots, rowinfo);
3486 #endif //HAVE_TPETRA_DEBUG
3487 
3488  for (size_t j = 0; j < theNumEntries; ++j) {
3489  values[j] = curVals[j];
3490  indices[j] = curGblInds[j];
3491  }
3492  }
3493  }
3494  }
3495 
3496  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3497  void
3499  getLocalRowView (LocalOrdinal localRow,
3500  Teuchos::ArrayView<const LocalOrdinal>& indices,
3501  Teuchos::ArrayView<const Scalar>& values) const
3502  {
3503  using Teuchos::ArrayView;
3504  using Teuchos::av_reinterpret_cast;
3505  typedef LocalOrdinal LO;
3506  const char tfecfFuncName[] = "getLocalRowView: ";
3507 
3508  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3509  isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
3510  "its indices as global indices, so you cannot get a view with local "
3511  "column indices. If the matrix has a column Map, you may call "
3512  "getLocalRowCopy() to get local column indices; otherwise, you may get "
3513  "a view with global column indices by calling getGlobalRowCopy().");
3514  indices = Teuchos::null;
3515  values = Teuchos::null;
3516  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3517  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3518  rowinfo.numEntries > 0) {
3519  ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
3520  ArrayView<const Scalar> valTmp =
3521  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
3522  indices = indTmp (0, rowinfo.numEntries);
3523  values = valTmp (0, rowinfo.numEntries);
3524  }
3525 
3526 #ifdef HAVE_TPETRA_DEBUG
3527  const char suffix[] = ". This should never happen. Please report this "
3528  "bug to the Tpetra developers.";
3529  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3530  (static_cast<size_t> (indices.size ()) !=
3531  static_cast<size_t> (values.size ()), std::logic_error,
3532  "At the end of this method, for local row " << localRow << ", "
3533  "indices.size() = " << indices.size () << " != values.size () = "
3534  << values.size () << suffix);
3535  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3536  (static_cast<size_t> (indices.size ()) !=
3537  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3538  "At the end of this method, for local row " << localRow << ", "
3539  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3540  << rowinfo.numEntries << suffix);
3541  const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3542  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3543  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3544  "of this method, for local row " << localRow << ", rowinfo.numEntries = "
3545  << rowinfo.numEntries << " != getNumEntriesInLocalRow(localRow) = " <<
3546  expectedNumEntries << suffix);
3547 #endif // HAVE_TPETRA_DEBUG
3548  }
3549 
3550  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3551  LocalOrdinal
3553  getLocalRowView (const LocalOrdinal lclRow,
3554  LocalOrdinal& numEnt,
3555  const impl_scalar_type*& val,
3556  const LocalOrdinal*& ind) const
3557  {
3558  typedef LocalOrdinal LO;
3559 
3560  // Don't call getCrsGraph(), because that modfies an RCP reference
3561  // count, which is not thread safe. Checking whether an RCP is
3562  // null does NOT modify its reference count, and is therefore
3563  // thread safe. Note that isGloballyIndexed() calls
3564  // getCrsGraph(), so we have to go to the graph directly.
3565  if (staticGraph_.is_null () || staticGraph_->isGloballyIndexed ()) {
3566  return Tpetra::Details::OrdinalTraits<LO>::invalid ();
3567  }
3568  else {
3569  const RowInfo rowInfo = staticGraph_->getRowInfo (lclRow);
3570  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
3571  numEnt = 0; // no valid entries in this row on the calling process
3572  val = NULL;
3573  ind = NULL;
3574  // First argument (lclRow) invalid, so make 1 the error code.
3575  return static_cast<LO> (1);
3576  }
3577  else {
3578  numEnt = static_cast<LO> (rowInfo.numEntries);
3579  auto lclColInds = staticGraph_->getLocalKokkosRowView (rowInfo);
3580  ind = lclColInds.data (); // FIXME (mfh 18 Jul 2016) UVM
3581  const LO err = this->getViewRawConst (val, numEnt, rowInfo);
3582  return err;
3583  }
3584  }
3585  }
3586 
3587  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3588  LocalOrdinal
3590  getLocalRowViewRaw (const LocalOrdinal lclRow,
3591  LocalOrdinal& numEnt,
3592  const LocalOrdinal*& lclColInds,
3593  const Scalar*& vals) const
3594  {
3595  const impl_scalar_type* vals_ist = NULL;
3596  const LocalOrdinal errCode =
3597  this->getLocalRowView (lclRow, numEnt, vals_ist, lclColInds);
3598  vals = reinterpret_cast<const Scalar*> (vals_ist);
3599  return errCode;
3600  }
3601 
3602  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3603  void
3605  getGlobalRowView (GlobalOrdinal globalRow,
3606  Teuchos::ArrayView<const GlobalOrdinal>& indices,
3607  Teuchos::ArrayView<const Scalar>& values) const
3608  {
3609  using Teuchos::ArrayView;
3610  using Teuchos::av_reinterpret_cast;
3611  typedef GlobalOrdinal GO;
3612  const char tfecfFuncName[] = "getGlobalRowView: ";
3613 
3614  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3615  isLocallyIndexed (), std::runtime_error,
3616  "The matrix is locally indexed, so we cannot return a view of the row "
3617  "with global column indices. Use getGlobalRowCopy() instead.");
3618  indices = Teuchos::null;
3619  values = Teuchos::null;
3620  const RowInfo rowinfo =
3621  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3622  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3623  rowinfo.numEntries > 0) {
3624  ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
3625  ArrayView<const Scalar> valTmp =
3626  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
3627 #ifdef HAVE_TPETRA_DEBUG
3628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3629  (static_cast<size_t> (indTmp.size ()) < rowinfo.numEntries ||
3630  static_cast<size_t> (valTmp.size ()) < rowinfo.numEntries,
3631  std::logic_error, std::endl << "rowinfo.numEntries not accurate. "
3632  << std::endl << "indTmp.size() = " << indTmp.size ()
3633  << ", valTmp.size() = " << valTmp.size ()
3634  << ", rowinfo.numEntries = " << rowinfo.numEntries << ".");
3635 #endif // HAVE_TPETRA_DEBUG
3636  indices = indTmp (0, rowinfo.numEntries);
3637  values = valTmp (0, rowinfo.numEntries);
3638  }
3639 
3640 #ifdef HAVE_TPETRA_DEBUG
3641  const char suffix[] = ". This should never happen. Please report this "
3642  "bug to the Tpetra developers.";
3643  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3644  (static_cast<size_t> (indices.size ()) !=
3645  static_cast<size_t> (values.size ()), std::logic_error,
3646  "At the end of this method, for global row " << globalRow << ", "
3647  "indices.size() = " << indices.size () << " != values.size () = "
3648  << values.size () << suffix);
3649  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3650  (static_cast<size_t> (indices.size ()) !=
3651  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3652  "At the end of this method, for global row " << globalRow << ", "
3653  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3654  << rowinfo.numEntries << suffix);
3655  const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3656  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3657  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3658  "of this method, for global row " << globalRow << ", rowinfo.numEntries "
3659  "= " << rowinfo.numEntries << " != getNumEntriesInGlobalRow(globalRow) ="
3660  " " << expectedNumEntries << suffix);
3661 #endif // HAVE_TPETRA_DEBUG
3662  }
3663 
3664  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3665  void
3667  scale (const Scalar& alpha)
3668  {
3669  typedef LocalOrdinal LO;
3670  typedef typename Teuchos::Array<Scalar>::size_type size_type;
3671  const char tfecfFuncName[] = "scale: ";
3672  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3673 
3674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3675  ! isFillActive (), std::runtime_error,
3676  "Fill must be active before you may call this method. "
3677  "Please call resumeFill() to make fill active.");
3678 
3679  const size_t nlrs = staticGraph_->getNodeNumRows ();
3680  const size_t numEntries = staticGraph_->getNodeNumEntries ();
3681  if (! staticGraph_->indicesAreAllocated () ||
3682  nlrs == 0 || numEntries == 0) {
3683  // do nothing
3684  }
3685  else {
3686  if (staticGraph_->getProfileType () == StaticProfile) {
3687  const LO lclNumRows = lclMatrix_.numRows ();
3688  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3689  auto row_i = lclMatrix_.row (lclRow);
3690  for (LO k = 0; k < row_i.length; ++k) {
3691  // FIXME (mfh 02 Jan 2015) This assumes CUDA UVM.
3692  row_i.value (k) *= theAlpha;
3693  }
3694  }
3695  }
3696  else if (staticGraph_->getProfileType () == DynamicProfile) {
3697  for (size_t row = 0; row < nlrs; ++row) {
3698  const size_type numEnt = getNumEntriesInLocalRow (row);
3699  Teuchos::ArrayView<impl_scalar_type> rowVals = values2D_[row] ();
3700  for (size_type k = 0; k < numEnt; ++k) {
3701  rowVals[k] *= theAlpha;
3702  }
3703  }
3704  }
3705  }
3706  }
3707 
3708  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3709  void
3711  setAllToScalar (const Scalar& alpha)
3712  {
3713  const char tfecfFuncName[] = "setAllToScalar: ";
3714  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3716  ! isFillActive (), std::runtime_error,
3717  "Fill must be active before you may call this method. "
3718  "Please call resumeFill() to make fill active.");
3719 
3720  // replace all values in the matrix
3721  // it is easiest to replace all allocated values, instead of replacing only the ones with valid entries
3722  // however, if there are no valid entries, we can short-circuit
3723  // furthermore, if the values aren't allocated, we can short-circuit (no entry have been inserted so far)
3724  const size_t nlrs = staticGraph_->getNodeNumRows();
3725  const size_t numEntries = staticGraph_->getNodeNumEntries();
3726  if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3727  // do nothing
3728  }
3729  else {
3730  const ProfileType profType = staticGraph_->getProfileType ();
3731  if (profType == StaticProfile) {
3732  // FIXME (mfh 24 Dec 2014) Once CrsMatrix implements DualView
3733  // semantics, this would be the place to mark memory as
3734  // modified.
3735  Kokkos::deep_copy (k_values1D_, theAlpha);
3736  }
3737  else if (profType == DynamicProfile) {
3738  for (size_t row = 0; row < nlrs; ++row) {
3739  std::fill (values2D_[row].begin (), values2D_[row].end (), theAlpha);
3740  }
3741  }
3742  }
3743  }
3744 
3745  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3746  void
3748  setAllValues (const typename local_matrix_type::row_map_type& rowPointers,
3749  const typename local_graph_type::entries_type::non_const_type& columnIndices,
3750  const typename local_matrix_type::values_type& values)
3751  {
3752  const char tfecfFuncName[] = "setAllValues: ";
3753  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3754  (columnIndices.size () != values.size (), std::invalid_argument,
3755  "columnIndices.size() = " << columnIndices.size () << " != values.size()"
3756  " = " << values.size () << ".");
3757  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3758  (myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
3759 
3760  try {
3761  myGraph_->setAllIndices (rowPointers, columnIndices);
3762  }
3763  catch (std::exception &e) {
3764  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3765  (true, std::runtime_error, "myGraph_->setAllIndices() threw an "
3766  "exception: " << e.what ());
3767  }
3768  // Make sure that myGraph_ now has a local graph. It may not be
3769  // fillComplete yet, so it's important to check. We don't care
3770  // whether setAllIndices() did a shallow copy or a deep copy, so a
3771  // good way to check is to compare dimensions.
3772  auto lclGraph = myGraph_->getLocalGraph ();
3773  const size_t numEnt = lclGraph.entries.extent (0);
3774  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3775  (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3776  numEnt != static_cast<size_t> (columnIndices.extent (0)),
3777  std::logic_error, "myGraph_->setAllIndices() did not correctly create "
3778  "local graph. Please report this bug to the Tpetra developers.");
3779 
3780  const size_t numCols = myGraph_->getColMap ()->getNodeNumElements ();
3781  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
3782  numCols, values, lclGraph);
3783  // FIXME (22 Jun 2016) I would very much like to get rid of
3784  // k_values1D_ at some point. I find it confusing to have all
3785  // these extra references lying around.
3786  this->k_values1D_ = this->lclMatrix_.values;
3787 
3788  // Storage MUST be packed, since the interface doesn't give any
3789  // way to indicate any extra space at the end of each row.
3790  this->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
3791 
3792  checkInternalState ();
3793  }
3794 
3795  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3796  void
3798  setAllValues (const Teuchos::ArrayRCP<size_t>& ptr,
3799  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3800  const Teuchos::ArrayRCP<Scalar>& val)
3801  {
3802  using Kokkos::Compat::getKokkosViewDeepCopy;
3803  using Teuchos::ArrayRCP;
3804  using Teuchos::av_reinterpret_cast;
3805  typedef device_type DT;
3806  typedef impl_scalar_type IST;
3807  typedef typename local_matrix_type::row_map_type row_map_type;
3808  //typedef typename row_map_type::non_const_value_type row_offset_type;
3809  const char tfecfFuncName[] = "setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3810 
3811  // The row offset type may depend on the execution space. It may
3812  // not necessarily be size_t. If it's not, we need to make a deep
3813  // copy. We need to make a deep copy anyway so that Kokkos can
3814  // own the memory. Regardless, ptrIn gets the copy.
3815  typename row_map_type::non_const_type ptrNative ("ptr", ptr.size ());
3816  Kokkos::View<const size_t*,
3817  typename row_map_type::array_layout,
3818  Kokkos::HostSpace,
3819  Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3820  ::Tpetra::Details::copyOffsets (ptrNative, ptrSizeT);
3821 
3822  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3823  (ptrNative.extent (0) != ptrSizeT.extent (0),
3824  std::logic_error, "ptrNative.extent(0) = " <<
3825  ptrNative.extent (0) << " != ptrSizeT.extent(0) = "
3826  << ptrSizeT.extent (0) << ". Please report this bug to the "
3827  "Tpetra developers.");
3828 
3829  auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3830  auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3831  this->setAllValues (ptrNative, indIn, valIn);
3832  }
3833 
3834  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3835  void
3837  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
3838  {
3839  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
3840  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3841  (staticGraph_.is_null (), std::runtime_error, "The matrix has no graph.");
3842 
3843  // mfh 11 May 2016: We plan to deprecate the ArrayRCP version of
3844  // this method in CrsGraph too, so don't call it (otherwise build
3845  // warnings will show up and annoy users). Instead, copy results
3846  // in and out, if the memory space requires it.
3847 
3848  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
3849  if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3850  offsets.resize (lclNumRows);
3851  }
3852 
3853  // The input ArrayRCP must always be a host pointer. Thus, if
3854  // device_type::memory_space is Kokkos::HostSpace, it's OK for us
3855  // to write to that allocation directly as a Kokkos::View.
3856  typedef typename device_type::memory_space memory_space;
3857  if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3858  // It is always syntactically correct to assign a raw host
3859  // pointer to a device View, so this code will compile correctly
3860  // even if this branch never runs.
3861  typedef Kokkos::View<size_t*, device_type,
3862  Kokkos::MemoryUnmanaged> output_type;
3863  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3864  staticGraph_->getLocalDiagOffsets (offsetsOut);
3865  }
3866  else {
3867  Kokkos::View<size_t*, device_type> offsetsTmp ("diagOffsets", lclNumRows);
3868  staticGraph_->getLocalDiagOffsets (offsetsTmp);
3869  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
3870  Kokkos::MemoryUnmanaged> output_type;
3871  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3872  Kokkos::deep_copy (offsetsOut, offsetsTmp);
3873  }
3874  }
3875 
// Copies this process's diagonal entries of the matrix into the Vector
// `diag`, without using precomputed diagonal offsets.
//
// NOTE(review): the embedded listing numbers skip 3878-3879 (the declarator:
// class qualifier, method name and parameter list) and 3926 / 3931 inside the
// body, so those lines are not visible here. The exception prefix string
// below identifies this as the 1-argument getLocalDiagCopy.
//
// Raises std::runtime_error (through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC)
// if the matrix has no graph or no column Map. Returns without touching
// `diag` on processes whose row Map, or the row Map's communicator, is null.
3876  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3877  void
3880  {
3881  using Teuchos::ArrayRCP;
3882  using Teuchos::ArrayView;
3883  using Teuchos::av_reinterpret_cast;
3884  const char tfecfFuncName[] = "getLocalDiagCopy (1-arg): ";
3885  typedef local_ordinal_type LO;
3886 
3887 
3888  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3889  staticGraph_.is_null (), std::runtime_error,
3890  "This method requires that the matrix have a graph.");
3891  auto rowMapPtr = this->getRowMap ();
3892  if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3893  // Processes on which the row Map or its communicator is null
3894  // don't participate. Users shouldn't even call this method on
3895  // those processes.
3896  return;
3897  }
3898  auto colMapPtr = this->getColMap ();
3899  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3900  (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3901  "This method requires that the matrix have a column Map.");
3902  const map_type& rowMap = * rowMapPtr;
3903  const map_type& colMap = * colMapPtr;
3904  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3905 
3906 #ifdef HAVE_TPETRA_DEBUG
3907  // isCompatible() requires an all-reduce, and thus this check
3908  // should only be done in debug mode.
3909  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3910  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3911  "The input Vector's Map must be compatible with the CrsMatrix's row "
3912  "Map. You may check this by using Map's isCompatible method: "
3913  "diag.getMap ()->isCompatible (A.getRowMap ());");
3914 #endif // HAVE_TPETRA_DEBUG
3915 
3916  if (this->isFillComplete ()) {
// Fill-complete path: mark diag's device view as modified, take the
// first myNumRows rows of its column 0, and let the helper fill that
// subview from the local matrix and the local row / column Maps.
3917  diag.template modify<device_type> ();
3918  const auto D_lcl = diag.template getLocalView<device_type> ();
3919  // 1-D subview of the first (and only) column of D_lcl.
3920  const auto D_lcl_1d =
3921  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3922 
3923  const auto lclRowMap = rowMap.getLocalMap ();
3924  const auto lclColMap = colMap.getLocalMap ();
3925  const auto lclMatrix = this->lclMatrix_;
// The helper's return value is deliberately discarded.
3927  (void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
3928  lclColMap, lclMatrix);
3929  }
3930  else {
// Not fill complete: delegate to the helper that takes the Vector and
// the matrix itself; its return value is likewise discarded.
3932  (void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
3933  }
3934  }
3935 
// Copies this process's diagonal entries into the Vector `diag`, using
// precomputed diagonal offsets supplied as an unmanaged Kokkos::View in
// device_type's memory space.
//
// NOTE(review): the embedded listing numbers skip 3938-3939 (class qualifier,
// method name and the first parameter), so the declaration of `diag` is not
// visible here.
//
// In debug builds only (HAVE_TPETRA_DEBUG), raises std::runtime_error if
// diag's Map is not compatible with the matrix's row Map.
3936  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3937  void
3940  const Kokkos::View<const size_t*, device_type,
3941  Kokkos::MemoryUnmanaged>& offsets) const
3942  {
3943  typedef LocalOrdinal LO;
3944 
3945 #ifdef HAVE_TPETRA_DEBUG
3946  const char tfecfFuncName[] = "getLocalDiagCopy: ";
3947  const map_type& rowMap = * (this->getRowMap ());
3948  // isCompatible() requires an all-reduce, and thus this check
3949  // should only be done in debug mode.
3950  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3951  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3952  "The input Vector's Map must be compatible with (in the sense of Map::"
3953  "isCompatible) the CrsMatrix's row Map.");
3954 #endif // HAVE_TPETRA_DEBUG
3955 
// NOTE(review): the comment below talks about filling on the host and
// syncing to device, but the code that follows marks the *device* view as
// modified and hands a device subview to KokkosSparse::getDiagCopy, with no
// sync call. The comment may be stale -- confirm.
3956  // For now, we fill the Vector on the host and sync to device.
3957  // Later, we may write a parallel kernel that works entirely on
3958  // device.
3959  //
3960  // NOTE (mfh 21 Jan 2016): The host kernel here assumes UVM. Once
3961  // we write a device kernel, it will not need to assume UVM.
3962 
3963  diag.template modify<device_type> ();
3964  auto D_lcl = diag.template getLocalView<device_type> ();
3965  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3966  // Get 1-D subview of the first (and only) column of D_lcl.
3967  auto D_lcl_1d =
3968  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3969 
// Fill the subview from the local matrix using the caller's offsets.
3970  KokkosSparse::getDiagCopy (D_lcl_1d, offsets, this->lclMatrix_);
3971  }
3972 
// Copies this process's diagonal entries into the Vector `diag`, using
// precomputed diagonal offsets supplied as a host Teuchos::ArrayView.
// The fill runs on the host; `diag` is synced to device afterwards.
//
// For each local row, the output is zero unless the row's offset differs
// from the "invalid" size_t sentinel, in which case it is the row's value
// at that offset.
//
// NOTE(review): the embedded listing numbers skip 3975-3976 (class qualifier,
// method name and the first parameter) and 3981 (presumably the typedef of
// vec_type used on the next line), so those lines are not visible here.
// NOTE(review): no check is visible that offsets.size() is at least the local
// number of rows, although h_offsets is indexed by every local row -- verify
// against callers.
//
// In debug builds only (HAVE_TPETRA_DEBUG), raises std::runtime_error if
// diag's Map is not compatible with the matrix's row Map.
3973  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3974  void
3977  const Teuchos::ArrayView<const size_t>& offsets) const
3978  {
3979  typedef LocalOrdinal LO;
3980  typedef impl_scalar_type IST;
3982  typedef typename vec_type::dual_view_type dual_view_type;
3983  typedef typename dual_view_type::host_mirror_space::execution_space host_execution_space;
3984 
3985 #ifdef HAVE_TPETRA_DEBUG
3986  const char tfecfFuncName[] = "getLocalDiagCopy: ";
3987  const map_type& rowMap = * (this->getRowMap ());
3988  // isCompatible() requires an all-reduce, and thus this check
3989  // should only be done in debug mode.
3990  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3991  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3992  "The input Vector's Map must be compatible with (in the sense of Map::"
3993  "isCompatible) the CrsMatrix's row Map.");
3994 #endif // HAVE_TPETRA_DEBUG
3995 
3996  // See #1510. In case diag has already been marked modified on
3997  // device, we need to clear that flag, since the code below works
3998  // on host.
3999  auto diag_dv = diag.getDualView ();
4000  diag_dv.modified_device () = 0;
4001 
4002  // For now, we fill the Vector on the host and sync to device.
4003  // Later, we may write a parallel kernel that works entirely on
4004  // device.
4005  diag.template modify<host_execution_space> ();
4006  auto lclVecHost = diag.template getLocalView<host_execution_space> ();
4007  // 1-D subview of the first (and only) column of lclVecHost.
4008  auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
4009 
// Wrap the caller's offsets (no copy) in an unmanaged host View.
4010  Kokkos::View<const size_t*, Kokkos::HostSpace,
4011  Kokkos::MemoryTraits<Kokkos::Unmanaged> >
4012  h_offsets (offsets.getRawPtr (), offsets.size ());
4013  // Find the diagonal entries and put them in lclVecHost1d.
4014  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4015  typedef Kokkos::RangePolicy<host_execution_space, LO> policy_type;
4016  const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
4017 
// The lambda captures by reference; it is dispatched on the host
// execution space via policy_type.
4018  Kokkos::parallel_for (policy_type (0, myNumRows), [&] (const LO& lclRow) {
4019  lclVecHost1d(lclRow) = STS::zero (); // default value if no diag entry
4020  if (h_offsets[lclRow] != INV) {
4021  auto curRow = lclMatrix_.rowConst (lclRow);
4022  lclVecHost1d(lclRow) = static_cast<IST> (curRow.value(h_offsets[lclRow]));
4023  }
4024  });
4025  diag.template sync<execution_space> (); // sync changes back to device
4026  }
4027 
4028 
// Scales the matrix on the left by the entries of the vector `x`: every
// stored value in local row i is multiplied by the i-th local entry of the
// (possibly redistributed) vector.
//
// `x` is accepted when its Map is the same as the matrix's range Map or its
// row Map. In the range-Map case, if the graph has an exporter, `x` is first
// redistributed into a temporary vector over the row Map. Any other Map
// raises std::invalid_argument (through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC).
//
// NOTE(review): the embedded listing numbers skip 4031-4032 (class qualifier,
// method name and parameter list) and 4042 (presumably the declaration of
// vec_type), so those lines are not visible here.
4029  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4030  void
4033  {
4034  using ::Tpetra::Details::ProfilingRegion;
4035  using Teuchos::ArrayRCP;
4036  using Teuchos::ArrayView;
4037  using Teuchos::null;
4038  using Teuchos::RCP;
4039  using Teuchos::rcp;
4040  using Teuchos::rcpFromRef;
4041  using LO = local_ordinal_type;
4043  const char tfecfFuncName[] = "leftScale: ";
4044 
4045  ProfilingRegion region ("Tpetra::CrsMatrix::leftScale");
4046 
// xp ends up pointing either at x itself (non-owning, via rcpFromRef) or
// at a temporary vector distributed over the row Map.
4047  RCP<const vec_type> xp;
4048  if (this->getRangeMap ()->isSameAs (* (x.getMap ()))) {
4049  // Take from Epetra: If we have a non-trivial exporter, we must
4050  // import elements that are permuted or are on other processors.
4051  auto exporter = this->getCrsGraphRef ().getExporter ();
4052  if (exporter.get () != nullptr) {
4053  RCP<vec_type> tempVec (new vec_type (this->getRowMap ()));
4054  tempVec->doImport (x, *exporter, REPLACE); // reverse mode
4055  xp = tempVec;
4056  }
4057  else {
4058  xp = rcpFromRef (x);
4059  }
4060  }
4061  else if (this->getRowMap ()->isSameAs (* (x.getMap ()))) {
4062  xp = rcpFromRef (x);
4063  }
4064  else {
4065  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4066  (true, std::invalid_argument, "x's Map must be the same as "
4067  "either the row Map or the range Map of the CrsMatrix.");
4068  }
4069 
4070  // Check whether A has a valid local matrix. It might not if it
4071  // was not created with a local matrix, and if fillComplete has
4072  // never been called on it before. A never-initialized (and thus
4073  // invalid) local matrix has zero rows, because it was default
4074  // constructed.
4075  const LO lclNumRows =
4076  static_cast<LO> (this->getRowMap ()->getNodeNumElements ());
4077  const bool validLocalMatrix = this->lclMatrix_.numRows () == lclNumRows;
4078 
4079  if (validLocalMatrix) {
// Local-matrix path: make sure the vector's device copy is current
// (casting away const only to call sync), then scale the local matrix
// with the first column of the vector's device view.
4080  using dev_memory_space = typename device_type::memory_space;
4081  if (xp->template need_sync<dev_memory_space> ()) {
4082  using Teuchos::rcp_const_cast;
4083  rcp_const_cast<vec_type> (xp)->template sync<dev_memory_space> ();
4084  }
4085  auto x_lcl = xp->template getLocalView<dev_memory_space> ();
4086  auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
4087  ::Tpetra::Details::leftScaleLocalCrsMatrix (this->lclMatrix_, x_lcl_1d, false, false);
4088  }
4089  else {
// Fallback path: walk the rows on the host and scale each row's stored
// values in place through the graph's RowInfo.
4090  execution_space::fence (); // for UVM's sake
4091 
4092  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
4093  ArrayView<impl_scalar_type> rowValues = Teuchos::null;
4094  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
4095  const RowInfo rowinfo = this->staticGraph_->getRowInfo (i);
4096  rowValues = this->getViewNonConst (rowinfo);
4097  const impl_scalar_type scaleValue = static_cast<impl_scalar_type> (vectorVals[i]);
4098  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4099  rowValues[j] *= scaleValue;
4100  }
4101  }
4102  execution_space::fence (); // for UVM's sake
4103  }
4104  }
4105 
// Scales the matrix on the right by the entries of the vector `x`: every
// stored value is multiplied by the vector entry at that value's local
// column index.
//
// `x` is accepted when its Map is the same as the matrix's domain Map or its
// column Map. In the domain-Map case, if the graph has an importer, `x` is
// first imported into a temporary vector over the column Map. Any other Map
// raises std::runtime_error (through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC).
//
// NOTE(review): leftScale raises std::invalid_argument for the analogous Map
// mismatch; confirm whether the difference in exception type is intended.
// NOTE(review): the embedded listing numbers skip 4108-4109 (class qualifier,
// method name and parameter list) and 4119 (presumably the declaration of
// vec_type), so those lines are not visible here.
4106  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4107  void
4110  {
4111  using ::Tpetra::Details::ProfilingRegion;
4112  using Teuchos::ArrayRCP;
4113  using Teuchos::ArrayView;
4114  using Teuchos::null;
4115  using Teuchos::RCP;
4116  using Teuchos::rcp;
4117  using Teuchos::rcpFromRef;
4118  using LO = local_ordinal_type;
4120  const char tfecfFuncName[] = "rightScale: ";
4121 
4122  ProfilingRegion region ("Tpetra::CrsMatrix::rightScale");
4123 
// xp ends up pointing either at x itself (non-owning, via rcpFromRef) or
// at a temporary vector distributed over the column Map.
4124  RCP<const vec_type> xp;
4125  if (this->getDomainMap ()->isSameAs (* (x.getMap ()))) {
4126  // Take from Epetra: If we have a non-trivial importer, we must
4127  // import elements that are permuted or are on other processors.
4128  auto importer = this->getCrsGraphRef ().getImporter ();
4129  if (importer.get () != nullptr) {
4130  RCP<vec_type> tempVec (new vec_type (this->getColMap ()));
4131  tempVec->doImport (x, *importer, REPLACE);
4132  xp = tempVec;
4133  }
4134  else {
4135  xp = rcpFromRef (x);
4136  }
4137  }
4138  else if (this->getColMap ()->isSameAs (* (x.getMap ()))) {
4139  xp = rcpFromRef (x);
4140  } else {
4141  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4142  (true, std::runtime_error, "x's Map must be the same as "
4143  "either the domain Map or the column Map of the CrsMatrix.");
4144  }
4145 
4146  // Check whether A has a valid local matrix. It might not if it
4147  // was not created with a local matrix, and if fillComplete has
4148  // never been called on it before. A never-initialized (and thus
4149  // invalid) local matrix has zero rows, because it was default
4150  // constructed.
4151  const LO lclNumRows =
4152  static_cast<LO> (this->getRowMap ()->getNodeNumElements ());
4153  const bool validLocalMatrix = this->lclMatrix_.numRows () == lclNumRows;
4154 
4155  if (validLocalMatrix) {
// Local-matrix path: make sure the vector's device copy is current
// (casting away const only to call sync), then scale the local matrix
// with the first column of the vector's device view.
4156  using dev_memory_space = typename device_type::memory_space;
4157  if (xp->template need_sync<dev_memory_space> ()) {
4158  using Teuchos::rcp_const_cast;
4159  rcp_const_cast<vec_type> (xp)->template sync<dev_memory_space> ();
4160  }
4161  auto x_lcl = xp->template getLocalView<dev_memory_space> ();
4162  auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
4163  ::Tpetra::Details::rightScaleLocalCrsMatrix (this->lclMatrix_, x_lcl_1d, false, false);
4164  }
4165  else {
// Fallback path: walk the rows on the host; each stored value is
// scaled by the vector entry selected by its local column index.
4166  execution_space::fence (); // for UVM's sake
4167 
4168  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
4169  ArrayView<impl_scalar_type> rowValues = null;
4170  for (LO i = 0; i < lclNumRows; ++i) {
4171  const RowInfo rowinfo = this->staticGraph_->getRowInfo (i);
4172  rowValues = this->getViewNonConst (rowinfo);
4173  ArrayView<const LO> colInds;
4174  this->getCrsGraphRef ().getLocalRowView (i, colInds);
4175  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4176  rowValues[j] *= static_cast<impl_scalar_type> (vectorVals[colInds[j]]);
4177  }
4178  }
4179  execution_space::fence (); // for UVM's sake
4180  }
4181  }
4182 
// Returns the Frobenius norm of the matrix: the square root of the sum,
// over all processes in the matrix's communicator, of the squared magnitudes
// of the locally stored values.
//
// frobNorm_ caches the result; the value -1 means "not computed". When the
// cache is empty, the local sum is formed on the host and combined with an
// all-reduce (REDUCE_SUM). The result is written back to the cache only if
// the matrix is fill complete.
//
// NOTE(review): the embedded listing numbers skip 4184-4186 (return type,
// class qualifier and method name), so the declarator is not visible here;
// comments elsewhere in this file refer to this method as getFrobeniusNorm().
4183  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4187  {
4188  using Teuchos::ArrayView;
4189  using Teuchos::outArg;
4190  using Teuchos::REDUCE_SUM;
4191  using Teuchos::reduceAll;
4192  typedef typename Teuchos::ArrayRCP<const impl_scalar_type>::size_type size_type;
4193 
4194  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
4195  // local part of this computation. It could make sense to put
4196  // this operation in the Kokkos::CrsMatrix.
4197 
4198  // check the cache first
4199  mag_type frobNorm = frobNorm_;
4200  if (frobNorm == -STM::one ()) {
4201  mag_type mySum = STM::zero ();
4202  if (getNodeNumEntries() > 0) {
4203  if (isStorageOptimized ()) {
4204  // "Optimized" storage is packed storage. That means we can
4205  // iterate in one pass through the 1-D values array.
4206  const size_type numEntries =
4207  static_cast<size_type> (getNodeNumEntries ());
4208  for (size_type k = 0; k < numEntries; ++k) {
4209  // FIXME (mfh 05 Aug 2014) This assumes UVM.
4210  const impl_scalar_type val = k_values1D_(k);
4211  // Note (etp 06 Jan 2015) We need abs() here for composite types
4212  // (in general, if mag_type is on the left-hand-side, we need
4213  // abs() on the right-hand-side)
4214  const mag_type val_abs = STS::abs (val);
4215  mySum += val_abs * val_abs;
4216  }
4217  }
4218  else {
// Storage is not packed: visit each row through its RowInfo and sum
// only the first numEntries values of the row's view.
4219  const LocalOrdinal numRows =
4220  static_cast<LocalOrdinal> (this->getNodeNumRows ());
4221  for (LocalOrdinal r = 0; r < numRows; ++r) {
// NOTE(review): this dereferences myGraph_, whereas leftScale and
// rightScale in this file use staticGraph_ for the same getRowInfo
// lookup. replaceColMap treats a null myGraph_ as "the matrix has a
// const graph", so this branch presumably would dereference null for
// such a matrix whose storage is not optimized -- confirm, and
// consider staticGraph_ here.
4222  const RowInfo rowInfo = myGraph_->getRowInfo (r);
4223  const size_type numEntries =
4224  static_cast<size_type> (rowInfo.numEntries);
4225  ArrayView<const impl_scalar_type> A_r =
4226  this->getView (rowInfo).view (0, numEntries);
4227  for (size_type k = 0; k < numEntries; ++k) {
4228  const impl_scalar_type val = A_r[k];
4229  const mag_type val_abs = STS::abs (val);
4230  mySum += val_abs * val_abs;
4231  }
4232  }
4233  }
4234  }
// Every process reaches this all-reduce when its cache is empty, even
// if it has no local entries (mySum is then zero).
4235  mag_type totalSum = STM::zero ();
4236  reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
4237  mySum, outArg (totalSum));
4238  frobNorm = STM::sqrt (totalSum);
4239  }
4240  if (isFillComplete ()) {
4241  // Only cache the result if the matrix is fill complete.
4242  // Otherwise, the values might still change. resumeFill clears
4243  // the cache.
4244  frobNorm_ = frobNorm;
4245  }
4246  return frobNorm;
4247  }
4248 
4249  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4250  void
4252  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4253  {
4254  const char tfecfFuncName[] = "replaceColMap: ";
4255  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
4256  // Then replacing the column Map might mean that we need to
4257  // reindex the column indices.
4258  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4259  myGraph_.is_null (), std::runtime_error,
4260  "This method does not work if the matrix has a const graph. The whole "
4261  "idea of a const graph is that you are not allowed to change it, but "
4262  "this method necessarily must modify the graph, since the graph owns "
4263  "the matrix's column Map.");
4264  myGraph_->replaceColMap (newColMap);
4265  }
4266 
// Reindexes the column indices of a graph against a new column Map and,
// optionally, sorts each row of the graph and of this matrix's values
// together.
//
// `graph` is compared against NULL and dereferenced below: when it is NULL,
// the matrix's own graph (myGraph_) is used instead. Raises
// std::invalid_argument (through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) when
// `graph` is NULL and the matrix does not own a graph.
//
// \param newColMap   [in] Forwarded to the graph's reindexColumns.
// \param newImport   [in] Forwarded to the graph's reindexColumns.
// \param sortEachRow [in] If true, and the graph is locally indexed and not
//   already sorted, each row's column indices and values are sorted jointly
//   and the graph is then marked as sorted.
//
// NOTE(review): the embedded listing numbers skip 4269-4270 (class qualifier,
// method name and the declaration of `graph`), so those lines are not
// visible here.
4267  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4268  void
4271  const Teuchos::RCP<const map_type>& newColMap,
4272  const Teuchos::RCP<const import_type>& newImport,
4273  const bool sortEachRow)
4274  {
4275  const char tfecfFuncName[] = "reindexColumns: ";
4276  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4277  graph == NULL && myGraph_.is_null (), std::invalid_argument,
4278  "The input graph is NULL, but the matrix does not own its graph.");
4279 
4280  crs_graph_type& theGraph = (graph == NULL) ? *myGraph_ : *graph;
4281  const bool sortGraph = false; // we'll sort graph & matrix together below
4282  theGraph.reindexColumns (newColMap, newImport, sortGraph);
4283  if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
4284  const LocalOrdinal lclNumRows =
4285  static_cast<LocalOrdinal> (theGraph.getNodeNumRows ());
4286  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
4287  const RowInfo rowInfo = theGraph.getRowInfo (row);
4288  auto lclColInds = theGraph.getLocalKokkosRowViewNonConst (rowInfo);
4289  auto vals = this->getRowViewNonConst (rowInfo);
4290  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least for
4291  // lclColInds, if not also for values.
// Only the first rowInfo.numEntries column indices of the row form
// the key range passed to sort2; vals is passed as the companion array.
4292  sort2 (lclColInds.data (),
4293  lclColInds.data () + rowInfo.numEntries,
4294  vals.data ());
4295  }
// Record on the graph that its rows are now sorted.
4296  theGraph.indicesAreSorted_ = true;
4297  }
4298  }
4299 
4300  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4301  void
4303  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4304  Teuchos::RCP<const import_type>& newImporter)
4305  {
4306  const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
4307  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4308  myGraph_.is_null (), std::runtime_error,
4309  "This method does not work if the matrix has a const graph. The whole "
4310  "idea of a const graph is that you are not allowed to change it, but this"
4311  " method necessarily must modify the graph, since the graph owns the "
4312  "matrix's domain Map and Import objects.");
4313  myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
4314  }
4315 
4316  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4317  void
4319  insertNonownedGlobalValues (const GlobalOrdinal globalRow,
4320  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4321  const Teuchos::ArrayView<const Scalar>& values)
4322  {
4323  using Teuchos::Array;
4324  typedef GlobalOrdinal GO;
4325  typedef typename Array<GO>::size_type size_type;
4326 
4327  const size_type numToInsert = indices.size ();
4328  // Add the new data to the list of nonlocals.
4329  // This creates the arrays if they don't exist yet.
4330  std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4331  Array<GO>& curRowInds = curRow.first;
4332  Array<Scalar>& curRowVals = curRow.second;
4333  const size_type newCapacity = curRowInds.size () + numToInsert;
4334  curRowInds.reserve (newCapacity);
4335  curRowVals.reserve (newCapacity);
4336  for (size_type k = 0; k < numToInsert; ++k) {
4337  curRowInds.push_back (indices[k]);
4338  curRowVals.push_back (values[k]);
4339  }
4340  }
4341 
// Moves the entries accumulated in nonlocals_ (rows inserted on this process
// that it does not own) to the matrix's actual rows, then clears nonlocals_.
//
// Outline, following the numbered steps in the body:
//   0. All-reduce to find out whether any process has nonlocal rows; if none
//      does, return immediately.
//   1-2. Sort and merge each nonlocal row's entries, count them, and build a
//      Map over the nonlocal rows.
//   3. Build a temporary matrix over that Map and insert the nonlocal
//      entries into it.
//   4. Export (ADD) from the temporary matrix into *this, going through an
//      intermediate one-to-one matrix if the row Map is not one to one.
//   Finally, all-reduce a "locally complete" flag and raise
//   std::runtime_error if any process reported an incomplete Export.
//
// Raises std::runtime_error if fill is not active.
//
// NOTE(review): the embedded listing numbers skip 4355 (between the using
// declarations and the commented-out LO typedef), so one line of this body
// is not visible here; presumably it declares crs_matrix_type, which is used
// below -- confirm.
4342  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4343  void
4344  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
4345  globalAssemble ()
4346  {
4347  using ::Tpetra::Details::ProfilingRegion;
4348  using Teuchos::Comm;
4349  using Teuchos::outArg;
4350  using Teuchos::RCP;
4351  using Teuchos::rcp;
4352  using Teuchos::REDUCE_MAX;
4353  using Teuchos::REDUCE_MIN;
4354  using Teuchos::reduceAll;
4356  //typedef LocalOrdinal LO;
4357  typedef GlobalOrdinal GO;
4358  typedef typename Teuchos::Array<GO>::size_type size_type;
4359  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
4360  ProfilingRegion regionGlobalAssemble ("Tpetra::CrsMatrix::globalAssemble");
4361 
4362  RCP<const Comm<int> > comm = getComm ();
4363 
4364  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4365  (! isFillActive (), std::runtime_error, "Fill must be active before "
4366  "you may call this method.");
4367 
4368  const size_t myNumNonlocalRows = nonlocals_.size ();
4369 
4370  // If no processes have nonlocal rows, then we don't have to do
4371  // anything. Checking this is probably cheaper than constructing
4372  // the Map of nonlocal rows (see below) and noticing that it has
4373  // zero global entries.
4374  {
4375  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4376  int someoneHasNonlocalRows = 0;
4377  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4378  outArg (someoneHasNonlocalRows));
4379  if (someoneHasNonlocalRows == 0) {
4380  return; // no process has nonlocal rows, so nothing to do
4381  }
4382  }
4383 
4384  // 1. Create a list of the "nonlocal" rows on each process. This
4385  // requires iterating over nonlocals_, so while we do this,
4386  // deduplicate the entries and get a count for each nonlocal
4387  // row on this process.
4388  // 2. Construct a new row Map corresponding to those rows. This
4389  // Map is likely overlapping. We know that the Map is not
4390  // empty on all processes, because the above all-reduce and
4391  // return exclude that case.
4392 
4393  RCP<const map_type> nonlocalRowMap;
4394  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
4395  Teuchos::ArrayRCP<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4396  {
4397  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4398  size_type curPos = 0;
4399  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4400  ++mapIter, ++curPos) {
4401  myNonlocalGblRows[curPos] = mapIter->first;
4402  // Get the values and column indices by reference, since we
4403  // intend to change them in place (that's what "erase" does).
4404  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4405  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4406 
4407  // Sort both arrays jointly, using the column indices as keys,
4408  // then merge them jointly. "Merge" here adds values
4409  // corresponding to the same column indices. The first 2 args
4410  // of merge2 are output arguments that work just like the
4411  // return value of std::unique.
4412  sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4413  typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4414  typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4415  merge2 (gblCols_newEnd, vals_newEnd,
4416  gblCols.begin (), gblCols.end (),
4417  vals.begin (), vals.end ());
4418  gblCols.erase (gblCols_newEnd, gblCols.end ());
4419  vals.erase (vals_newEnd, vals.end ());
4420  numEntPerNonlocalRow[curPos] = gblCols.size ();
4421  }
4422 
4423  // Currently, Map requires that its indexBase be the global min
4424  // of all its global indices. Map won't compute this for us, so
4425  // we must do it. If our process has no nonlocal rows, set the
4426  // "min" to the max possible GO value. This ensures that if
4427  // some process has at least one nonlocal row, then it will pick
4428  // that up as the min. We know that at least one process has a
4429  // nonlocal row, since the all-reduce and return at the top of
4430  // this method excluded that case.
4431  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4432  {
4433  auto iter = std::min_element (myNonlocalGblRows.begin (),
4434  myNonlocalGblRows.end ());
4435  if (iter != myNonlocalGblRows.end ()) {
4436  myMinNonlocalGblRow = *iter;
4437  }
4438  }
4439  GO gblMinNonlocalGblRow = 0;
4440  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4441  outArg (gblMinNonlocalGblRow));
4442  const GO indexBase = gblMinNonlocalGblRow;
4443  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4444  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4445  }
4446 
4447  // 3. Use the values and column indices for each nonlocal row, as
4448  // stored in nonlocals_, to construct a CrsMatrix corresponding
4449  // to nonlocal rows. We may use StaticProfile, since we have
4450  // exact counts of the number of entries in each nonlocal row.
4451 
4452  RCP<crs_matrix_type> nonlocalMatrix =
4453  rcp (new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow,
4454  StaticProfile));
4455  {
// curPos is still advanced by the loop header, but its only use in the
// body is commented out.
4456  size_type curPos = 0;
4457  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4458  ++mapIter, ++curPos) {
4459  const GO gblRow = mapIter->first;
4460  // Get values & column indices by ref, just to avoid copy.
4461  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4462  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4463  //const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
4464  nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4465  }
4466  }
4467  // There's no need to fill-complete the nonlocals matrix.
4468  // We just use it as a temporary container for the Export.
4469 
4470  // 4. If the original row Map is one to one, then we can Export
4471  // directly from nonlocalMatrix into this. Otherwise, we have
4472  // to create a temporary matrix with a one-to-one row Map,
4473  // Export into that, then Import from the temporary matrix into
4474  // *this.
4475 
4476  auto origRowMap = this->getRowMap ();
4477  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4478 
4479  int isLocallyComplete = 1; // true by default
4480 
4481  if (origRowMapIsOneToOne) {
4482  export_type exportToOrig (nonlocalRowMap, origRowMap);
4483  if (! exportToOrig.isLocallyComplete ()) {
4484  isLocallyComplete = 0;
4485  }
4486  this->doExport (*nonlocalMatrix, exportToOrig, Tpetra::ADD);
4487  // We're done at this point!
4488  }
4489  else {
4490  // If you ask a Map whether it is one to one, it does some
4491  // communication and stashes intermediate results for later use
4492  // by createOneToOne. Thus, calling createOneToOne doesn't cost
4493  // much more than the original cost of calling isOneToOne.
4494  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
4495  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4496  if (! exportToOneToOne.isLocallyComplete ()) {
4497  isLocallyComplete = 0;
4498  }
4499 
4500  // Create a temporary matrix with the one-to-one row Map.
4501  //
4502  // TODO (mfh 09 Sep 2016, 12 Sep 2016) Estimate # entries in
4503  // each row, to avoid reallocation during the Export operation.
4504  crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4505  // Export from matrix of nonlocals into the temp one-to-one matrix.
4506  oneToOneMatrix.doExport (*nonlocalMatrix, exportToOneToOne, Tpetra::ADD);
4507 
4508  // We don't need the matrix of nonlocals anymore, so get rid of
4509  // it, to keep the memory high-water mark down.
4510  nonlocalMatrix = Teuchos::null;
4511 
4512  // Import from the one-to-one matrix to the original matrix.
4513  import_type importToOrig (oneToOneRowMap, origRowMap);
4514  this->doImport (oneToOneMatrix, importToOrig, Tpetra::ADD);
4515  }
4516 
4517  // It's safe now to clear out nonlocals_, since we've already
4518  // committed side effects to *this. The standard idiom for
4519  // clearing a Container like std::map, is to swap it with an empty
4520  // Container and let the swapped Container fall out of scope.
4521  decltype (nonlocals_) newNonlocals;
4522  std::swap (nonlocals_, newNonlocals);
4523 
4524  // FIXME (mfh 12 Sep 2016) I don't like this all-reduce, and I
4525  // don't like throwing an exception here. A local return value
4526  // would likely be more useful to users. However, if users find
4527  // themselves exercising nonlocal inserts often, then they are
4528  // probably novice users who need the help. See GitHub Issues
4529  // #603 and #601 (esp. the latter) for discussion.
4530 
4531  int isGloballyComplete = 0; // output argument of reduceAll
4532  reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4533  outArg (isGloballyComplete));
// NOTE(review): unlike the check at the top of this method, this one uses
// TEUCHOS_TEST_FOR_EXCEPTION rather than the _CLASS_FUNC variant, so
// presumably the message is not prefixed with tfecfFuncName -- confirm
// whether that is intended.
4534  TEUCHOS_TEST_FOR_EXCEPTION
4535  (isGloballyComplete != 1, std::runtime_error, "On at least one process, "
4536  "you called insertGlobalValues with a global row index which is not in "
4537  "the matrix's row Map on any process in its communicator.");
4538  }
4539 
4540  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4541  void
4543  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
4544  {
4545  if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
4546  myGraph_->resumeFill (params);
4547  }
4548  clearGlobalConstants ();
4549  fillComplete_ = false;
4550  }
4551 
// Intentionally empty: the matrix has no global constants to compute here.
//
// NOTE(review): the embedded listing numbers skip 4554-4555 (class qualifier
// and method name), so the declarator is not visible here; the comment in
// the body suggests this is computeGlobalConstants() -- confirm.
4552  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4553  void
4556  {
4557  // This method doesn't do anything. The analogous method in
4558  // CrsGraph does actually compute something.
4559  //
4560  // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
4561  // setting it to -1), but computeGlobalConstants() does _not_
4562  // compute the Frobenius norm; this is done on demand in
4563  // getFrobeniusNorm(), and the result is cached there.
4564  }
4565 
// Reports whether global constants are available, by returning the graph's
// haveGlobalConstants().
//
// NOTE(review): the embedded listing numbers skip 4568-4569 (class qualifier,
// method name and the opening brace), so the declarator is not visible here;
// presumably this is haveGlobalConstants() -- confirm.
4566  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4567  bool
4570  return getCrsGraphRef ().haveGlobalConstants ();
4571  }
4572 
// Invalidates the cached Frobenius norm by setting frobNorm_ to -1.
//
// NOTE(review): the embedded listing numbers skip 4575-4576 (class qualifier,
// method name and the opening brace), so the declarator is not visible here;
// the comment in the body suggests this is clearGlobalConstants() -- confirm.
4573  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4574  void
4577  // We use -1 to indicate that the Frobenius norm needs to be
4578  // recomputed, since the values might change between now and the
4579  // next fillComplete call.
4580  //
4581  // Oddly enough, clearGlobalConstants() clears frobNorm_, but
4582  // computeGlobalConstants() does _not_ compute the Frobenius norm;
4583  // this is done on demand in getFrobeniusNorm(), and the result is
4584  // cached there.
4585  frobNorm_ = -STM::one ();
4586  }
4587 
4588  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4589  void
4591  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
4592  {
4593  const char tfecfFuncName[] = "fillComplete(params): ";
4594 
4595  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4596  (this->getCrsGraph ().is_null (), std::logic_error,
4597  "getCrsGraph() returns null. This should not happen at this point. "
4598  "Please report this bug to the Tpetra developers.");
4599 
4600  const crs_graph_type& graph = this->getCrsGraphRef ();
4601  if (this->isStaticGraph () && graph.isFillComplete ()) {
4602  // If this matrix's graph is fill complete and the user did not
4603  // supply a domain or range Map, use the graph's domain and
4604  // range Maps.
4605  this->fillComplete (graph.getDomainMap (), graph.getRangeMap (), params);
4606  }
4607  else { // assume that user's row Map is the domain and range Map
4608  Teuchos::RCP<const map_type> rangeMap = graph.getRowMap ();
4609  Teuchos::RCP<const map_type> domainMap = rangeMap;
4610  this->fillComplete (domainMap, rangeMap, params);
4611  }
4612  }
4613 
4614  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4615  void
4617  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
4618  const Teuchos::RCP<const map_type>& rangeMap,
4619  const Teuchos::RCP<Teuchos::ParameterList>& params)
4620  {
4621  using ::Tpetra::Details::ProfilingRegion;
4622  using Teuchos::ArrayRCP;
4623  using Teuchos::RCP;
4624  using Teuchos::rcp;
4625  const char tfecfFuncName[] = "fillComplete: ";
4626  ProfilingRegion regionFillComplete ("Tpetra::CrsMatrix::fillComplete");
4627 
4628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4629  (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4630  "Matrix fill state must be active (isFillActive() "
4631  "must be true) before you may call fillComplete().");
4632  const int numProcs = this->getComm ()->getSize ();
4633 
4634  //
4635  // Read parameters from the input ParameterList.
4636  //
4637 
4638  // If true, the caller promises that no process did nonlocal
4639  // changes since the last call to fillComplete.
4640  bool assertNoNonlocalInserts = false;
4641  // If true, makeColMap sorts remote GIDs (within each remote
4642  // process' group).
4643  bool sortGhosts = true;
4644 
4645  if (! params.is_null ()) {
4646  assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
4647  assertNoNonlocalInserts);
4648  if (params->isParameter ("sort column map ghost gids")) {
4649  sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
4650  }
4651  else if (params->isParameter ("Sort column Map ghost GIDs")) {
4652  sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
4653  }
4654  }
4655  // We also don't need to do global assembly if there is only one
4656  // process in the communicator.
4657  const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4658  // This parameter only matters if this matrix owns its graph.
4659  if (! this->myGraph_.is_null ()) {
4660  this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4661  }
4662 
4663  if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4664  if (this->hasColMap ()) {
4665  // We have a column Map, so use local indices.
4666  this->allocateValues (LocalIndices, GraphNotYetAllocated);
4667  } else {
4668  // We don't have a column Map, so use global indices.
4669  this->allocateValues (GlobalIndices, GraphNotYetAllocated);
4670  }
4671  }
4672  // Global assemble, if we need to. This call only costs a single
4673  // all-reduce if we didn't need global assembly after all.
4674  if (needGlobalAssemble) {
4675  this->globalAssemble ();
4676  }
4677  else {
4678  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4679  (numProcs == 1 && nonlocals_.size() > 0,
4680  std::runtime_error, "Cannot have nonlocal entries on a serial run. "
4681  "An invalid entry (i.e., with row index not in the row Map) must have "
4682  "been submitted to the CrsMatrix.");
4683  }
4684 
4685  if (this->isStaticGraph ()) {
4686  // FIXME (mfh 14 Nov 2016) In order to fix #843, I enable the
4687  // checks below only in debug mode. It would be nicer to do a
4688  // local check, then propagate the error state in a deferred
4689  // way, whenever communication happens. That would reduce the
4690  // cost of checking, to the point where it may make sense to
4691  // enable it even in release mode.
4692 #ifdef HAVE_TPETRA_DEBUG
4693  // FIXME (mfh 18 Jun 2014) This check for correctness of the
4694  // input Maps incurs a penalty of two all-reduces for the
4695  // otherwise optimal const graph case.
4696  //
4697  // We could turn these (max) 2 all-reduces into (max) 1, by
4698  // fusing them. We could do this by adding a "locallySameAs"
4699  // method to Map, which would return one of four states:
4700  //
4701  // a. Certainly globally the same
4702  // b. Certainly globally not the same
4703  // c. Locally the same
4704  // d. Locally not the same
4705  //
4706  // The first two states don't require further communication.
4707  // The latter two states require an all-reduce to communicate
4708  // globally, but we only need one all-reduce, since we only need
4709  // to check whether at least one of the Maps is wrong.
4710  const bool domainMapsMatch =
4711  this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4712  const bool rangeMapsMatch =
4713  this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4714 
4715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4716  (! domainMapsMatch, std::runtime_error,
4717  "The CrsMatrix's domain Map does not match the graph's domain Map. "
4718  "The graph cannot be changed because it was given to the CrsMatrix "
4719  "constructor as const. You can fix this by passing in the graph's "
4720  "domain Map and range Map to the matrix's fillComplete call.");
4721 
4722  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4723  (! rangeMapsMatch, std::runtime_error,
4724  "The CrsMatrix's range Map does not match the graph's range Map. "
4725  "The graph cannot be changed because it was given to the CrsMatrix "
4726  "constructor as const. You can fix this by passing in the graph's "
4727  "domain Map and range Map to the matrix's fillComplete call.");
4728 #endif // HAVE_TPETRA_DEBUG
4729 
4730  // The matrix does _not_ own the graph, and the graph's
4731  // structure is already fixed, so just fill the local matrix.
4732  this->fillLocalMatrix (params);
4733  }
4734  else {
4735  // Set the graph's domain and range Maps. This will clear the
4736  // Import if the domain Map has changed (is a different
4737  // pointer), and the Export if the range Map has changed (is a
4738  // different pointer).
4739  this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4740 
4741  // Make the graph's column Map, if necessary.
4742  Teuchos::Array<int> remotePIDs (0);
4743  const bool mustBuildColMap = ! this->hasColMap ();
4744  if (mustBuildColMap) {
4745  this->myGraph_->makeColMap (remotePIDs);
4746  }
4747 
4748  // Make indices local, if necessary. The method won't do
4749  // anything if the graph is already locally indexed.
4750  const std::pair<size_t, std::string> makeIndicesLocalResult =
4751  this->myGraph_->makeIndicesLocal ();
4752  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
4753  // the error state to makeImportExport or
4754  // computeGlobalConstants, which may do all-reduces and thus may
4755  // have the opportunity to communicate that error state.
4756  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4757  (makeIndicesLocalResult.first != 0, std::runtime_error,
4758  makeIndicesLocalResult.second);
4759 
4760  const bool sorted = this->myGraph_->isSorted ();
4761  const bool merged = this->myGraph_->isMerged ();
4762  this->sortAndMergeIndicesAndValues (sorted, merged);
4763 
4764  // Make Import and Export objects, if they haven't been made
4765  // already. If we made a column Map above, reuse information
4766  // from that process to avoid communiation in the Import setup.
4767  this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4768 
4769  // The matrix _does_ own the graph, so fill the local graph at
4770  // the same time as the local matrix.
4771  this->fillLocalGraphAndMatrix (params);
4772 
4773  const bool callGraphComputeGlobalConstants = params.get () == nullptr ||
4774  params->get ("compute global constants", true);
4775  const bool computeLocalTriangularConstants = params.get () == nullptr ||
4776  params->get ("compute local triangular constants", true);
4777  if (callGraphComputeGlobalConstants) {
4778  this->myGraph_->computeGlobalConstants (computeLocalTriangularConstants);
4779  }
4780  else {
4781  this->myGraph_->computeLocalConstants (computeLocalTriangularConstants);
4782  }
4783  this->myGraph_->fillComplete_ = true;
4784  this->myGraph_->checkInternalState ();
4785  }
4786 
4787  const bool callComputeGlobalConstants = params.get () == nullptr ||
4788  params->get ("compute global constants", true);
4789  if (callComputeGlobalConstants) {
4790  this->computeGlobalConstants ();
4791  }
4792 
4793  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4794 
4795  this->fillComplete_ = true; // Now we're fill complete!
4796  this->checkInternalState ();
4797  }
4798 
4799  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4800  void
4802  expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
4803  const Teuchos::RCP<const map_type> & rangeMap,
4804  const Teuchos::RCP<const import_type>& importer,
4805  const Teuchos::RCP<const export_type>& exporter,
4806  const Teuchos::RCP<Teuchos::ParameterList> &params)
4807  {
4808 #ifdef HAVE_TPETRA_MMM_TIMINGS
4809  std::string label;
4810  if(!params.is_null())
4811  label = params->get("Timer Label",label);
4812  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
4813  using Teuchos::TimeMonitor;
4814  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-Graph"))));
4815 #endif
4816 
4817 
4818  const char tfecfFuncName[] = "expertStaticFillComplete: ";
4819  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4820  std::runtime_error, "Matrix fill state must be active (isFillActive() "
4821  "must be true) before calling fillComplete().");
4822  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4823  myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
4824 
4825 
4826  // We will presume globalAssemble is not needed, so we do the ESFC on the graph
4827  myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4828 
4829  const bool callComputeGlobalConstants = params.get () == nullptr ||
4830  params->get ("compute global constants", true);
4831  if (callComputeGlobalConstants) {
4832 #ifdef HAVE_TPETRA_MMM_TIMINGS
4833  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cGC"))));
4834 #endif
4835  this->computeGlobalConstants ();
4836  }
4837 
4838 #ifdef HAVE_TPETRA_MMM_TIMINGS
4839  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-fLGAM"))));
4840 #endif
4841 
4842  // Fill the local graph and matrix
4843  fillLocalGraphAndMatrix (params);
4844 
4845  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4846 
4847  // Now we're fill complete!
4848  fillComplete_ = true;
4849 
4850  // Sanity checks at the end.
4851 #ifdef HAVE_TPETRA_DEBUG
4852  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4853  ": We're at the end of fillComplete(), but isFillActive() is true. "
4854  "Please report this bug to the Tpetra developers.");
4855  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4856  ": We're at the end of fillComplete(), but isFillActive() is true. "
4857  "Please report this bug to the Tpetra developers.");
4858 #endif // HAVE_TPETRA_DEBUG
4859 
4860 #ifdef HAVE_TPETRA_MMM_TIMINGS
4861  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS"))));
4862 #endif
4863 
4864  checkInternalState();
4865  }
4866 
4867  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4868  size_t
4871  const RowInfo& rowInfo)
4872  {
4873 #ifdef HAVE_TPETRA_DEBUG
4874  const char tfecfFuncName[] = "mergeRowIndicesAndValues: ";
4875 #endif // HAVE_TPETRA_DEBUG
4876 
4877  auto rowValues = this->getRowViewNonConst (rowInfo);
4878  typedef typename std::decay<decltype (rowValues[0]) >::type value_type;
4879  value_type* rowValueIter = rowValues.data ();
4880  auto inds_view = graph.getLocalKokkosRowViewNonConst (rowInfo);
4881 
4882  // beg,end define a half-exclusive interval over which to iterate.
4883  LocalOrdinal* beg = inds_view.data ();
4884  LocalOrdinal* end = inds_view.data () + rowInfo.numEntries;
4885 
4886 #ifdef HAVE_TPETRA_DEBUG
4887  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4888  (rowInfo.allocSize != static_cast<size_t> (inds_view.extent (0)) ||
4889  rowInfo.allocSize != static_cast<size_t> (rowValues.extent (0)),
4890  std::runtime_error, "rowInfo.allocSize = " << rowInfo.allocSize
4891  << " != inds_view.extent(0) = " << inds_view.extent (0)
4892  << " || rowInfo.allocSize = " << rowInfo.allocSize
4893  << " != rowValues.extent(0) = " << rowValues.extent (0) << ".");
4894 #endif // HAVE_TPETRA_DEBUG
4895 
4896  LocalOrdinal* newend = beg;
4897  if (beg != end) {
4898  LocalOrdinal* cur = beg + 1;
4899  value_type* vcur = rowValueIter + 1;
4900  value_type* vend = rowValueIter;
4901  cur = beg+1;
4902  while (cur != end) {
4903  if (*cur != *newend) {
4904  // new entry; save it
4905  ++newend;
4906  ++vend;
4907  (*newend) = (*cur);
4908  (*vend) = (*vcur);
4909  }
4910  else {
4911  // old entry; merge it
4912  //(*vend) = f (*vend, *vcur);
4913  (*vend) += *vcur;
4914  }
4915  ++cur;
4916  ++vcur;
4917  }
4918  ++newend; // one past the last entry, per typical [beg,end) semantics
4919  }
4920  const size_t mergedEntries = newend - beg;
4921  graph.k_numRowEntries_(rowInfo.localRow) = mergedEntries;
4922  const size_t numDups = rowInfo.numEntries - mergedEntries;
4923  return numDups;
4924  }
4925 
4926  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4927  void
4929  sortAndMergeIndicesAndValues (const bool sorted, const bool merged)
4930  {
4931  using ::Tpetra::Details::ProfilingRegion;
4932  typedef LocalOrdinal LO;
4933  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4934  host_execution_space;
4935  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4936  //typedef Kokkos::RangePolicy<Kokkos::Serial, LO> range_type;
4937  const char tfecfFuncName[] = "sortAndMergeIndicesAndValues: ";
4938  ProfilingRegion regionSAM ("Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4939 
4940  if (! sorted || ! merged) {
4941  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4942  (this->isStaticGraph (), std::runtime_error, "Cannot sort or merge with "
4943  "\"static\" (const) graph, since the matrix does not own the graph.");
4944  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4945  (this->myGraph_.is_null (), std::logic_error, "myGraph_ is null, but "
4946  "this matrix claims ! isStaticGraph(). "
4947  "Please report this bug to the Tpetra developers.");
4948  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4949  (this->isStorageOptimized (), std::logic_error, "It is invalid to call "
4950  "this method if the graph's storage has already been optimized. "
4951  "Please report this bug to the Tpetra developers.");
4952 
4953  crs_graph_type& graph = * (this->myGraph_);
4954  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4955  size_t totalNumDups = 0;
4956  // FIXME (mfh 10 May 2017) This may assume CUDA UVM.
4957  Kokkos::parallel_reduce (range_type (0, lclNumRows),
4958  [this, &graph, sorted, merged] (const LO& lclRow, size_t& numDups) {
4959  const RowInfo rowInfo = graph.getRowInfo (lclRow);
4960  if (! sorted) {
4961  auto lclColInds = graph.getLocalKokkosRowViewNonConst (rowInfo);
4962  auto vals = this->getRowViewNonConst (rowInfo);
4963  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least
4964  // for lclColInds, if not also for values.
4965  sort2 (lclColInds.data (),
4966  lclColInds.data () + rowInfo.numEntries,
4967  vals.data ());
4968  }
4969  if (! merged) {
4970  numDups += this->mergeRowIndicesAndValues (graph, rowInfo);
4971  }
4972  }, totalNumDups);
4973  if (! sorted) {
4974  graph.indicesAreSorted_ = true; // we just sorted every row
4975  }
4976  if (! merged) {
4977  graph.noRedundancies_ = true; // we just merged every row
4978  }
4979  }
4980  }
4981 
4982  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4983  void
4987  Scalar alpha,
4988  Scalar beta) const
4989  {
4991  using Teuchos::null;
4992  using Teuchos::RCP;
4993  using Teuchos::rcp;
4994  using Teuchos::rcp_const_cast;
4995  using Teuchos::rcpFromRef;
4996  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4997  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4998 
4999  // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
5000  // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
5001  // failing only for the Kokkos refactor version of Tpetra. It's a
5002  // good idea regardless to have the bypass.
5003  if (alpha == ZERO) {
5004  if (beta == ZERO) {
5005  Y_in.putScalar (ZERO);
5006  } else if (beta != ONE) {
5007  Y_in.scale (beta);
5008  }
5009  return;
5010  }
5011 
5012  // It's possible that X is a view of Y or vice versa. We don't
5013  // allow this (apply() requires that X and Y not alias one
5014  // another), but it's helpful to detect and work around this case.
5015  // We don't try to to detect the more subtle cases (e.g., one is a
5016  // subview of the other, but their initial pointers differ). We
5017  // only need to do this if this matrix's Import is trivial;
5018  // otherwise, we don't actually apply the operator from X into Y.
5019 
5020  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5021  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5022 
5023  // If beta == 0, then the output MV will be overwritten; none of
5024  // its entries should be read. (Sparse BLAS semantics say that we
5025  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
5026  // This matters if we need to do an Export operation; see below.
5027  const bool Y_is_overwritten = (beta == ZERO);
5028 
5029  // We treat the case of a replicated MV output specially.
5030  const bool Y_is_replicated =
5031  (! Y_in.isDistributed () && this->getComm ()->getSize () != 1);
5032 
5033  // This is part of the special case for replicated MV output.
5034  // We'll let each process do its thing, but do an all-reduce at
5035  // the end to sum up the results. Setting beta=0 on all processes
5036  // but Proc 0 makes the math work out for the all-reduce. (This
5037  // assumes that the replicated data is correctly replicated, so
5038  // that the data are the same on all processes.)
5039  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5040  beta = ZERO;
5041  }
5042 
5043  // Temporary MV for Import operation. After the block of code
5044  // below, this will be an (Imported if necessary) column Map MV
5045  // ready to give to localMultiply().
5046  RCP<const MV> X_colMap;
5047  if (importer.is_null ()) {
5048  if (! X_in.isConstantStride ()) {
5049  // Not all sparse mat-vec kernels can handle an input MV with
5050  // nonconstant stride correctly, so we have to copy it in that
5051  // case into a constant stride MV. To make a constant stride
5052  // copy of X_in, we force creation of the column (== domain)
5053  // Map MV (if it hasn't already been created, else fetch the
5054  // cached copy). This avoids creating a new MV each time.
5055  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
5056  Tpetra::deep_copy (*X_colMapNonConst, X_in);
5057  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5058  }
5059  else {
5060  // The domain and column Maps are the same, so do the local
5061  // multiply using the domain Map input MV X_in.
5062  X_colMap = rcpFromRef (X_in);
5063  }
5064  }
5065  else { // need to Import source (multi)vector
5066  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply: Import");
5067 
5068  // We're doing an Import anyway, which will copy the relevant
5069  // elements of the domain Map MV X_in into a separate column Map
5070  // MV. Thus, we don't have to worry whether X_in is constant
5071  // stride.
5072  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
5073 
5074  // Import from the domain Map MV to the column Map MV.
5075  X_colMapNonConst->doImport (X_in, *importer, INSERT);
5076  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5077  }
5078 
5079  // Temporary MV for doExport (if needed), or for copying a
5080  // nonconstant stride output MV into a constant stride MV. This
5081  // is null if we don't need the temporary MV, that is, if the
5082  // Export is trivial (null).
5083  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
5084 
5085  // If we have a nontrivial Export object, we must perform an
5086  // Export. In that case, the local multiply result will go into
5087  // the row Map multivector. We don't have to make a
5088  // constant-stride version of Y_in in this case, because we had to
5089  // make a constant stride Y_rowMap MV and do an Export anyway.
5090  if (! exporter.is_null ()) {
5091  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
5092  {
5093  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply: Export");
5094 
5095  // If we're overwriting the output MV Y_in completely (beta ==
5096  // 0), then make sure that it is filled with zeros before we
5097  // do the Export. Otherwise, the ADD combine mode will use
5098  // data in Y_in, which is supposed to be zero.
5099  if (Y_is_overwritten) {
5100  Y_in.putScalar (ZERO);
5101  }
5102  else {
5103  // Scale output MV by beta, so that doExport sums in the
5104  // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
5105  Y_in.scale (beta);
5106  }
5107  // Do the Export operation.
5108  Y_in.doExport (*Y_rowMap, *exporter, ADD);
5109  }
5110  }
5111  else { // Don't do an Export: row Map and range Map are the same.
5112  //
5113  // If Y_in does not have constant stride, or if the column Map
5114  // MV aliases Y_in, then we can't let the kernel write directly
5115  // to Y_in. Instead, we have to use the cached row (== range)
5116  // Map MV as temporary storage.
5117  //
5118  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5119  // the user passed in the same MultiVector for both X and Y. It
5120  // won't detect whether one MultiVector views the other. We
5121  // should also check the MultiVectors' raw data pointers.
5122  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
5123  // Force creating the MV if it hasn't been created already.
5124  // This will reuse a previously created cached MV.
5125  Y_rowMap = getRowMapMultiVector (Y_in, true);
5126 
5127  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
5128  // since we're overwriting it anyway.
5129  if (beta != ZERO) {
5130  Tpetra::deep_copy (*Y_rowMap, Y_in);
5131  }
5132  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
5133  Tpetra::deep_copy (Y_in, *Y_rowMap);
5134  }
5135  else {
5136  this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
5137  }
5138  }
5139 
5140  // If the range Map is a locally replicated Map, sum up
5141  // contributions from each process. We set beta = 0 on all
5142  // processes but Proc 0 initially, so this will handle the scaling
5143  // factor beta correctly.
5144  if (Y_is_replicated) {
5145  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply: Reduce Y");
5146  Y_in.reduce ();
5147  }
5148  }
5149 
5150  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5151  void
5155  const Teuchos::ETransp mode,
5156  Scalar alpha,
5157  Scalar beta) const
5158  {
5160  using Teuchos::null;
5161  using Teuchos::RCP;
5162  using Teuchos::rcp;
5163  using Teuchos::rcp_const_cast;
5164  using Teuchos::rcpFromRef;
5165  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5166 
5167  // Take shortcuts for alpha == 0.
5168  if (alpha == ZERO) {
5169  // Follow the Sparse BLAS convention by ignoring both the matrix
5170  // and X_in, in this case.
5171  if (beta == ZERO) {
5172  // Follow the Sparse BLAS convention by overwriting any Inf or
5173  // NaN values in Y_in, in this case.
5174  Y_in.putScalar (ZERO);
5175  }
5176  else {
5177  Y_in.scale (beta);
5178  }
5179  return;
5180  }
5181 
5182  const size_t numVectors = X_in.getNumVectors ();
5183 
5184  // We don't allow X_in and Y_in to alias one another. It's hard
5185  // to check this, because advanced users could create views from
5186  // raw pointers. However, if X_in and Y_in reference the same
5187  // object, we will do the user a favor by copying X into new
5188  // storage (with a warning). We only need to do this if we have
5189  // trivial importers; otherwise, we don't actually apply the
5190  // operator from X into Y.
5191  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5192  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5193  // access X indirectly, in case we need to create temporary storage
5194  RCP<const MV> X;
5195 
5196  // some parameters for below
5197  const bool Y_is_replicated = ! Y_in.isDistributed ();
5198  const bool Y_is_overwritten = (beta == ZERO);
5199  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5200  beta = ZERO;
5201  }
5202 
5203  // The kernels do not allow input or output with nonconstant stride.
5204  if (! X_in.isConstantStride () && importer.is_null ()) {
5205  X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
5206  } else {
5207  X = rcpFromRef (X_in); // Reference to X_in
5208  }
5209 
5210  // Set up temporary multivectors for Import and/or Export.
5211  if (importer != Teuchos::null) {
5212  if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
5213  importMV_ = null;
5214  }
5215  if (importMV_ == null) {
5216  importMV_ = rcp (new MV (this->getColMap (), numVectors));
5217  }
5218  }
5219  if (exporter != Teuchos::null) {
5220  if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
5221  exportMV_ = null;
5222  }
5223  if (exportMV_ == null) {
5224  exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
5225  }
5226  }
5227 
5228  // If we have a non-trivial exporter, we must import elements that
5229  // are permuted or are on other processors.
5230  if (! exporter.is_null ()) {
5231  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply (transpose): Import");
5232  exportMV_->doImport (X_in, *exporter, INSERT);
5233  X = exportMV_; // multiply out of exportMV_
5234  }
5235 
5236  // If we have a non-trivial importer, we must export elements that
5237  // are permuted or belong to other processors. We will compute
5238  // solution into the to-be-exported MV; get a view.
5239  if (importer != Teuchos::null) {
5240  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply (transpose): Export");
5241 
5242  // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
5243  // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
5244  // back and figure out why this helps. importMV_ SHOULD be
5245  // completely overwritten in the localMultiply() call below,
5246  // because beta == ZERO there.
5247  importMV_->putScalar (ZERO);
5248  // Do the local computation.
5249  this->localApply (*X, *importMV_, mode, alpha, ZERO);
5250  if (Y_is_overwritten) {
5251  Y_in.putScalar (ZERO);
5252  } else {
5253  Y_in.scale (beta);
5254  }
5255  Y_in.doExport (*importMV_, *importer, ADD);
5256  }
5257  // otherwise, multiply into Y
5258  else {
5259  // can't multiply in-situ; can't multiply into non-strided multivector
5260  //
5261  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5262  // the user passed in the same MultiVector for both X and Y. It
5263  // won't detect whether one MultiVector views the other. We
5264  // should also check the MultiVectors' raw data pointers.
5265  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
5266  // Make a deep copy of Y_in, into which to write the multiply result.
5267  MV Y (Y_in, Teuchos::Copy);
5268  this->localApply (*X, Y, mode, alpha, beta);
5269  Tpetra::deep_copy (Y_in, Y);
5270  } else {
5271  this->localApply (*X, Y_in, mode, alpha, beta);
5272  }
5273  }
5274 
5275  // If the range Map is a locally replicated map, sum the
5276  // contributions from each process. (That's why we set beta=0
5277  // above for all processes but Proc 0.)
5278  if (Y_is_replicated) {
5279  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply (transpose): Reduce Y");
5280  Y_in.reduce ();
5281  }
5282  }
5283 
5284  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5285  void
5289  const Teuchos::ETransp mode,
5290  const Scalar& alpha,
5291  const Scalar& beta) const
5292  {
5294  ProfilingRegion regionLocalApply ("Tpetra::CrsMatrix::localApply");
5295  this->template localMultiply<Scalar, Scalar> (X, Y, mode, alpha, beta);
5296  }
5297 
5298  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5299  void
5303  Teuchos::ETransp mode,
5304  Scalar alpha,
5305  Scalar beta) const
5306  {
5308  const char fnName[] = "Tpetra::CrsMatrix::apply";
5309 
5310  TEUCHOS_TEST_FOR_EXCEPTION
5311  (! isFillComplete (), std::runtime_error,
5312  fnName << ": Cannot call apply() until fillComplete() "
5313  "has been called.");
5314 
5315  if (mode == Teuchos::NO_TRANS) {
5316  ProfilingRegion regionNonTranspose (fnName);
5317  this->applyNonTranspose (X, Y, alpha, beta);
5318  }
5319  else {
5320  ProfilingRegion regionTranspose ("Tpetra::CrsMatrix::apply (transpose)");
5321 
5322  //Thyra was implicitly assuming that Y gets set to zero / or is overwritten
5323  //when bets==0. This was not the case with transpose in a multithreaded
5324  //environment where a multiplication with subsequent atomic_adds is used
5325  //since 0 is effectively not special cased. Doing the explicit set to zero here
5326  //This catches cases where Y is nan or inf.
5327  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5328  if (beta == ZERO) {
5329  Y.putScalar (ZERO);
5330  }
5331  this->applyTranspose (X, Y, mode, alpha, beta);
5332  }
5333  }
5334 
5335  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5336  void
5341  const Scalar& dampingFactor,
5342  const ESweepDirection direction,
5343  const int numSweeps) const
5344  {
5345  reorderedGaussSeidel (B, X, D, Teuchos::null, dampingFactor, direction, numSweeps);
5346  }
5347 
5348  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5349  void
5354  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
5355  const Scalar& dampingFactor,
5356  const ESweepDirection direction,
5357  const int numSweeps) const
5358  {
5359  using Teuchos::null;
5360  using Teuchos::RCP;
5361  using Teuchos::rcp;
5362  using Teuchos::rcp_const_cast;
5363  using Teuchos::rcpFromRef;
5364  typedef Scalar ST;
5365 
5366  TEUCHOS_TEST_FOR_EXCEPTION(
5367  isFillComplete() == false, std::runtime_error,
5368  "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until "
5369  "fillComplete() has been called.");
5370  TEUCHOS_TEST_FOR_EXCEPTION(
5371  numSweeps < 0,
5372  std::invalid_argument,
5373  "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be , "
5374  "nonnegative but you provided numSweeps = " << numSweeps << " < 0.");
5375 
5376  // Translate from global to local sweep direction.
5377  // While doing this, validate the input.
5378  ESweepDirection localDirection;
5379  if (direction == Forward) {
5380  localDirection = Forward;
5381  }
5382  else if (direction == Backward) {
5383  localDirection = Backward;
5384  }
5385  else if (direction == Symmetric) {
5386  // We'll control local sweep direction manually.
5387  localDirection = Forward;
5388  }
5389  else {
5390  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
5391  "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have "
5392  "any of its valid values: Forward, Backward, or Symmetric.");
5393  }
5394 
5395  if (numSweeps == 0) {
5396  return; // Nothing to do.
5397  }
5398 
5399  // We don't need the Export object because this method assumes
5400  // that the row, domain, and range Maps are the same. We do need
5401  // the Import object, if there is one, though.
5402  RCP<const import_type> importer = this->getGraph()->getImporter();
5403  RCP<const export_type> exporter = this->getGraph()->getExporter();
5404  TEUCHOS_TEST_FOR_EXCEPTION(
5405  ! exporter.is_null (), std::runtime_error,
5406  "Tpetra's gaussSeidel implementation requires that the row, domain, "
5407  "and range Maps be the same. This cannot be the case, because the "
5408  "matrix has a nontrivial Export object.");
5409 
5410  RCP<const map_type> domainMap = this->getDomainMap ();
5411  RCP<const map_type> rangeMap = this->getRangeMap ();
5412  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
5413  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
5414 
5415 #ifdef HAVE_TEUCHOS_DEBUG
5416  {
5417  // The relation 'isSameAs' is transitive. It's also a
5418  // collective, so we don't have to do a "shared" test for
5419  // exception (i.e., a global reduction on the test value).
5420  TEUCHOS_TEST_FOR_EXCEPTION(
5421  ! X.getMap ()->isSameAs (*domainMap),
5422  std::runtime_error,
5423  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5424  "multivector X be in the domain Map of the matrix.");
5425  TEUCHOS_TEST_FOR_EXCEPTION(
5426  ! B.getMap ()->isSameAs (*rangeMap),
5427  std::runtime_error,
5428  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5429  "B be in the range Map of the matrix.");
5430  TEUCHOS_TEST_FOR_EXCEPTION(
5431  ! D.getMap ()->isSameAs (*rowMap),
5432  std::runtime_error,
5433  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5434  "D be in the row Map of the matrix.");
5435  TEUCHOS_TEST_FOR_EXCEPTION(
5436  ! rowMap->isSameAs (*rangeMap),
5437  std::runtime_error,
5438  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
5439  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
5440  TEUCHOS_TEST_FOR_EXCEPTION(
5441  ! domainMap->isSameAs (*rangeMap),
5442  std::runtime_error,
5443  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
5444  "the range Map of the matrix be the same.");
5445  }
5446 #else
5447  // Forestall any compiler warnings for unused variables.
5448  (void) rangeMap;
5449  (void) rowMap;
5450 #endif // HAVE_TEUCHOS_DEBUG
5451 
5452  // If B is not constant stride, copy it into a constant stride
5453  // multivector. We'll handle the right-hand side B first and deal
5454  // with X right before the sweeps, to improve locality of the
5455  // first sweep. (If the problem is small enough, then that will
5456  // hopefully keep more of the entries of X in cache. This
5457  // optimizes for the typical case of a small number of sweeps.)
5458  RCP<const MV> B_in;
5459  if (B.isConstantStride()) {
5460  B_in = rcpFromRef (B);
5461  }
5462  else {
5463  // The range Map and row Map are the same in this case, so we
5464  // can use the (possibly cached) row Map multivector to store a
5465  // constant stride copy of B. We don't have to copy back, since
5466  // Gauss-Seidel won't modify B.
5467  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
5468  deep_copy (*B_in_nonconst, B); // Copy from B into B_in(_nonconst).
5469  B_in = rcp_const_cast<const MV> (B_in_nonconst);
5470 
5472  ! B.isConstantStride (),
5473  std::runtime_error,
5474  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
5475  "requires that X and B both have constant stride. Since B does not "
5476  "have constant stride, we had to make a copy. This is a limitation of "
5477  "the current implementation and not your fault, but we still report it "
5478  "as an efficiency warning for your information.");
5479  }
5480 
5481  // If X is not constant stride, copy it into a constant stride
5482  // multivector. Also, make the column Map multivector X_colMap,
5483  // and its domain Map view X_domainMap. (X actually must be a
5484  // domain Map view of a column Map multivector; exploit this, if X
5485  // has constant stride.)
5486 
5487  RCP<MV> X_domainMap;
5488  RCP<MV> X_colMap;
5489  bool copiedInput = false;
5490 
5491  if (importer.is_null ()) { // Domain and column Maps are the same.
5492  if (X.isConstantStride ()) {
5493  X_domainMap = rcpFromRef (X);
5494  X_colMap = X_domainMap;
5495  copiedInput = false;
5496  }
5497  else {
5498  // Get a temporary column Map multivector, make a domain Map
5499  // view of it, and copy X into the domain Map view. We have
5500  // to copy here because we won't be doing Import operations.
5501  X_colMap = getColumnMapMultiVector (X, true);
5502  X_domainMap = X_colMap; // Domain and column Maps are the same.
5503  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
5504  copiedInput = true;
5506  ! X.isConstantStride (), std::runtime_error,
5507  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
5508  "Gauss-Seidel kernel requires that X and B both have constant "
5509  "stride. Since X does not have constant stride, we had to make a "
5510  "copy. This is a limitation of the current implementation and not "
5511  "your fault, but we still report it as an efficiency warning for "
5512  "your information.");
5513  }
5514  }
5515  else { // We will be doing Import operations in the sweeps.
5516  if (X.isConstantStride ()) {
5517  X_domainMap = rcpFromRef (X);
5518  // This kernel assumes that X is a domain Map view of a column
5519  // Map multivector. We will only check if this is valid if
5520  // the CMake configure Teuchos_ENABLE_DEBUG is ON.
5521  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
5522 
5523  // FIXME (mfh 19 Mar 2013) Do we need to fill the remote
5524  // entries of X_colMap with zeros? Do we need to fill all of
5525  // X_domainMap initially with zeros? Ifpack
5526  // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely
5527  // new MultiVector each time.
5528 
5529  // Do the first Import for the first sweep. This simplifies
5530  // the logic in the sweeps.
5531  X_colMap->doImport (X, *importer, INSERT);
5532  copiedInput = false;
5533  }
5534  else {
5535  // Get a temporary column Map multivector X_colMap, and make a
5536  // domain Map view X_domainMap of it. Instead of copying, we
5537  // do an Import from X into X_domainMap. This saves us a
5538  // copy, since the Import has to copy the data anyway.
5539  X_colMap = getColumnMapMultiVector (X, true);
5540  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
5541  X_colMap->doImport (X, *importer, INSERT);
5542  copiedInput = true;
5544  ! X.isConstantStride (), std::runtime_error,
5545  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
5546  "Gauss-Seidel kernel requires that X and B both have constant stride. "
5547  "Since X does not have constant stride, we had to make a copy. "
5548  "This is a limitation of the current implementation and not your fault, "
5549  "but we still report it as an efficiency warning for your information.");
5550  }
5551  }
5552 
5553  for (int sweep = 0; sweep < numSweeps; ++sweep) {
5554  if (! importer.is_null () && sweep > 0) {
5555  // We already did the first Import for the zeroth sweep.
5556  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5557  }
5558 
5559  // Do local Gauss-Seidel.
5560  if (direction != Symmetric) {
5561  if (rowIndices.is_null ()) {
5562  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5563  dampingFactor,
5564  localDirection);
5565  }
5566  else {
5567  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5568  D, rowIndices,
5569  dampingFactor,
5570  localDirection);
5571  }
5572  }
5573  else { // direction == Symmetric
5574  const bool doImportBetweenDirections = false;
5575  if (rowIndices.is_null ()) {
5576  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5577  dampingFactor,
5578  Forward);
5579  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
5580  // Gauss-Seidel" does _not_ do an Import between the forward
5581  // and backward sweeps. This makes sense, because Aztec
5582  // considers "symmetric Gauss-Seidel" a subdomain solver.
5583  if (doImportBetweenDirections) {
5584  // Communicate again before the Backward sweep.
5585  if (! importer.is_null ()) {
5586  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5587  }
5588  }
5589  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5590  dampingFactor,
5591  Backward);
5592  }
5593  else {
5594  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5595  D, rowIndices,
5596  dampingFactor,
5597  Forward);
5598  if (doImportBetweenDirections) {
5599  // Communicate again before the Backward sweep.
5600  if (! importer.is_null ()) {
5601  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5602  }
5603  }
5604  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5605  D, rowIndices,
5606  dampingFactor,
5607  Backward);
5608  }
5609  }
5610  }
5611 
5612  if (copiedInput) {
5613  deep_copy (X, *X_domainMap); // Copy back from X_domainMap to X.
5614  }
5615  }
5616 
// Convenience wrapper: run Gauss-Seidel sweeps without a row reordering.
// All arguments are forwarded unchanged to reorderedGaussSeidelCopy, with
// Teuchos::null passed in the row-index position.
//
// NOTE(review): listing lines 5619-5622 (the qualified function name and the
// leading parameters) are missing from this extract; the call below shows
// that the leading arguments are named X, B, and D.
5617  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5618  void
5623  const Scalar& dampingFactor,
5624  const ESweepDirection direction,
5625  const int numSweeps,
5626  const bool zeroInitialGuess) const
5627  {
5628  reorderedGaussSeidelCopy (X, B, D, Teuchos::null, dampingFactor, direction,
5629  numSweeps, zeroInitialGuess);
5630  }
5631 
// Apply numSweeps sweeps of (optionally row-reordered) Gauss-Seidel to X.
// The sweeps operate on a constant-stride column Map multivector (X_colMap);
// when that is a temporary rather than X itself, the result is copied back
// into X at the end.
//
// NOTE(review): listing lines 5634-5637 (the qualified function name and the
// X, B, and D parameters) are missing from this extract.
//
// rowIndices       - if null, localGaussSeidel is called; otherwise the list
//                    is passed to reorderedLocalGaussSeidel.
// dampingFactor    - forwarded unchanged to the local kernel.
// direction        - Forward, Backward, or Symmetric. Symmetric runs one
//                    Forward and then one Backward local sweep, with no
//                    Import between the two.
// numSweeps        - must be nonnegative; 0 returns right after the
//                    fill-complete, numSweeps, and direction checks.
// zeroInitialGuess - if true, the current contents of X are not copied or
//                    imported into the working vector; putScalar(ZERO) is
//                    applied in the branches that do not start from a fresh
//                    temporary.
//
// Raises std::runtime_error if the matrix is not fill complete or has an
// Export object, and std::invalid_argument if numSweeps is negative or
// direction is not one of the three expected values.
5632  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5633  void
5638  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
5639  const Scalar& dampingFactor,
5640  const ESweepDirection direction,
5641  const int numSweeps,
5642  const bool zeroInitialGuess) const
5643  {
5644  using Teuchos::null;
5645  using Teuchos::RCP;
5646  using Teuchos::rcp;
5647  using Teuchos::rcpFromRef;
5648  using Teuchos::rcp_const_cast;
5649  typedef Scalar ST;
5650  const char prefix[] = "Tpetra::CrsMatrix::(reordered)gaussSeidelCopy: ";
5651  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5652 
5653  TEUCHOS_TEST_FOR_EXCEPTION(
5654  ! isFillComplete (), std::runtime_error,
5655  prefix << "The matrix is not fill complete.");
5656  TEUCHOS_TEST_FOR_EXCEPTION(
5657  numSweeps < 0, std::invalid_argument,
5658  prefix << "The number of sweeps must be nonnegative, "
5659  "but you provided numSweeps = " << numSweeps << " < 0.");
5660 
5661  // Translate from global to local sweep direction.
5662  // While doing this, validate the input.
5663  ESweepDirection localDirection;
5664  if (direction == Forward) {
5665  localDirection = Forward;
5666  }
5667  else if (direction == Backward) {
5668  localDirection = Backward;
5669  }
5670  else if (direction == Symmetric) {
5671  // We'll control local sweep direction manually.
5672  localDirection = Forward;
5673  }
5674  else {
5675  TEUCHOS_TEST_FOR_EXCEPTION(
5676  true, std::invalid_argument,
5677  prefix << "The 'direction' enum does not have any of its valid "
5678  "values: Forward, Backward, or Symmetric.");
5679  }
// The final else branch above tests an always-true condition, so
// localDirection has been assigned whenever control reaches this point.
5680 
// Zero sweeps: return before X is touched at all, so X is left unchanged
// even when zeroInitialGuess is true.
5681  if (numSweeps == 0) {
5682  return;
5683  }
5684 
5685  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5686  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5687  TEUCHOS_TEST_FOR_EXCEPTION(
5688  ! exporter.is_null (), std::runtime_error,
5689  "This method's implementation currently requires that the matrix's row, "
5690  "domain, and range Maps be the same. This cannot be the case, because "
5691  "the matrix has a nontrivial Export object.");
5692 
5693  RCP<const map_type> domainMap = this->getDomainMap ();
5694  RCP<const map_type> rangeMap = this->getRangeMap ();
5695  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
5696  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
5697 
5698 #ifdef HAVE_TEUCHOS_DEBUG
5699  {
5700  // The relation 'isSameAs' is transitive. It's also a
5701  // collective, so we don't have to do a "shared" test for
5702  // exception (i.e., a global reduction on the test value).
5703  TEUCHOS_TEST_FOR_EXCEPTION(
5704  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
5705  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5706  "multivector X be in the domain Map of the matrix.");
5707  TEUCHOS_TEST_FOR_EXCEPTION(
5708  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
5709  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5710  "B be in the range Map of the matrix.");
5711  TEUCHOS_TEST_FOR_EXCEPTION(
5712  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
5713  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5714  "D be in the row Map of the matrix.");
5715  TEUCHOS_TEST_FOR_EXCEPTION(
5716  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
5717  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
5718  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
5719  TEUCHOS_TEST_FOR_EXCEPTION(
5720  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
5721  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
5722  "the range Map of the matrix be the same.");
5723  }
5724 #else
5725  // Forestall any compiler warnings for unused variables.
5726  (void) rangeMap;
5727  (void) rowMap;
5728 #endif // HAVE_TEUCHOS_DEBUG
5729 
5730  // Fetch a (possibly cached) temporary column Map multivector
5731  // X_colMap, and a domain Map view X_domainMap of it. Both have
5732  // constant stride by construction. We know that the domain Map
5733  // must include the column Map, because our Gauss-Seidel kernel
5734  // requires that the row Map, domain Map, and range Map are all
5735  // the same, and that each process owns all of its own diagonal
5736  // entries of the matrix.
// NOTE(review): the HAVE_TPETRA_DEBUG check further down treats "X_colMap
// has fewer local rows than X_domainMap" as an error, i.e. the column Map
// view is expected to be at least as long as the domain Map view. The
// sentence above appears to state the inclusion the other way round --
// confirm and reword.
5737 
5738  RCP<MV> X_colMap;
5739  RCP<MV> X_domainMap;
5740  bool copyBackOutput = false;
5741  if (importer.is_null ()) {
5742  if (X.isConstantStride ()) {
5743  X_colMap = rcpFromRef (X);
5744  X_domainMap = rcpFromRef (X);
5745  // Column Map and domain Map are the same, so there are no
5746  // remote entries. Thus, if we are not setting the initial
5747  // guess to zero, we don't have to worry about setting remote
5748  // entries to zero, even though we are not doing an Import in
5749  // this case.
5750  if (zeroInitialGuess) {
5751  X_colMap->putScalar (ZERO);
5752  }
5753  // No need to copy back to X at end.
5754  }
5755  else { // We must copy X into a constant stride multivector.
5756  // Just use the cached column Map multivector for that.
5757  // force=true means fill with zeros, so no need to fill
5758  // remote entries (not in domain Map) with zeros.
5759  X_colMap = getColumnMapMultiVector (X, true);
5760  // X_domainMap is always a domain Map view of the column Map
5761  // multivector. In this case, the domain and column Maps are
5762  // the same, so X_domainMap _is_ X_colMap.
5763  X_domainMap = X_colMap;
5764  if (! zeroInitialGuess) { // Don't copy if zero initial guess
5765  try {
5766  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
5767  } catch (std::exception& e) {
// NOTE(review): 'os' is assembled here but never read; the exception
// raised below carries e.what () only, so the context text is lost.
5768  std::ostringstream os;
5769  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
5770  "deep_copy(*X_domainMap, X) threw an exception: "
5771  << e.what () << ".";
5772  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
5773  }
5774  }
5775  copyBackOutput = true; // Don't forget to copy back at end.
// NOTE(review): listing line 5776, which opens the call whose arguments
// follow, is missing from this extract.
5777  ! X.isConstantStride (),
5778  std::runtime_error,
5779  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
5780  "kernel requires that X and B both have constant stride. Since X "
5781  "does not have constant stride, we had to make a copy. This is a "
5782  "limitation of the current implementation and not your fault, but we "
5783  "still report it as an efficiency warning for your information.");
5784  }
5785  }
5786  else { // Column Map and domain Map are _not_ the same.
5787  X_colMap = getColumnMapMultiVector (X);
5788  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
5789 
5790 #ifdef HAVE_TPETRA_DEBUG
5791  auto X_colMap_host_view =
5792  X_colMap->template getLocalView<Kokkos::HostSpace> ();
5793  auto X_domainMap_host_view =
5794  X_domainMap->template getLocalView<Kokkos::HostSpace> ();
5795 
// Compare the starting pointers only when both views are nonempty.
5796  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
5797  TEUCHOS_TEST_FOR_EXCEPTION
5798  (X_colMap_host_view.data () != X_domainMap_host_view.data (),
5799  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: Pointer to "
5800  "start of column Map view of X is not equal to pointer to start of "
5801  "(domain Map view of) X. This may mean that Tpetra::MultiVector::"
5802  "offsetViewNonConst is broken. "
5803  "Please report this bug to the Tpetra developers.");
5804  }
5805 
5806  TEUCHOS_TEST_FOR_EXCEPTION(
5807  X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
5808  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
5809  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
5810  "X_colMap has fewer local rows than X_domainMap. "
5811  "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
5812  << ", X_domainMap_host_view.extent(0) = "
5813  << X_domainMap_host_view.extent (0)
5814  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
5815  << ", and X_domainMap->getLocalLength() = "
5816  << X_domainMap->getLocalLength ()
5817  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
5818  "is broken. Please report this bug to the Tpetra developers.");
5819 
5820  TEUCHOS_TEST_FOR_EXCEPTION(
5821  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
5822  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
5823  "X_colMap has a different number of columns than X_domainMap. "
5824  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
5825  << " != X_domainMap->getNumVectors() = "
5826  << X_domainMap->getNumVectors ()
5827  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
5828  "is broken. Please report this bug to the Tpetra developers.");
5829 #endif // HAVE_TPETRA_DEBUG
5830 
5831  if (zeroInitialGuess) {
5832  // No need for an Import, since we're filling with zeros.
5833  X_colMap->putScalar (ZERO);
5834  } else {
5835  // We could just copy X into X_domainMap. However, that
5836  // wastes a copy, because the Import also does a copy (plus
5837  // communication). Since the typical use case for
5838  // Gauss-Seidel is a small number of sweeps (2 is typical), we
5839  // don't want to waste that copy. Thus, we do the Import
5840  // here, and skip the first Import in the first sweep.
5841  // Importing directly from X effects the copy into X_domainMap
5842  // (which is a view of X_colMap).
5843  X_colMap->doImport (X, *importer, INSERT);
5844  }
5845  copyBackOutput = true; // Don't forget to copy back at end.
5846  } // if column and domain Maps are (not) the same
5847 
5848  // The Gauss-Seidel / SOR kernel expects multivectors of constant
5849  // stride. X_colMap is by construction, but B might not be. If
5850  // it's not, we have to make a copy.
5851  RCP<const MV> B_in;
5852  if (B.isConstantStride ()) {
5853  B_in = rcpFromRef (B);
5854  }
5855  else {
5856  // Range Map and row Map are the same in this case, so we can
5857  // use the cached row Map multivector to store a constant stride
5858  // copy of B.
5859  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
5860  try {
5861  deep_copy (*B_in_nonconst, B);
5862  } catch (std::exception& e) {
// NOTE(review): as in the X copy above, 'os' is built but unused; only
// e.what () reaches the raised exception.
5863  std::ostringstream os;
5864  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
5865  "deep_copy(*B_in_nonconst, B) threw an exception: "
5866  << e.what () << ".";
5867  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
5868  }
5869  B_in = rcp_const_cast<const MV> (B_in_nonconst);
5870 
// NOTE(review): listing line 5871, which opens the call whose arguments
// follow, is missing from this extract.
5872  ! B.isConstantStride (),
5873  std::runtime_error,
5874  "gaussSeidelCopy: The current implementation requires that B have "
5875  "constant stride. Since B does not have constant stride, we had to "
5876  "copy it into a separate constant-stride multivector. This is a "
5877  "limitation of the current implementation and not your fault, but we "
5878  "still report it as an efficiency warning for your information.");
5879  }
5880 
5881  for (int sweep = 0; sweep < numSweeps; ++sweep) {
5882  if (! importer.is_null () && sweep > 0) {
5883  // We already did the first Import for the zeroth sweep above,
5884  // if it was necessary.
5885  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5886  }
5887 
5888  // Do local Gauss-Seidel.
5889  if (direction != Symmetric) {
5890  if (rowIndices.is_null ()) {
5891  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5892  dampingFactor,
5893  localDirection);
5894  }
5895  else {
5896  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5897  D, rowIndices,
5898  dampingFactor,
5899  localDirection);
5900  }
5901  }
5902  else { // direction == Symmetric
5903  if (rowIndices.is_null ()) {
5904  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5905  dampingFactor,
5906  Forward);
5907  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
5908  // Gauss-Seidel" does _not_ do an Import between the forward
5909  // and backward sweeps. This makes symmetric Gauss-Seidel a
5910  // symmetric preconditioner if the matrix A is symmetric. We
5911  // imitate Aztec's behavior here.
5912  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5913  dampingFactor,
5914  Backward);
5915  }
5916  else {
5917  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5918  D, rowIndices,
5919  dampingFactor,
5920  Forward);
5921  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5922  D, rowIndices,
5923  dampingFactor,
5924  Backward);
5925 
5926  }
5927  }
5928  }
5929 
5930  if (copyBackOutput) {
5931  try {
5932  deep_copy (X , *X_domainMap); // Copy result back into X.
5933  } catch (std::exception& e) {
5934  TEUCHOS_TEST_FOR_EXCEPTION(
5935  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
5936  "threw an exception: " << e.what ());
5937  }
5938  }
5939  }
5940 
// Return a new CrsMatrix whose scalar type is T. The new matrix is
// constructed from this matrix's graph (getCrsGraph ()), receives a
// converted copy of this matrix's values, and is made fill complete with
// this matrix's domain and range Maps before being returned.
//
// Raises std::runtime_error if this matrix is not fill complete, and
// std::logic_error if it does not have a static graph.
//
// NOTE(review): listing line 5944 (between the return type and
// "convert () const") is missing from this extract.
5941  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5942  template<class T>
5943  Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5945  convert () const
5946  {
5947  using Teuchos::RCP;
5948  typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> output_matrix_type;
5949  const char tfecfFuncName[] = "convert: ";
5950 
5951  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5952  (! this->isFillComplete (), std::runtime_error, "This matrix (the source "
5953  "of the conversion) is not fill complete. You must first call "
5954  "fillComplete() (possibly with the domain and range Map) without an "
5955  "intervening call to resumeFill(), before you may call this method.");
5956  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5957  (! this->isStaticGraph (), std::logic_error, "This matrix (the source "
5958  "of the conversion) claims to be fill complete, but does not have a "
5959  "static (i.e., constant) graph. Please report this bug to the Tpetra "
5960  "developers.");
5961 
5962  RCP<output_matrix_type> newMatrix
5963  (new output_matrix_type (this->getCrsGraph ()));
5964  // Copy old values into new values. impl_scalar_type and T may
5965  // differ, so we can't use Kokkos::deep_copy.
5966  ::Tpetra::Details::copyConvert (newMatrix->lclMatrix_.values,
5967  this->lclMatrix_.values);
5968  // Since newMatrix has a static (const) graph, the graph already has
5969  // a column Map, and Import and Export objects already exist (if
5970  // applicable). Thus, calling fillComplete is cheap.
5971  newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
5972 
5973  return newMatrix;
5974  }
5975 
5976 
// Debug-only consistency check of the matrix's internal storage state.
// When HAVE_TPETRA_DEBUG is not defined the body is empty. Otherwise each
// invariant below is tested in turn and a violation is reported as
// std::logic_error.
//
// NOTE(review): listing lines 5979-5980 (the qualified function name and
// parameter list) are missing from this extract; the name is taken from the
// tfecfFuncName string below.
5977  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5978  void
5981  {
5982 #ifdef HAVE_TPETRA_DEBUG
5983  const char tfecfFuncName[] = "checkInternalState: ";
5984  const char err[] = "Internal state is not consistent. "
5985  "Please report this bug to the Tpetra developers.";
5986 
5987  // This version of the graph (RCP<const crs_graph_type>) must
5988  // always be nonnull.
5989  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5990  staticGraph_.is_null (),
5991  std::logic_error, err);
5992  // myGraph == null means that the matrix has a const ("static")
5993  // graph. Otherwise, the matrix has a dynamic graph (it owns its
5994  // graph).
// If myGraph_ is set, it must compare equal to staticGraph_.
5995  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5996  ! myGraph_.is_null () && myGraph_ != staticGraph_,
5997  std::logic_error, err);
5998  // if matrix is fill complete, then graph must be fill complete
5999  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6000  isFillComplete () && ! staticGraph_->isFillComplete (),
6001  std::logic_error, err << " Specifically, the matrix is fill complete, "
6002  "but its graph is NOT fill complete.");
6003  // if matrix is storage optimized, it should have a 1D allocation
6004  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6005  isStorageOptimized () && ! values2D_.is_null (),
6006  std::logic_error, err);
6007  // if matrix/graph are static profile, then 2D allocation should not be present
6008  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6009  getProfileType() == StaticProfile && values2D_ != Teuchos::null,
6010  std::logic_error, err);
6011  // if matrix/graph are dynamic profile, then 1D allocation should not be present
6012  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6013  getProfileType() == DynamicProfile && k_values1D_.extent (0) > 0,
6014  std::logic_error, err);
6015  // if values are allocated and they are non-zero in number, then
6016  // one of the allocations should be present
6017  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6018  staticGraph_->indicesAreAllocated () &&
6019  staticGraph_->getNodeAllocationSize() > 0 &&
6020  staticGraph_->getNodeNumRows() > 0
6021  && values2D_.is_null () &&
6022  k_values1D_.extent (0) == 0,
6023  std::logic_error, err);
6024  // we cannot have both a 1D and 2D allocation
6025  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6026  k_values1D_.extent (0) > 0 && values2D_ != Teuchos::null,
6027  std::logic_error, err << " Specifically, k_values1D_ is allocated (has "
6028  "size " << k_values1D_.extent (0) << " > 0) and values2D_ is also "
6029  "allocated. CrsMatrix is not suppose to have both a 1-D and a 2-D "
6030  "allocation at the same time.");
6031 #endif
6032  }
6033 
// Build and return a one-line summary of the matrix: the object label (only
// if it is nonempty), whether the matrix is fill complete, the global
// dimensions, and -- only when fill complete -- the global number of entries.
//
// NOTE(review): listing lines 6036-6037 (the qualified function name and
// parameter list) are missing from this extract.
6034  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6035  std::string
6038  {
6039  std::ostringstream os;
6040 
6041  os << "Tpetra::CrsMatrix (Kokkos refactor): {";
6042  if (this->getObjectLabel () != "") {
6043  os << "Label: \"" << this->getObjectLabel () << "\", ";
6044  }
// Both branches close the opening brace written above.
6045  if (isFillComplete ()) {
6046  os << "isFillComplete: true"
6047  << ", global dimensions: [" << getGlobalNumRows () << ", "
6048  << getGlobalNumCols () << "]"
6049  << ", global number of entries: " << getGlobalNumEntries ()
6050  << "}";
6051  }
6052  else {
6053  os << "isFillComplete: false"
6054  << ", global dimensions: [" << getGlobalNumRows () << ", "
6055  << getGlobalNumCols () << "]}";
6056  }
6057  return os.str ();
6058  }
6059 
// Print a description of the matrix to 'out'. The amount of detail grows
// with verbLevel: VERB_DEFAULT is treated as VERB_LOW, and VERB_NONE prints
// nothing. Below VERB_MEDIUM only rank 0 prints and the method returns
// before any barrier. From VERB_MEDIUM upward every process takes part in
// the rank-ordered printing loops and their comm->barrier () calls.
//
// NOTE(review): listing line 6062 (between "void" and "describe (") is
// missing from this extract.
6060  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6061  void
6063  describe (Teuchos::FancyOStream &out,
6064  const Teuchos::EVerbosityLevel verbLevel) const
6065  {
6066  using std::endl;
6067  using std::setw;
6068  using Teuchos::ArrayView;
6069  using Teuchos::Comm;
6070  using Teuchos::RCP;
6071  using Teuchos::TypeNameTraits;
6072  using Teuchos::VERB_DEFAULT;
6073  using Teuchos::VERB_NONE;
6074  using Teuchos::VERB_LOW;
6075  using Teuchos::VERB_MEDIUM;
6076  using Teuchos::VERB_HIGH;
6077  using Teuchos::VERB_EXTREME;
6078 
6079  const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
6080 
6081  if (vl == VERB_NONE) {
6082  return; // Don't print anything at all
6083  }
6084  // By convention, describe() always begins with a tab.
6085  Teuchos::OSTab tab0 (out);
6086 
6087  RCP<const Comm<int> > comm = this->getComm();
6088  const int myRank = comm->getRank();
6089  const int numProcs = comm->getSize();
// Column width for the per-row table: grows by one for each power of ten
// below the global row count, is raised to at least 11 (presumably to fit
// the "Num Entries" header -- confirm), and then padded by 2.
6090  size_t width = 1;
6091  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
6092  ++width;
6093  }
6094  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
6095 
6096  // none: print nothing
6097  // low: print O(1) info from node 0
6098  // medium: print O(P) info, num entries per process
6099  // high: print O(N) info, num entries per row
6100  // extreme: print O(NNZ) info: print indices and values
6101  //
6102  // for medium and higher, print constituent objects at specified verbLevel
6103  if (myRank == 0) {
6104  out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
6105  }
6106  Teuchos::OSTab tab1 (out);
6107 
6108  if (myRank == 0) {
// NOTE(review): the label is written with a trailing ", " and no line
// break, so "Template parameters:" follows it on the same output line.
6109  if (this->getObjectLabel () != "") {
6110  out << "Label: \"" << this->getObjectLabel () << "\", ";
6111  }
6112  {
6113  out << "Template parameters:" << endl;
6114  Teuchos::OSTab tab2 (out);
6115  out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
6116  << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
6117  << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
6118  << "Node: " << TypeNameTraits<Node>::name () << endl;
6119  }
6120  if (isFillComplete()) {
6121  out << "isFillComplete: true" << endl
6122  << "Global dimensions: [" << getGlobalNumRows () << ", "
6123  << getGlobalNumCols () << "]" << endl
6124  << "Global number of entries: " << getGlobalNumEntries () << endl
6125  << endl << "Global max number of entries in a row: "
6126  << getGlobalMaxNumRowEntries () << endl;
6127  }
6128  else {
6129  out << "isFillComplete: false" << endl
6130  << "Global dimensions: [" << getGlobalNumRows () << ", "
6131  << getGlobalNumCols () << "]" << endl;
6132  }
6133  }
6134 
6135  if (vl < VERB_MEDIUM) {
6136  return; // all done!
6137  }
6138 
6139  // Describe the row Map.
6140  if (myRank == 0) {
6141  out << endl << "Row Map:" << endl;
6142  }
6143  if (getRowMap ().is_null ()) {
6144  if (myRank == 0) {
6145  out << "null" << endl;
6146  }
6147  }
6148  else {
6149  if (myRank == 0) {
6150  out << endl;
6151  }
6152  getRowMap ()->describe (out, vl);
6153  }
6154 
6155  // Describe the column Map.
// NOTE(review): the "same as ..." shortcuts here and below compare the
// returned Map handles with ==; presumably that is an identity comparison
// of the handles rather than Map::isSameAs -- confirm.
6156  if (myRank == 0) {
6157  out << "Column Map: ";
6158  }
6159  if (getColMap ().is_null ()) {
6160  if (myRank == 0) {
6161  out << "null" << endl;
6162  }
6163  } else if (getColMap () == getRowMap ()) {
6164  if (myRank == 0) {
6165  out << "same as row Map" << endl;
6166  }
6167  } else {
6168  if (myRank == 0) {
6169  out << endl;
6170  }
6171  getColMap ()->describe (out, vl);
6172  }
6173 
6174  // Describe the domain Map.
6175  if (myRank == 0) {
6176  out << "Domain Map: ";
6177  }
6178  if (getDomainMap ().is_null ()) {
6179  if (myRank == 0) {
6180  out << "null" << endl;
6181  }
6182  } else if (getDomainMap () == getRowMap ()) {
6183  if (myRank == 0) {
6184  out << "same as row Map" << endl;
6185  }
6186  } else if (getDomainMap () == getColMap ()) {
6187  if (myRank == 0) {
6188  out << "same as column Map" << endl;
6189  }
6190  } else {
6191  if (myRank == 0) {
6192  out << endl;
6193  }
6194  getDomainMap ()->describe (out, vl);
6195  }
6196 
6197  // Describe the range Map.
6198  if (myRank == 0) {
6199  out << "Range Map: ";
6200  }
6201  if (getRangeMap ().is_null ()) {
6202  if (myRank == 0) {
6203  out << "null" << endl;
6204  }
6205  } else if (getRangeMap () == getDomainMap ()) {
6206  if (myRank == 0) {
6207  out << "same as domain Map" << endl;
6208  }
6209  } else if (getRangeMap () == getRowMap ()) {
6210  if (myRank == 0) {
6211  out << "same as row Map" << endl;
6212  }
6213  } else {
6214  if (myRank == 0) {
6215  out << endl;
6216  }
6217  getRangeMap ()->describe (out, vl);
6218  }
6219 
6220  // O(P) data
// Ranks print one at a time in rank order; the barriers sit outside the
// rank test, so every process executes them on every iteration.
6221  for (int curRank = 0; curRank < numProcs; ++curRank) {
6222  if (myRank == curRank) {
6223  out << "Process rank: " << curRank << endl;
6224  Teuchos::OSTab tab2 (out);
6225  if (! staticGraph_->indicesAreAllocated ()) {
6226  out << "Graph indices not allocated" << endl;
6227  }
6228  else {
6229  out << "Number of allocated entries: "
6230  << staticGraph_->getNodeAllocationSize () << endl;
6231  }
6232  out << "Number of entries: " << getNodeNumEntries () << endl
6233  << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
6234  << endl;
6235  }
6236  // Give output time to complete by executing some barriers.
6237  comm->barrier ();
6238  comm->barrier ();
6239  comm->barrier ();
6240  }
6241 
6242  if (vl < VERB_HIGH) {
6243  return; // all done!
6244  }
6245 
6246  // O(N) and O(NNZ) data
6247  for (int curRank = 0; curRank < numProcs; ++curRank) {
6248  if (myRank == curRank) {
6249  out << std::setw(width) << "Proc Rank"
6250  << std::setw(width) << "Global Row"
6251  << std::setw(width) << "Num Entries";
6252  if (vl == VERB_EXTREME) {
6253  out << std::setw(width) << "(Index,Value)";
6254  }
6255  out << endl;
6256  for (size_t r = 0; r < getNodeNumRows (); ++r) {
6257  const size_t nE = getNumEntriesInLocalRow(r);
6258  GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
6259  out << std::setw(width) << myRank
6260  << std::setw(width) << gid
6261  << std::setw(width) << nE;
6262  if (vl == VERB_EXTREME) {
// Entries are printed with global column indices in both cases; if the
// matrix is neither globally nor locally indexed, no entries are printed.
6263  if (isGloballyIndexed()) {
6264  ArrayView<const GlobalOrdinal> rowinds;
6265  ArrayView<const Scalar> rowvals;
6266  getGlobalRowView (gid, rowinds, rowvals);
6267  for (size_t j = 0; j < nE; ++j) {
6268  out << " (" << rowinds[j]
6269  << ", " << rowvals[j]
6270  << ") ";
6271  }
6272  }
6273  else if (isLocallyIndexed()) {
6274  ArrayView<const LocalOrdinal> rowinds;
6275  ArrayView<const Scalar> rowvals;
6276  getLocalRowView (r, rowinds, rowvals);
6277  for (size_t j=0; j < nE; ++j) {
6278  out << " (" << getColMap()->getGlobalElement(rowinds[j])
6279  << ", " << rowvals[j]
6280  << ") ";
6281  }
6282  } // globally or locally indexed
6283  } // vl == VERB_EXTREME
6284  out << endl;
6285  } // for each row r on this process
6286  } // if (myRank == curRank)
6287 
6288  // Give output time to complete
6289  comm->barrier ();
6290  comm->barrier ();
6291  comm->barrier ();
6292  } // for each process p
6293  }
6294 
6295  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6296  bool
6299  {
      // Type-compatibility check for the source object of a data
      // redistribution: returns true exactly when `source` can be
      // dynamic_cast to row_matrix_type.  The pointer form of dynamic_cast
      // is used, so a failed cast yields NULL rather than throwing.  No
      // sizes are compared.
      //
      // NOTE(review): the lines that name this member and declare `source`
      // (and the definition of row_matrix_type) are not present in this
      // listing.  The comment in copyAndPermuteNew ("we've already tested
      // it in checkSizes()") suggests this is checkSizes() -- confirm.
      //
6300  // It's not clear what kind of compatibility checks on sizes can
6301  // be performed here. Epetra_CrsGraph doesn't check any sizes for
6302  // compatibility.
6303 
6304  // Currently, the source object must be a RowMatrix with the same
6305  // four template parameters as the target CrsMatrix. We might
6306  // relax this requirement later.
6308  const row_matrix_type* srcRowMat =
6309  dynamic_cast<const row_matrix_type*> (&source);
6310  return (srcRowMat != NULL);
6311  }
6312 
6313  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6314  bool
6317  {
6318  return true;
6319  }
6320 
6321  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6322  void
6325  const size_t numSameIDs,
6326  const LocalOrdinal permuteToLIDs[],
6327  const LocalOrdinal permuteFromLIDs[],
6328  const size_t numPermutes)
6329  {
      // copyAndPermuteImpl: copy rows of the source matrix (srcMat) into
      // this (target) matrix in two phases:
      //
      //   1. Rows with source local index 0 .. numSameIDs-1 are copied to
      //      the target row with the same global index.
      //   2. For p = 0 .. numPermutes-1, the source row with local index
      //      permuteFromLIDs[p] is copied to the target row with local
      //      index permuteToLIDs[p].
      //
      // In both phases the row is handed to combineGlobalValues() with
      // REPLACE if this matrix has a static graph, and with INSERT
      // otherwise.  If the source is locally indexed, the row is first
      // copied out with global column indices; otherwise a view of the
      // source row is used directly.
      //
      // NOTE(review): the declarator lines naming this member and its
      // first parameter (srcMat) are not present in this listing.
      //
6331  using Teuchos::Array;
6332  using Teuchos::ArrayView;
6333  typedef LocalOrdinal LO;
6334  typedef GlobalOrdinal GO;
      // tfecfFuncName is only referenced by the debug-only checks below,
      // hence it is only defined in debug builds.
6335 #ifdef HAVE_TPETRA_DEBUG
6336  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
6337  const char tfecfFuncName[] = "copyAndPermuteImpl: ";
6338 #endif // HAVE_TPETRA_DEBUG
6339  ProfilingRegion regionCAP ("Tpetra::CrsMatrix::copyAndPermuteImpl");
6340 
6341  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6342  //
6343  // Copy the first numSame row from source to target (this matrix).
6344  // This involves copying rows corresponding to LIDs [0, numSame-1].
6345  //
6346  const map_type& srcRowMap = * (srcMat.getRowMap ());
      // Scratch buffers for row copies.  They are shared by both loops
      // below and are only ever grown, never shrunk, so views of length
      // rowLength are taken for each row.
6347  Array<GO> rowInds;
6348  Array<Scalar> rowVals;
6349  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
6350  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6351  // Global ID for the current row index in the source matrix.
6352  // The first numSameIDs GIDs in the two input lists are the
6353  // same, so sourceGID == targetGID in this case.
6354  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6355  const GO targetGID = sourceGID;
6356 
6357  // Input views for the combineGlobalValues() call below.
6358  ArrayView<const GO> rowIndsConstView;
6359  ArrayView<const Scalar> rowValsConstView;
6360 
6361  if (sourceIsLocallyIndexed) {
6362  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6363  if (rowLength > static_cast<size_t> (rowInds.size())) {
6364  rowInds.resize (rowLength);
6365  rowVals.resize (rowLength);
6366  }
6367  // Resizing invalidates an Array's views, so we must make new
6368  // ones, even if rowLength hasn't changed.
6369  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6370  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6371 
6372  // The source matrix is locally indexed, so we have to get a
6373  // copy. Really it's the GIDs that have to be copied (because
6374  // they have to be converted from LIDs).
6375  size_t checkRowLength = 0;
6376  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
6377 
6378 #ifdef HAVE_TPETRA_DEBUG
6379  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
6380  std::logic_error, "For global row index " << sourceGID << ", the source"
6381  " matrix's getNumEntriesInGlobalRow() method returns a row length of "
6382  << rowLength << ", but the getGlobalRowCopy() method reports that "
6383  "the row length is " << checkRowLength << ". Please report this bug "
6384  "to the Tpetra developers.");
6385 #endif // HAVE_TPETRA_DEBUG
6386 
6387  rowIndsConstView = rowIndsView.view (0, rowLength);
6388  rowValsConstView = rowValsView.view (0, rowLength);
6389  }
6390  else { // source matrix is globally indexed.
6391  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
6392  }
6393 
6394  // Combine the data into the target matrix.
6395  if (this->isStaticGraph ()) {
6396  // Applying a permutation to a matrix with a static graph
6397  // means REPLACE-ing entries.
6398  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE);
6399  }
6400  else {
6401  // Applying a permutation to a matrix with a dynamic graph
6402  // means INSERT-ing entries. This has the same effect as
6403  // ADD, if the target graph already has an entry there.
6404  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT);
6405  }
6406  } // For each of the consecutive source and target IDs that are the same
6407 
6408  //
6409  // Permute the remaining rows.
6410  //
      // NOTE(review): apart from how sourceGID and targetGID are obtained,
      // the body of this loop repeats the body of the loop above; it looks
      // like a candidate for a shared private helper.
6411  const map_type& tgtRowMap = * (this->getRowMap ());
6412  for (size_t p = 0; p < numPermutes; ++p) {
      // Source and target rows generally differ here: each local index is
      // translated through its own row Map.
6413  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6414  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6415 
6416  // Input views for the combineGlobalValues() call below.
6417  ArrayView<const GO> rowIndsConstView;
6418  ArrayView<const Scalar> rowValsConstView;
6419 
6420  if (sourceIsLocallyIndexed) {
6421  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6422  if (rowLength > static_cast<size_t> (rowInds.size ())) {
6423  rowInds.resize (rowLength);
6424  rowVals.resize (rowLength);
6425  }
6426  // Resizing invalidates an Array's views, so we must make new
6427  // ones, even if rowLength hasn't changed.
6428  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6429  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6430 
6431  // The source matrix is locally indexed, so we have to get a
6432  // copy. Really it's the GIDs that have to be copied (because
6433  // they have to be converted from LIDs).
6434  size_t checkRowLength = 0;
6435  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
6436 
6437 #ifdef HAVE_TPETRA_DEBUG
6438  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
6439  std::logic_error, "For the source matrix's global row index "
6440  << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() "
6441  "method returns a row length of " << rowLength << ", but the "
6442  "getGlobalRowCopy() method reports that the row length is "
6443  << checkRowLength << ". Please report this bug to the Tpetra "
6444  "developers.");
6445 #endif // HAVE_TPETRA_DEBUG
6446 
6447  rowIndsConstView = rowIndsView.view (0, rowLength);
6448  rowValsConstView = rowValsView.view (0, rowLength);
6449  }
6450  else {
6451  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
6452  }
6453 
6454  // Combine the data into the target matrix.
6455  if (isStaticGraph()) {
6456  this->combineGlobalValues (targetGID, rowIndsConstView,
6457  rowValsConstView, REPLACE);
6458  }
6459  else {
6460  this->combineGlobalValues (targetGID, rowIndsConstView,
6461  rowValsConstView, INSERT);
6462  }
6463  } // For each ID to permute
6464  }
6465 
6466  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6467  void
6468  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6469  copyAndPermuteNew (const SrcDistObject& srcObj,
6470  const size_t numSameIDs,
6471  const Kokkos::DualView<const local_ordinal_type*, device_type>& permuteToLIDs,
6472  const Kokkos::DualView<const local_ordinal_type*, device_type>& permuteFromLIDs)
6473  {
6477  using std::endl;
6478  typedef Kokkos::HostSpace host_mem_space;
6479  typedef typename device_type::memory_space dev_mem_space;
6480  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
6481  const char tfecfFuncName[] = "copyAndPermuteNew: ";
6482  ProfilingRegion regionCAP ("Tpetra::CrsMatrix::copyAndPermuteNew");
6483 
6484  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
6485  // output to std::cerr on every MPI process. This is unwise for
6486  // runs with large numbers of MPI processes.
6487  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
6488  std::unique_ptr<std::string> prefix;
6489  if (verbose) {
6490  int myRank = 0;
6491  auto map = this->getMap ();
6492  if (! map.is_null ()) {
6493  auto comm = map->getComm ();
6494  if (! comm.is_null ()) {
6495  myRank = comm->getRank ();
6496  }
6497  }
6498 
6499  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
6500  prefix = [myRank] () {
6501  std::ostringstream pfxStrm;
6502  pfxStrm << "(Proc " << myRank << ") ";
6503  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
6504  } ();
6505  std::ostringstream os;
6506  os << *prefix << "Tpetra::CrsMatrix::copyAndPermuteNew: " << endl
6507  << *prefix << " "
6508  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
6509  << *prefix << " "
6510  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl;
6511  std::cerr << os.str ();
6512  }
6513 
6514  const auto numPermute = permuteToLIDs.extent (0);
6515  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6516  (numPermute != permuteFromLIDs.extent (0),
6517  std::invalid_argument, "permuteToLIDs.extent(0) = "
6518  << numPermute << "!= permuteFromLIDs.extent(0) = "
6519  << permuteFromLIDs.extent (0) << ".");
6520 
6521  // We want to keep permuteToLIDs and permuteFromLIDs on device, if
6522  // possible, but respect their current placement. This is because
6523  // DistObject might use their placement to decide where to pack
6524  // and/or unpack.
6525  const bool permuteToLIDs_sync_back =
6526  permuteToLIDs.modified_device () >= permuteToLIDs.modified_host ();
6527  auto permuteToLIDs_nc = castAwayConstDualView (permuteToLIDs);
6528  permuteToLIDs_nc.template sync<host_mem_space> ();
6529  auto permuteToLIDs_h = permuteToLIDs.template view<host_mem_space> ();
6530 
6531  const bool permuteFromLIDs_sync_back =
6532  permuteFromLIDs.modified_device () >= permuteFromLIDs.modified_host ();
6533  auto permuteFromLIDs_nc = castAwayConstDualView (permuteFromLIDs);
6534  permuteFromLIDs_nc.template sync<host_mem_space> ();
6535  auto permuteFromLIDs_h = permuteFromLIDs.template view<host_mem_space> ();
6536 
6537  if (verbose) {
6538  std::ostringstream os;
6539  os << *prefix << "permuteToLIDs_sync_back: "
6540  << (permuteToLIDs_sync_back ? "true" : "false") << ", "
6541  << "permuteFromLIDs_sync_back: "
6542  << (permuteFromLIDs_sync_back ? "true" : "false") << endl;
6543  std::cerr << os.str ();
6544  }
6545 
6546  // This dynamic cast should succeed, because we've already tested
6547  // it in checkSizes().
6548  typedef ::Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> RMT;
6549  const RMT& srcMat = dynamic_cast<const RMT&> (srcObj);
6550 
6551  this->copyAndPermuteImpl (srcMat, numSameIDs, permuteToLIDs_h.data (),
6552  permuteFromLIDs_h.data (), numPermute);
6553 
6554  if (permuteToLIDs_sync_back) {
6555  permuteToLIDs_nc.template sync<dev_mem_space> ();
6556  }
6557  if (permuteFromLIDs_sync_back) {
6558  permuteFromLIDs_nc.template sync<dev_mem_space> ();
6559  }
6560 
6561  if (verbose) {
6562  std::ostringstream os;
6563  os << *prefix << "copyAndPermuteNew: after:" << endl
6564  << *prefix << " "
6565  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
6566  << *prefix << " "
6567  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl;
6568  std::cerr << os.str ();
6569  }
6570  }
6571 
6572  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6573  void
6574  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6575  packAndPrepareNew (const SrcDistObject& source,
6576  const Kokkos::DualView<const local_ordinal_type*, device_type>& exportLIDs,
6577  Kokkos::DualView<char*, buffer_device_type>& exports,
6578  const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6579  size_t& constantNumPackets,
6580  Distributor& distor)
6581  {
6584  using Teuchos::outArg;
6585  using Teuchos::REDUCE_MAX;
6586  using Teuchos::reduceAll;
6587  using std::endl;
6588  typedef LocalOrdinal LO;
6589  typedef GlobalOrdinal GO;
6590  const char tfecfFuncName[] = "packAndPrepareNew: ";
6591  ProfilingRegion regionPAP ("Tpetra::CrsMatrix::packAndPrepareNew");
6592 
6593  const bool debug = ::Tpetra::Details::Behavior::debug ();
6594  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
6595 
6596  // Processes on which the communicator is null should not participate.
6597  Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6598  if (pComm.is_null ()) {
6599  return;
6600  }
6601  const Teuchos::Comm<int>& comm = *pComm;
6602  const int myRank = comm.getSize ();
6603 
6604  std::unique_ptr<std::string> prefix;
6605  if (verbose) {
6606  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
6607  prefix = [myRank] () {
6608  std::ostringstream pfxStrm;
6609  pfxStrm << "(Proc " << myRank << ") ";
6610  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
6611  } ();
6612 
6613  std::ostringstream os;
6614  os << *prefix << "Tpetra::CrsMatrix::packAndPrepareNew: " << endl
6615  << *prefix << " "
6616  << dualViewStatusToString (exportLIDs, "exportLIDs")
6617  << endl
6618  << *prefix << " "
6619  << dualViewStatusToString (exports, "exports")
6620  << endl
6621  << *prefix << " "
6622  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
6623  << endl;
6624  std::cerr << os.str ();
6625  }
6626 
6627  // Attempt to cast the source object to CrsMatrix. If successful,
6628  // use the source object's packNew() method to pack its data for
6629  // communication. Otherwise, attempt to cast to RowMatrix; if
6630  // successful, use the source object's pack() method. Otherwise,
6631  // the source object doesn't have the right type.
6632  //
6633  // FIXME (mfh 30 Jun 2013, 11 Sep 2017) We don't even need the
6634  // RowMatrix to have the same Node type. Unfortunately, we don't
6635  // have a way to ask if the RowMatrix is "a RowMatrix with any
6636  // Node type," since RowMatrix doesn't have a base class. A
6637  // hypothetical RowMatrixBase<Scalar, LO, GO> class, which does
6638  // not currently exist, would satisfy this requirement.
6639  //
6640  // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
6641  // type doesn't technically need to match the target object's
6642  // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
6643  // and GO need not be the same, as long as there is no overflow of
6644  // the indices. However, checking for index overflow is global
6645  // and therefore undesirable.
6646 
6647  std::ostringstream msg; // for collecting error messages
6648  int lclBad = 0; // to be set below
6649 
6650  typedef CrsMatrix<Scalar, LO, GO, Node> crs_matrix_type;
6651  const crs_matrix_type* srcCrsMat =
6652  dynamic_cast<const crs_matrix_type*> (&source);
6653  if (srcCrsMat != NULL) {
6654  if (verbose) {
6655  std::ostringstream os;
6656  os << *prefix << "Source matrix same (CrsMatrix) type as target; "
6657  "calling packNew" << endl;
6658  std::cerr << os.str ();
6659  }
6660  try {
6661  srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6662  constantNumPackets, distor);
6663  }
6664  catch (std::exception& e) {
6665  lclBad = 1;
6666  msg << "Proc " << myRank << ": " << e.what () << std::endl;
6667  }
6668  }
6669  else {
6671  using Kokkos::HostSpace;
6672  using Kokkos::subview;
6673  typedef Kokkos::DualView<char*, buffer_device_type> exports_type;
6674  typedef Kokkos::pair<size_t, size_t> range_type;
6675 
6676  if (verbose) {
6677  std::ostringstream os;
6678  os << *prefix << "Source matrix NOT same (CrsMatrix) type as target"
6679  << endl;
6680  std::cerr << os.str ();
6681  }
6682 
6683  typedef RowMatrix<Scalar, LO, GO, Node> row_matrix_type;
6684  const row_matrix_type* srcRowMat =
6685  dynamic_cast<const row_matrix_type*> (&source);
6686  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6687  (srcRowMat == NULL, std::invalid_argument,
6688  "The source object of the Import or Export operation is neither a "
6689  "CrsMatrix (with the same template parameters as the target object), "
6690  "nor a RowMatrix (with the same first four template parameters as the "
6691  "target object).");
6692 
6693  // For the RowMatrix case, we need to convert from
6694  // Kokkos::DualView to Teuchos::Array*. This doesn't need to be
6695  // so terribly efficient, since packing a non-CrsMatrix
6696  // RowMatrix for Import/Export into a CrsMatrix is not a
6697  // critical case. Thus, we may allocate Teuchos::Array objects
6698  // here and copy to and from Kokkos::*View.
6699 
6700  // Sync exportLIDs to host, and view its host data as a Teuchos::ArrayView.
6701  {
6702  auto exportLIDs_nc = castAwayConstDualView (exportLIDs);
6703  exportLIDs_nc.template sync<HostSpace> ();
6704  }
6705  auto exportLIDs_h = exportLIDs.template view<HostSpace> ();
6706  Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6707  exportLIDs_h.size ());
6708 
6709  // pack() will allocate exports_a as needed. We'll copy back
6710  // into exports (after (re)allocating exports if needed) below.
6711  Teuchos::Array<char> exports_a;
6712 
6713  // View exportLIDs' host data as a Teuchos::ArrayView. We don't
6714  // need to sync, since we're doing write-only access, but we do
6715  // need to mark the DualView as modified on host.
6716  {
6717  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
6718  numPacketsPerLID_nc.modified_device() = 0; // write-only host access
6719  numPacketsPerLID_nc.modified_host() = 1;
6720  }
6721  auto numPacketsPerLID_h = numPacketsPerLID.template view<HostSpace> ();
6722  Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6723  numPacketsPerLID_h.size ());
6724 
6725  // Invoke RowMatrix's legacy pack() interface, using above
6726  // Teuchos::Array* objects.
6727  try {
6728  srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6729  constantNumPackets, distor);
6730  }
6731  catch (std::exception& e) {
6732  lclBad = 1;
6733  msg << "Proc " << myRank << ": " << e.what () << std::endl;
6734  }
6735 
6736  // Allocate 'exports', and copy exports_a back into it.
6737  const size_t newAllocSize = static_cast<size_t> (exports_a.size ());
6738  if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6739  const std::string oldLabel = exports.d_view.label ();
6740  const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
6741  exports = exports_type (newLabel, newAllocSize);
6742  }
6743  // It's safe to assume that we're working on host anyway, so
6744  // just keep exports sync'd to host.
6745  exports.modified_device() = 0; // ignore current device contents
6746  exports.modified_host() = 1;
6747 
6748  auto exports_h = exports.template view<HostSpace> ();
6749  auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
6750 
6751  // Kokkos::deep_copy needs a Kokkos::View input, so turn
6752  // exports_a into a nonowning Kokkos::View first before copying.
6753  typedef typename exports_type::t_host::execution_space HES;
6754  typedef Kokkos::Device<HES, HostSpace> host_device_type;
6755  Kokkos::View<const char*, host_device_type>
6756  exports_a_kv (exports_a.getRawPtr (), newAllocSize);
6757  Kokkos::deep_copy (exports_h_sub, exports_a_kv);
6758  }
6759 
6760  if (debug) {
6761  int gblBad = 0; // output argument; to be set below
6762  reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6763  if (gblBad != 0) {
6764  Tpetra::Details::gathervPrint (std::cerr, msg.str (), comm);
6765  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6766  (true, std::logic_error, "packNew() or pack() threw an exception on "
6767  "one or more participating processes.");
6768  }
6769  }
6770  else {
6771  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6772  (lclBad != 0, std::logic_error, "packNew threw an exception on one "
6773  "or more participating processes. Here is this process' error "
6774  "message: " << msg.str ());
6775  }
6776 
6777  if (verbose) {
6778  std::ostringstream os;
6779  os << *prefix << "packAndPrepareNew: Done!" << endl
6780  << *prefix << " "
6781  << dualViewStatusToString (exportLIDs, "exportLIDs")
6782  << endl
6783  << *prefix << " "
6784  << dualViewStatusToString (exports, "exports")
6785  << endl
6786  << *prefix << " "
6787  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
6788  << endl;
6789  std::cerr << os.str ();
6790  }
6791  }
6792 
6793  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6794  size_t
6795  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6796  packRow (char exports[],
6797  const size_t offset,
6798  const size_t numEnt,
6799  const GlobalOrdinal gidsIn[],
6800  const impl_scalar_type valsIn[],
6801  const size_t numBytesPerValue) const
6802  {
6803  using Kokkos::View;
6804  using Kokkos::subview;
6806  typedef LocalOrdinal LO;
6807  typedef GlobalOrdinal GO;
6808  typedef impl_scalar_type ST;
6809  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
6810 
6811  if (numEnt == 0) {
6812  // Empty rows always take zero bytes, to ensure sparsity.
6813  return 0;
6814  }
6815 
6816  const GO gid = 0; // packValueCount wants this
6817  const LO numEntLO = static_cast<size_t> (numEnt);
6818 
6819  const size_t numEntBeg = offset;
6820  const size_t numEntLen = PackTraits<LO, HES>::packValueCount (numEntLO);
6821  const size_t gidsBeg = numEntBeg + numEntLen;
6822  const size_t gidsLen = numEnt * PackTraits<GO, HES>::packValueCount (gid);
6823  const size_t valsBeg = gidsBeg + gidsLen;
6824  const size_t valsLen = numEnt * numBytesPerValue;
6825 
6826  char* const numEntOut = exports + numEntBeg;
6827  char* const gidsOut = exports + gidsBeg;
6828  char* const valsOut = exports + valsBeg;
6829 
6830  size_t numBytesOut = 0;
6831  int errorCode = 0;
6832  numBytesOut += PackTraits<LO, HES>::packValue (numEntOut, numEntLO);
6833 
6834  {
6835  Kokkos::pair<int, size_t> p;
6836  p = PackTraits<GO, HES>::packArray (gidsOut, gidsIn, numEnt);
6837  errorCode += p.first;
6838  numBytesOut += p.second;
6839 
6840  p = PackTraits<ST, HES>::packArray (valsOut, valsIn, numEnt);
6841  errorCode += p.first;
6842  numBytesOut += p.second;
6843  }
6844 
6845  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6846  TEUCHOS_TEST_FOR_EXCEPTION
6847  (numBytesOut != expectedNumBytes, std::logic_error, "packRow: "
6848  "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
6849  << expectedNumBytes << ".");
6850  TEUCHOS_TEST_FOR_EXCEPTION
6851  (errorCode != 0, std::runtime_error, "packRow: "
6852  "PackTraits::packArray returned a nonzero error code");
6853 
6854  return numBytesOut;
6855  }
6856 
6857  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6858  size_t
6859  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6860  unpackRow (GlobalOrdinal gidsOut[],
6861  impl_scalar_type valsOut[],
6862  const char imports[],
6863  const size_t offset,
6864  const size_t numBytes,
6865  const size_t numEnt,
6866  const size_t numBytesPerValue)
6867  {
6868  using Kokkos::View;
6869  using Kokkos::subview;
6871  typedef LocalOrdinal LO;
6872  typedef GlobalOrdinal GO;
6873  typedef impl_scalar_type ST;
6874  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
6875 
6876  if (numBytes == 0) {
6877  // Rows with zero bytes should always have zero entries.
6878  if (numEnt != 0) {
6879  const int myRank = this->getMap ()->getComm ()->getRank ();
6880  TEUCHOS_TEST_FOR_EXCEPTION
6881  (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
6882  "unpackRow: The number of bytes to unpack numBytes=0, but the "
6883  "number of entries to unpack (as reported by numPacketsPerLID) "
6884  "for this row numEnt=" << numEnt << " != 0.");
6885  }
6886  return 0;
6887  }
6888 
6889  if (numEnt == 0 && numBytes != 0) {
6890  const int myRank = this->getMap ()->getComm ()->getRank ();
6891  TEUCHOS_TEST_FOR_EXCEPTION
6892  (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
6893  "unpackRow: The number of entries to unpack (as reported by "
6894  "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6895  "numBytes=" << numBytes << " != 0.");
6896  }
6897 
6898  const GO gid = 0; // packValueCount wants this
6899  const LO lid = 0; // packValueCount wants this
6900 
6901  const size_t numEntBeg = offset;
6902  const size_t numEntLen = PackTraits<LO, HES>::packValueCount (lid);
6903  const size_t gidsBeg = numEntBeg + numEntLen;
6904  const size_t gidsLen = numEnt * PackTraits<GO, HES>::packValueCount (gid);
6905  const size_t valsBeg = gidsBeg + gidsLen;
6906  const size_t valsLen = numEnt * numBytesPerValue;
6907 
6908  const char* const numEntIn = imports + numEntBeg;
6909  const char* const gidsIn = imports + gidsBeg;
6910  const char* const valsIn = imports + valsBeg;
6911 
6912  size_t numBytesOut = 0;
6913  int errorCode = 0;
6914  LO numEntOut;
6915  numBytesOut += PackTraits<LO, HES>::unpackValue (numEntOut, numEntIn);
6916  if (static_cast<size_t> (numEntOut) != numEnt ||
6917  numEntOut == static_cast<LO> (0)) {
6918  const int myRank = this->getMap ()->getComm ()->getRank ();
6919  std::ostringstream os;
6920  os << "(Proc " << myRank << ") CrsMatrix::unpackRow: ";
6921  bool firstErrorCondition = false;
6922  if (static_cast<size_t> (numEntOut) != numEnt) {
6923  os << "Number of entries from numPacketsPerLID numEnt=" << numEnt
6924  << " does not equal number of entries unpacked from imports "
6925  "buffer numEntOut=" << numEntOut << ".";
6926  firstErrorCondition = true;
6927  }
6928  if (numEntOut == static_cast<LO> (0)) {
6929  if (firstErrorCondition) {
6930  os << " Also, ";
6931  }
6932  os << "Number of entries unpacked from imports buffer numEntOut=0, "
6933  "but number of bytes to unpack for this row numBytes=" << numBytes
6934  << " != 0. This should never happen, since packRow should only "
6935  "ever pack rows with a nonzero number of entries. In this case, "
6936  "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6937  << ".";
6938  }
6939  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, os.str ());
6940  }
6941 
6942  {
6943  Kokkos::pair<int, size_t> p;
6944  p = PackTraits<GO, HES>::unpackArray (gidsOut, gidsIn, numEnt);
6945  errorCode += p.first;
6946  numBytesOut += p.second;
6947 
6948  p = PackTraits<ST, HES>::unpackArray (valsOut, valsIn, numEnt);
6949  errorCode += p.first;
6950  numBytesOut += p.second;
6951  }
6952 
6953  TEUCHOS_TEST_FOR_EXCEPTION
6954  (numBytesOut != numBytes, std::logic_error, "unpackRow: numBytesOut = "
6955  << numBytesOut << " != numBytes = " << numBytes << ".");
6956 
6957  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6958  TEUCHOS_TEST_FOR_EXCEPTION
6959  (numBytesOut != expectedNumBytes, std::logic_error, "unpackRow: "
6960  "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
6961  << expectedNumBytes << ".");
6962 
6963  TEUCHOS_TEST_FOR_EXCEPTION
6964  (errorCode != 0, std::runtime_error, "unpackRow: "
6965  "PackTraits::unpackArray returned a nonzero error code");
6966 
6967  return numBytesOut;
6968  }
6969 
6970  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6971  void
6972  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6973  allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6974  size_t& totalNumEntries,
6975  const Kokkos::DualView<const local_ordinal_type*, device_type>& exportLIDs) const
6976  {
6978  using std::endl;
6979  typedef impl_scalar_type IST;
6980  typedef LocalOrdinal LO;
6981  typedef GlobalOrdinal GO;
6982  //const char tfecfFuncName[] = "allocatePackSpaceNew: ";
6983 
6984  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
6985  // output to std::cerr on every MPI process. This is unwise for
6986  // runs with large numbers of MPI processes.
6987  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
6988  std::unique_ptr<std::string> prefix;
6989  if (verbose) {
6990  int myRank = 0;
6991  auto map = this->getMap ();
6992  if (! map.is_null ()) {
6993  auto comm = map->getComm ();
6994  if (! comm.is_null ()) {
6995  myRank = comm->getRank ();
6996  }
6997  }
6998  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
6999  prefix = [myRank] () {
7000  std::ostringstream pfxStrm;
7001  pfxStrm << "(Proc " << myRank << ") ";
7002  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7003  } ();
7004 
7005  std::ostringstream os;
7006  os << *prefix << "Tpetra::CrsMatrix::allocatePackSpaceNew: Before:"
7007  << endl
7008  << *prefix << " "
7009  << dualViewStatusToString (exports, "exports")
7010  << endl
7011  << *prefix << " "
7012  << dualViewStatusToString (exportLIDs, "exportLIDs")
7013  << endl;
7014  std::cerr << os.str ();
7015  }
7016 
7017  // The number of export LIDs must fit in LocalOrdinal, assuming
7018  // that the LIDs are distinct and valid on the calling process.
7019  const LO numExportLIDs = static_cast<LO> (exportLIDs.extent (0));
7020 
7021  // We need to access exportLIDs on host, but Kokkos forbids
7022  // sync'ing of a DualView of const. We won't modify the entries,
7023  // so it's fair to leave it const, except for sync'ing it.
7024  {
7025  Kokkos::DualView<local_ordinal_type*, device_type> exportLIDs_nc =
7027  exportLIDs_nc.template sync<Kokkos::HostSpace> ();
7028  }
7029  auto exportLIDs_h = exportLIDs.template view<Kokkos::HostSpace> ();
7030 
7031  // Count the total number of matrix entries to send.
7032  totalNumEntries = 0;
7033  for (LO i = 0; i < numExportLIDs; ++i) {
7034  const LO lclRow = exportLIDs_h[i];
7035  size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
7036  // FIXME (mfh 25 Jan 2015) We should actually report invalid row
7037  // indices as an error. Just consider them nonowned for now.
7038  if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
7039  curNumEntries = 0;
7040  }
7041  totalNumEntries += curNumEntries;
7042  }
7043 
7044  // FIXME (mfh 24 Feb 2013, 24 Mar 2017) This code is only correct
7045  // if sizeof(IST) is a meaningful representation of the amount of
7046  // data in a Scalar instance. (LO and GO are always built-in
7047  // integer types.)
7048  //
7049  // Allocate the exports array. It does NOT need padding for
7050  // alignment, since we use memcpy to write to / read from send /
7051  // receive buffers.
7052  const size_t allocSize =
7053  static_cast<size_t> (numExportLIDs) * sizeof (LO) +
7054  totalNumEntries * (sizeof (IST) + sizeof (GO));
7055  if (static_cast<size_t> (exports.extent (0)) < allocSize) {
7056  typedef Kokkos::DualView<char*, buffer_device_type> exports_type;
7057 
7058  const std::string oldLabel = exports.d_view.label ();
7059  const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
7060  exports = exports_type (newLabel, allocSize);
7061  }
7062 
7063  if (verbose) {
7064  std::ostringstream os;
7065  os << *prefix << "Tpetra::CrsMatrix::allocatePackSpaceNew: After:"
7066  << endl
7067  << *prefix << " "
7068  << dualViewStatusToString (exports, "exports")
7069  << endl
7070  << *prefix << " "
7071  << dualViewStatusToString (exportLIDs, "exportLIDs")
7072  << endl;
7073  std::cerr << os.str ();
7074  }
7075  }
7076 
7077  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7078  void
7079  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7080  packNew (const Kokkos::DualView<const local_ordinal_type*, device_type>& exportLIDs,
7081  Kokkos::DualView<char*, buffer_device_type>& exports,
7082  const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7083  size_t& constantNumPackets,
7084  Distributor& dist) const
7085  {
7086  // The call to packNew in packAndPrepareNew catches and handles any exceptions.
7087  if (this->isStaticGraph ()) {
7088  using ::Tpetra::Details::packCrsMatrixNew;
7089  packCrsMatrixNew (*this, exports, numPacketsPerLID, exportLIDs,
7090  constantNumPackets, dist);
7091  }
7092  else {
7093  this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
7094  constantNumPackets, dist);
7095  }
7096  }
7097 
7098  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7099  void
7101  packNonStaticNew (const Kokkos::DualView<const local_ordinal_type*, device_type>& exportLIDs,
7102  Kokkos::DualView<char*, buffer_device_type>& exports,
7103  const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7104  size_t& constantNumPackets,
7105  Distributor& distor) const
7106  {
7107  using Kokkos::View;
7111  using std::endl;
7112  typedef LocalOrdinal LO;
7113  typedef GlobalOrdinal GO;
7114  typedef impl_scalar_type ST;
7115  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
7116  const char tfecfFuncName[] = "packNonStaticNew: ";
7117 
7118  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
7119  // output to std::cerr on every MPI process. This is unwise for
7120  // runs with large numbers of MPI processes.
7121  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
7122  std::unique_ptr<std::string> prefix;
7123  if (verbose) {
7124  int myRank = 0;
7125  auto map = this->getMap ();
7126  if (! map.is_null ()) {
7127  auto comm = map->getComm ();
7128  if (! comm.is_null ()) {
7129  myRank = comm->getRank ();
7130  }
7131  }
7132  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
7133  prefix = [myRank] () {
7134  std::ostringstream pfxStrm;
7135  pfxStrm << "(Proc " << myRank << ") ";
7136  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7137  } ();
7138 
7139  std::ostringstream os;
7140  os << *prefix << "Tpetra::CrsMatrix::packNonStaticNew:" << endl;
7141  std::cerr << os.str ();
7142  }
7143 
7144  const size_t numExportLIDs = static_cast<size_t> (exportLIDs.extent (0));
7145  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7146  (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
7147  std::invalid_argument, "exportLIDs.size() = " << numExportLIDs
7148  << " != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
7149  << ".");
7150 
7151  // Setting this to zero tells the caller to expect a possibly
7152  // different ("nonconstant") number of packets per local index
7153  // (i.e., a possibly different number of entries per row).
7154  constantNumPackets = 0;
7155 
7156  // The pack buffer 'exports' enters this method possibly
7157  // unallocated. Do the first two parts of "Count, allocate, fill,
7158  // compute."
7159  size_t totalNumEntries = 0;
7160  this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
7161  const size_t bufSize = static_cast<size_t> (exports.extent (0));
7162 
7163  // Write-only host access
7164  exports.modified_device() = 0;
7165  exports.modified_host() = 1;
7166  auto exports_h = exports.template view<Kokkos::HostSpace> ();
7167  if (verbose) {
7168  std::ostringstream os;
7169  os << *prefix << "After marking exports as modified on host, "
7170  << dualViewStatusToString (exports, "exports") << endl;
7171  std::cerr << os.str ();
7172  }
7173 
7174  // Read-only host access
7175  auto exportLIDs_h = exportLIDs.template view<Kokkos::HostSpace> ();
7176 
7177  // Write-only host access
7178  numPacketsPerLID.modified_device() = 0;
7179  numPacketsPerLID.modified_host() = 1;
7180  auto numPacketsPerLID_h = numPacketsPerLID.template view<Kokkos::HostSpace> ();
7181 
7182  // Compute the number of "packets" (in this case, bytes) per
7183  // export LID (in this case, local index of the row to send), and
7184  // actually pack the data.
7185  size_t offset = 0; // current index into 'exports' array.
7186  for (size_t i = 0; i < numExportLIDs; ++i) {
7187  const LO lclRow = exportLIDs_h[i];
7188 
7189  size_t numEnt;
7190  numEnt = this->getNumEntriesInLocalRow (lclRow);
7191 
7192  // Only pack this row's data if it has a nonzero number of
7193  // entries. We can do this because receiving processes get the
7194  // number of packets, and will know that zero packets means zero
7195  // entries.
7196  if (numEnt == 0) {
7197  numPacketsPerLID_h[i] = 0;
7198  continue;
7199  }
7200 
7201  // Temporary buffer for global column indices.
7202  View<GO*, HES> gidsIn_k;
7203  {
7204  GO gid = 0;
7205  gidsIn_k = PackTraits<GO, HES>::allocateArray(gid, numEnt, "gids");
7206  }
7207 
7208  Teuchos::ArrayView<const Scalar> valsIn;
7209  if (this->isLocallyIndexed ()) {
7210  // If the matrix is locally indexed on the calling process, we
7211  // have to use its column Map (which it _must_ have in this
7212  // case) to convert to global indices.
7213  Teuchos::ArrayView<const LO> lidsIn;
7214  this->getLocalRowView (lclRow, lidsIn, valsIn);
7215  const map_type& colMap = * (this->getColMap ());
7216  for (size_t k = 0; k < numEnt; ++k) {
7217  gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
7218  }
7219  }
7220  else if (this->isGloballyIndexed ()) {
7221  // If the matrix is globally indexed on the calling process,
7222  // then we can use the column indices directly. However, we
7223  // have to get the global row index. The calling process must
7224  // have a row Map, since otherwise it shouldn't be participating
7225  // in packing operations.
7226  Teuchos::ArrayView<const GO> gblIndView;;
7227  const map_type& rowMap = * (this->getRowMap ());
7228  const GO gblRow = rowMap.getGlobalElement (lclRow);
7229  this->getGlobalRowView (gblRow, gblIndView, valsIn);
7230  for (size_t k = 0; k < numEnt; ++k) {
7231  gidsIn_k[k] = gblIndView[k];
7232  }
7233  }
7234  // mfh 11 Sep 2017: Currently, if the matrix is neither globally
7235  // nor locally indexed, then it has no entries. Therefore,
7236  // there is nothing to pack. No worries!
7237 
7238  typename HES::device_type outputDevice;
7239  auto valsIn_k =
7241  reinterpret_cast<const ST*> (valsIn.getRawPtr ()),
7242  valsIn.size (),
7243  true, "valsIn");
7244  const size_t numBytesPerValue =
7245  PackTraits<ST,HES>::packValueCount (valsIn[0]);
7246  const size_t numBytes =
7247  this->packRow (exports_h.data (), offset, numEnt, gidsIn_k.data (),
7248  valsIn_k.data (), numBytesPerValue);
7249  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7250  (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
7251  "First invalid offset into 'exports' pack buffer at index i = " << i
7252  << ". exportLIDs_h[i]: " << exportLIDs_h[i] << ", bufSize: " <<
7253  bufSize << ", offset: " << offset << ", numBytes: " << numBytes <<
7254  ".");
7255  // numPacketsPerLID_h[i] is the number of "packets" in the
7256  // current local row i. Packet=char (really "byte") so use the
7257  // number of bytes of the packed data for that row.
7258  numPacketsPerLID_h[i] = numBytes;
7259  offset += numBytes;
7260  }
7261 
7262  if (verbose) {
7263  std::ostringstream os;
7264  os << *prefix << "Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
7265  << *prefix << " "
7266  << dualViewStatusToString (exports, "exports")
7267  << endl
7268  << *prefix << " "
7269  << dualViewStatusToString (exportLIDs, "exportLIDs")
7270  << endl;
7271  std::cerr << os.str ();
7272  }
7273  }
7274 
7275  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7276  LocalOrdinal
7277  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7278  combineGlobalValuesRaw (const LocalOrdinal lclRow,
7279  const LocalOrdinal numEnt,
7280  const impl_scalar_type vals[],
7281  const GlobalOrdinal cols[],
7282  const Tpetra::CombineMode combineMode)
7283  {
7284  typedef GlobalOrdinal GO;
7285  //const char tfecfFuncName[] = "combineGlobalValuesRaw: ";
7286 
7287  // mfh 23 Mar 2017: This branch is not thread safe in a debug
7288  // build, due to use of Teuchos::ArrayView; see #229.
7289  const GO gblRow = this->myGraph_->rowMap_->getGlobalElement (lclRow);
7290  Teuchos::ArrayView<const GO> cols_av (numEnt == 0 ? NULL : cols, numEnt);
7291  Teuchos::ArrayView<const Scalar> vals_av (numEnt == 0 ? NULL : reinterpret_cast<const Scalar*> (vals), numEnt);
7292 
7293  // FIXME (mfh 23 Mar 2017) This is a work-around for less common
7294  // combine modes. combineGlobalValues throws on error; it does
7295  // not return an error code. Thus, if it returns, it succeeded.
7296  this->combineGlobalValues (gblRow, cols_av, vals_av, combineMode);
7297  return numEnt;
7298  }
7299 
7300  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7301  void
7302  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7303  combineGlobalValues (const GlobalOrdinal globalRowIndex,
7304  const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
7305  const Teuchos::ArrayView<const Scalar>& values,
7306  const Tpetra::CombineMode combineMode)
7307  {
7308  const char tfecfFuncName[] = "combineGlobalValues: ";
7309 
7310  if (isStaticGraph ()) {
7311  // INSERT doesn't make sense for a static graph, since you
7312  // aren't allowed to change the structure of the graph.
7313  // However, all the other combine modes work.
7314  if (combineMode == ADD) {
7315  sumIntoGlobalValues (globalRowIndex, columnIndices, values);
7316  }
7317  else if (combineMode == REPLACE) {
7318  replaceGlobalValues (globalRowIndex, columnIndices, values);
7319  }
7320  else if (combineMode == ABSMAX) {
7321  using ::Tpetra::Details::AbsMax;
7322  AbsMax<Scalar> f;
7323  this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
7324  columnIndices,
7325  values, f);
7326  }
7327  else if (combineMode == INSERT) {
7328  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7329  isStaticGraph () && combineMode == INSERT, std::invalid_argument,
7330  "INSERT combine mode is not allowed if the matrix has a static graph "
7331  "(i.e., was constructed with the CrsMatrix constructor that takes a "
7332  "const CrsGraph pointer).");
7333  }
7334  else {
7335  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7336  true, std::logic_error, "Invalid combine mode; should never get "
7337  "here! Please report this bug to the Tpetra developers.");
7338  }
7339  }
7340  else { // The matrix has a dynamic graph.
7341  if (combineMode == ADD || combineMode == INSERT) {
7342  // For a dynamic graph, all incoming column indices are
7343  // inserted into the target graph. Duplicate indices will
7344  // have their values summed. In this context, ADD and INSERT
7345  // are equivalent. We need to call insertGlobalValues()
7346  // anyway if the column indices don't yet exist in this row,
7347  // so we just call insertGlobalValues() for both cases.
7348  try {
7349  this->insertGlobalValuesFiltered (globalRowIndex, columnIndices,
7350  values);
7351  }
7352  catch (std::exception& e) {
7353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7354  (true, std::runtime_error, std::endl
7355  << "insertGlobalValuesFiltered(" << globalRowIndex << ", "
7356  << std::endl << Teuchos::toString (columnIndices) << ", "
7357  << std::endl << Teuchos::toString (values)
7358  << ") threw an exception: " << std::endl << e.what ());
7359  }
7360  }
7361  // FIXME (mfh 14 Mar 2012):
7362  //
7363  // Implementing ABSMAX or REPLACE for a dynamic graph would
7364  // require modifying assembly to attach a possibly different
7365  // combine mode to each inserted (i, j, A_ij) entry. For
7366  // example, consider two different Export operations to the same
7367  // target CrsMatrix, the first with ABSMAX combine mode and the
7368  // second with REPLACE. This isn't a common use case, so we
7369  // won't mess with it for now.
7370  else if (combineMode == ABSMAX) {
7371  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7372  ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
7373  "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
7374  "implemented.");
7375  }
7376  else if (combineMode == REPLACE) {
7377  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7378  ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
7379  "REPLACE combine mode when the matrix has a dynamic graph is not yet "
7380  "implemented.");
7381  }
7382  else {
7383  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7384  true, std::logic_error, "Should never get here! Please report this "
7385  "bug to the Tpetra developers.");
7386  }
7387  }
7388  }
7389 
7390  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7391  void
7392  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7393  unpackAndCombineNew (const Kokkos::DualView<const local_ordinal_type*, device_type>& importLIDs,
7394  const Kokkos::DualView<const char*, buffer_device_type>& imports,
7395  const Kokkos::DualView<const size_t*, buffer_device_type>& numPacketsPerLID,
7396  const size_t constantNumPackets,
7397  Distributor& distor,
7398  const CombineMode combineMode)
7399  {
7402  using std::endl;
7403  const char tfecfFuncName[] = "unpackAndCombineNew: ";
7404  ProfilingRegion regionUAC ("Tpetra::CrsMatrix::unpackAndCombineNew");
7405 
7406  const bool debug = ::Tpetra::Details::Behavior::debug ();
7407  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
7408  constexpr int numValidModes = 5;
7409  const CombineMode validModes[numValidModes] =
7410  {ADD, REPLACE, ABSMAX, INSERT, ZERO};
7411  const char* validModeNames[numValidModes] =
7412  {"ADD", "REPLACE", "ABSMAX", "INSERT", "ZERO"};
7413 
7414  std::unique_ptr<std::string> prefix;
7415  int myRank = 0;
7416  if (verbose) {
7417  auto map = this->getMap ();
7418  if (! map.is_null ()) {
7419  auto comm = map->getComm ();
7420  if (! comm.is_null ()) {
7421  myRank = comm->getRank ();
7422  }
7423  }
7424  prefix = [myRank] () {
7425  std::ostringstream pfxStrm;
7426  pfxStrm << "(Proc " << myRank << ") ";
7427  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7428  } ();
7429 
7430  std::ostringstream os;
7431  os << *prefix << "Tpetra::CrsMatrix::unpackAndCombineNew: " << endl
7432  << *prefix << " "
7433  << dualViewStatusToString (importLIDs, "importLIDs")
7434  << endl
7435  << *prefix << " "
7436  << dualViewStatusToString (imports, "imports")
7437  << endl
7438  << *prefix << " "
7439  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7440  << endl
7441  << *prefix << " constantNumPackets: " << constantNumPackets
7442  << endl
7443  << *prefix << " combineMode: " << combineModeToString (combineMode)
7444  << endl;
7445  std::cerr << os.str ();
7446  }
7447 
7448  if (debug) {
7449  if (std::find (validModes, validModes+numValidModes, combineMode) ==
7450  validModes+numValidModes) {
7451  std::ostringstream os;
7452  os << "Invalid combine mode. Valid modes are {";
7453  for (int k = 0; k < numValidModes; ++k) {
7454  os << validModeNames[k];
7455  if (k < numValidModes - 1) {
7456  os << ", ";
7457  }
7458  }
7459  os << "}.";
7460  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7461  (true, std::invalid_argument, os.str ());
7462  }
7463  }
7464 
7465  if (combineMode == ZERO) {
7466  return; // nothing to do
7467  }
7468 
7469  if (debug) {
7470  using Teuchos::reduceAll;
7471  std::unique_ptr<std::ostringstream> msg (new std::ostringstream ());
7472  int lclBad = 0;
7473  try {
7474  this->unpackAndCombineNewImpl (importLIDs, imports, numPacketsPerLID,
7475  constantNumPackets, distor, combineMode);
7476  } catch (std::exception& e) {
7477  lclBad = 1;
7478  *msg << e.what ();
7479  }
7480  int gblBad = 0;
7481  const Teuchos::Comm<int>& comm = * (this->getComm ());
7482  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
7483  lclBad, Teuchos::outArg (gblBad));
7484  if (gblBad != 0) {
7485  // mfh 22 Oct 2017: 'prefix' might be null, since it is only
7486  // initialized in a debug build. Thus, we get the process
7487  // rank again here. This is an error message, so the small
7488  // run-time cost doesn't matter. See #1887.
7489  std::ostringstream os;
7490  os << "(Proc " << comm.getRank () << ") " << msg->str () << endl;
7491  msg = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
7492  ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
7493  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7494  (true, std::logic_error, std::endl << "unpackAndCombineNewImpl() "
7495  "threw an exception on one or more participating processes: "
7496  << endl << msg->str ());
7497  }
7498  }
7499  else {
7500  this->unpackAndCombineNewImpl (importLIDs, imports, numPacketsPerLID,
7501  constantNumPackets, distor, combineMode);
7502  }
7503 
7504  if (verbose) {
7505  std::ostringstream os;
7506  os << *prefix << "unpackAndCombineNew: Done!" << endl
7507  << *prefix << " "
7508  << dualViewStatusToString (importLIDs, "importLIDs")
7509  << endl
7510  << *prefix << " "
7511  << dualViewStatusToString (imports, "imports")
7512  << endl
7513  << *prefix << " "
7514  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7515  << endl;
7516  std::cerr << os.str ();
7517  }
7518  }
7519 
7520  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7521  void
7523  unpackAndCombineNewImpl (const Kokkos::DualView<const LocalOrdinal*, device_type>& importLIDs,
7524  const Kokkos::DualView<const char*, buffer_device_type>& imports,
7525  const Kokkos::DualView<const size_t*, buffer_device_type>& numPacketsPerLID,
7526  const size_t constantNumPackets,
7527  Distributor & distor,
7528  const CombineMode combineMode,
7529  const bool atomic)
7530  {
7531  // Exception are caught and handled upstream, so we just call the
7532  // implementations directly.
7533  if (this->isStaticGraph ()) {
7534  using ::Tpetra::Details::unpackCrsMatrixAndCombineNew;
7535  unpackCrsMatrixAndCombineNew (*this, imports, numPacketsPerLID,
7536  importLIDs, constantNumPackets,
7537  distor, combineMode, atomic);
7538  }
7539  else {
7540  this->unpackAndCombineNewImplNonStatic (importLIDs, imports,
7541  numPacketsPerLID,
7542  constantNumPackets,
7543  distor, combineMode);
7544  }
7545  }
7546 
7547  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7548  void
7549  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7550  unpackAndCombineNewImplNonStatic (const Kokkos::DualView<const LocalOrdinal*, device_type>& importLIDs,
7551  const Kokkos::DualView<const char*, buffer_device_type>& imports,
7552  const Kokkos::DualView<const size_t*, buffer_device_type>& numPacketsPerLID,
7553  const size_t constantNumPackets,
7554  Distributor& distor,
7555  const CombineMode combineMode)
7556  {
7557  using Kokkos::View;
7558  using Kokkos::subview;
7559  using Kokkos::MemoryUnmanaged;
7563  using std::endl;
7564  typedef LocalOrdinal LO;
7565  typedef GlobalOrdinal GO;
7566  typedef impl_scalar_type ST;
7567  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
7568  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
7569  typedef std::pair<typename View<int*, HES>::size_type,
7570  typename View<int*, HES>::size_type> pair_type;
7571  typedef View<GO*, HES, MemoryUnmanaged> gids_out_type;
7572  typedef View<ST*, HES, MemoryUnmanaged> vals_out_type;
7573  const char tfecfFuncName[] = "unpackAndCombineNewImplNonStatic: ";
7574 
7575  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
7576  // output to std::cerr on every MPI process. This is unwise for
7577  // runs with large numbers of MPI processes.
7578  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
7579  std::unique_ptr<std::string> prefix;
7580  if (verbose) {
7581  int myRank = 0;
7582  auto map = this->getMap ();
7583  if (! map.is_null ()) {
7584  auto comm = map->getComm ();
7585  if (! comm.is_null ()) {
7586  myRank = comm->getRank ();
7587  }
7588  }
7589  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
7590  prefix = [myRank] () {
7591  std::ostringstream pfxStrm;
7592  pfxStrm << "(Proc " << myRank << ") ";
7593  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7594  } ();
7595 
7596  std::ostringstream os;
7597  os << *prefix << "Tpetra::CrsMatrix::unpackAndCombineNewImplNonStatic:"
7598  << endl; // we've already printed statuses of DualViews
7599  std::cerr << os.str ();
7600  }
7601 
7602  const size_type numImportLIDs = importLIDs.extent (0);
7603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7604  (numImportLIDs != static_cast<size_type> (numPacketsPerLID.extent (0)),
7605  std::invalid_argument, "importLIDs.size() = " << numImportLIDs
7606  << " != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
7607  << ".");
7608  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7609  (combineMode != ADD && combineMode != INSERT && combineMode != REPLACE &&
7610  combineMode != ABSMAX && combineMode != ZERO, std::invalid_argument,
7611  "Invalid CombineMode value " << combineMode << ". Valid "
7612  << "values include ADD, INSERT, REPLACE, ABSMAX, and ZERO.");
7613  if (combineMode == ZERO || numImportLIDs == 0) {
7614  return; // nothing to do; no need to combine entries
7615  }
7616 
7617  // We're unpacking on host. This is read-only host access of imports.
7618  {
7619  auto imports_nc = castAwayConstDualView (imports);
7620  imports_nc.template sync<Kokkos::HostSpace> ();
7621  }
7622  auto imports_h = imports.template view<Kokkos::HostSpace> ();
7623 
7624  // Read-only host access.
7625  {
7626  auto numPacketsPerLID_nc = castAwayConstDualView (numPacketsPerLID);
7627  numPacketsPerLID_nc.template sync<Kokkos::HostSpace> ();
7628  }
7629  auto numPacketsPerLID_h = numPacketsPerLID.template view<Kokkos::HostSpace> ();
7630 
7631  // Read-only host access.
7632  {
7633  auto importLIDs_nc = castAwayConstDualView (importLIDs);
7634  importLIDs_nc.template sync<Kokkos::HostSpace> ();
7635  }
7636  auto importLIDs_h = importLIDs.template view<Kokkos::HostSpace> ();
7637 
7638  size_t numBytesPerValue;
7639  {
7640  // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
7641  // with run-time size? We already assume that all entries in both the
7642  // source and target matrices have the same size. If the calling process
7643  // owns at least one entry in either matrix, we can use that entry to set
7644  // the size. However, it is possible that the calling process owns no
7645  // entries. In that case, we're in trouble. One way to fix this would be
7646  // for each row's data to contain the run-time size. This is only
7647  // necessary if the size is not a compile-time constant.
7648  Scalar val;
7649  numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
7650  }
7651 
7652  // Determine the maximum number of entries in any one row
7653  size_t offset = 0;
7654  size_t maxRowNumEnt = 0;
7655  for (size_type i = 0; i < numImportLIDs; ++i) {
7656  const size_t numBytes = numPacketsPerLID_h[i];
7657  if (numBytes == 0) {
7658  continue; // empty buffer for that row means that the row is empty
7659  }
7660  // We need to unpack a nonzero number of entries for this row.
7661 #ifdef HAVE_TPETRA_DEBUG
7662  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7663  (offset + numBytes > static_cast<size_t> (imports_h.extent (0)),
7664  std::logic_error, "At local row index importLIDs_h[i=" << i << "]="
7665  << importLIDs_h[i] << ", offset (=" << offset << ") + numBytes (="
7666  << numBytes << ") > imports_h.extent(0)="
7667  << imports_h.extent (0) << ".");
7668 #endif // HAVE_TPETRA_DEBUG
7669 
7670  LO numEntLO = 0;
7671 
7672 #ifdef HAVE_TPETRA_DEBUG
7673  const size_t theNumBytes = PackTraits<LO, HES>::packValueCount (numEntLO);
7674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7675  (theNumBytes > numBytes, std::logic_error, "theNumBytes = "
7676  << theNumBytes << " > numBytes = " << numBytes << ".");
7677 #endif // HAVE_TPETRA_DEBUG
7678 
7679  const char* const inBuf = imports_h.data () + offset;
7680  const size_t actualNumBytes =
7681  PackTraits<LO, HES>::unpackValue (numEntLO, inBuf);
7682 
7683 #ifdef HAVE_TPETRA_DEBUG
7684  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7685  (actualNumBytes > numBytes, std::logic_error, "At i = " << i
7686  << ", actualNumBytes=" << actualNumBytes
7687  << " > numBytes=" << numBytes << ".");
7688  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7689  (numEntLO == 0, std::logic_error, "At local row index importLIDs_h[i="
7690  << i << "]=" << importLIDs_h[i] << ", the number of entries read "
7691  "from the packed data is numEntLO=" << numEntLO << ", but numBytes="
7692  << numBytes << " != 0.");
7693 #else
7694  (void) actualNumBytes;
7695 #endif // HAVE_TPETRA_DEBUG
7696 
7697  maxRowNumEnt = std::max (static_cast<size_t> (numEntLO), maxRowNumEnt);
7698  offset += numBytes;
7699  }
7700 
7701  // Temporary space to cache incoming global column indices and
7702  // values. Column indices come in as global indices, in case the
7703  // source object's column Map differs from the target object's
7704  // (this's) column Map.
7705  View<GO*, HES> gblColInds;
7706  View<LO*, HES> lclColInds;
7707  View<ST*, HES> vals;
7708  {
7709  GO gid = 0;
7710  LO lid = 0;
7711  // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
7712  // with run-time size? We already assume that all entries in both the
7713  // source and target matrices have the same size. If the calling process
7714  // owns at least one entry in either matrix, we can use that entry to set
7715  // the size. However, it is possible that the calling process owns no
7716  // entries. In that case, we're in trouble. One way to fix this would be
7717  // for each row's data to contain the run-time size. This is only
7718  // necessary if the size is not a compile-time constant.
7719  Scalar val;
7720  gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowNumEnt, "gids");
7721  lclColInds = PackTraits<LO, HES>::allocateArray (lid, maxRowNumEnt, "lids");
7722  vals = PackTraits<ST, HES>::allocateArray (val, maxRowNumEnt, "vals");
7723  }
7724 
7725  offset = 0;
7726  for (size_type i = 0; i < numImportLIDs; ++i) {
7727  const size_t numBytes = numPacketsPerLID_h[i];
7728  if (numBytes == 0) {
7729  continue; // empty buffer for that row means that the row is empty
7730  }
7731  LO numEntLO = 0;
7732  const char* const inBuf = imports_h.data () + offset;
7733  const size_t actualNumBytes = PackTraits<LO, HES>::unpackValue (numEntLO, inBuf);
7734  (void) actualNumBytes;
7735 
7736  const size_t numEnt = static_cast<size_t>(numEntLO);;
7737  const LO lclRow = importLIDs_h[i];
7738 
7739  gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7740  vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7741 
7742  const size_t numBytesOut =
7743  unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7744  offset, numBytes, numEnt, numBytesPerValue);
7745  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7746  (numBytes != numBytesOut, std::logic_error, "At i = " << i << ", "
7747  << "numBytes = " << numBytes << " != numBytesOut = " << numBytesOut
7748  << ".");
7749 
7750  const ST* const valsRaw = const_cast<const ST*> (valsOut.data ());
7751  const GO* const gidsRaw = const_cast<const GO*> (gidsOut.data ());
7752  this->combineGlobalValuesRaw (lclRow, numEnt, valsRaw, gidsRaw, combineMode);
7753 
7754  // Don't update offset until current LID has succeeded.
7755  offset += numBytes;
7756  } // for each import LID i
7757  }
7758 
7759  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7760  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7761  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7762  getColumnMapMultiVector (const MV& X_domainMap,
7763  const bool force) const
7764  {
7765  using Teuchos::null;
7766  using Teuchos::RCP;
7767  using Teuchos::rcp;
7768 
7769  TEUCHOS_TEST_FOR_EXCEPTION(
7770  ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
7771  "MapMultiVector: You may only call this method if the matrix has a "
7772  "column Map. If the matrix does not yet have a column Map, you should "
7773  "first call fillComplete (with domain and range Map if necessary).");
7774 
7775  // If the graph is not fill complete, then the Import object (if
7776  // one should exist) hasn't been constructed yet.
7777  TEUCHOS_TEST_FOR_EXCEPTION(
7778  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
7779  "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7780  "this matrix's graph is fill complete.");
7781 
7782  const size_t numVecs = X_domainMap.getNumVectors ();
7783  RCP<const import_type> importer = this->getGraph ()->getImporter ();
7784  RCP<const map_type> colMap = this->getColMap ();
7785 
7786  RCP<MV> X_colMap; // null by default
7787 
7788  // If the Import object is trivial (null), then we don't need a
7789  // separate column Map multivector. Just return null in that
7790  // case. The caller is responsible for knowing not to use the
7791  // returned null pointer.
7792  //
7793  // If the Import is nontrivial, then we do need a separate
7794  // column Map multivector for the Import operation. Check in
7795  // that case if we have to (re)create the column Map
7796  // multivector.
7797  if (! importer.is_null () || force) {
7798  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7799  X_colMap = rcp (new MV (colMap, numVecs));
7800 
7801  // Cache the newly created multivector for later reuse.
7802  importMV_ = X_colMap;
7803  }
7804  else { // Yay, we can reuse the cached multivector!
7805  X_colMap = importMV_;
7806  // mfh 09 Jan 2013: We don't have to fill with zeros first,
7807  // because the Import uses INSERT combine mode, which overwrites
7808  // existing entries.
7809  //
7810  //X_colMap->putScalar (ZERO);
7811  }
7812  }
7813  return X_colMap;
7814  }
7815 
// Get a multivector distributed by this matrix's row Map, with as many
// columns as Y_rangeMap, creating it and caching it in exportMV_ if
// necessary.
//
// Y_rangeMap: only its number of columns (getNumVectors()) is read here.
// force: if true, produce a row Map multivector even when the graph's
//   Export object is null.
//
// Returns null when the graph's Export object is null and force is
// false.  Otherwise returns the cached exportMV_ if it is nonnull and
// has the requested number of columns, or else a newly allocated
// multivector, which then replaces the cache.
//
// Throws std::runtime_error if this matrix's graph is not fill complete.
//
// NOTE(review): this listing skips the source lines between the return
// type and "const bool force" (the embedded numbering jumps from 7817
// to 7820).  The class qualifier, the method name, and the declaration
// of Y_rangeMap presumably sit on those lines -- confirm against the
// original file.
7816  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7817  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7820  const bool force) const
7821  {
7822  using Teuchos::null;
7823  using Teuchos::RCP;
7824  using Teuchos::rcp;
7825 
7826  // If the graph is not fill complete, then the Export object (if
7827  // one should exist) hasn't been constructed yet.
7828  TEUCHOS_TEST_FOR_EXCEPTION(
7829  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
7830  "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7831  "matrix's graph is fill complete.");
7832 
7833  const size_t numVecs = Y_rangeMap.getNumVectors ();
7834  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7835  // Every version of the constructor takes either a row Map, or a
7836  // graph (all of whose constructors take a row Map). Thus, the
7837  // matrix always has a row Map.
7838  RCP<const map_type> rowMap = this->getRowMap ();
7839 
7840  RCP<MV> Y_rowMap; // null by default
7841 
7842  // If the Export object is trivial (null), then we don't need a
7843  // separate row Map multivector. Just return null in that case.
7844  // The caller is responsible for knowing not to use the returned
7845  // null pointer.
7846  //
7847  // If the Export is nontrivial, then we do need a separate row
7848  // Map multivector for the Export operation. Check in that case
7849  // if we have to (re)create the row Map multivector.
      //
      // The 'force' argument overrides the null-Export shortcut: when
      // it is true, a multivector is returned even if the Export is
      // null.
7850  if (! exporter.is_null () || force) {
      // The cache is reused only if it exists and its column count
      // matches; no other property of the cached multivector is
      // compared here.
7851  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7852  Y_rowMap = rcp (new MV (rowMap, numVecs));
7853  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
7854  }
7855  else { // Yay, we can reuse the cached multivector!
7856  Y_rowMap = exportMV_;
7857  }
7858  }
7859  return Y_rowMap;
7860  }
7861 
// Forward the removal of empty processes to this matrix's nonconst
// graph (myGraph_), then resynchronize this object's DistObject Map
// (map_) and staticGraph_ with the modified graph.
//
// newMap: handed unchanged to myGraph_->removeEmptyProcessesInPlace.
//   Presumably the row Map restricted to the nonempty processes --
//   confirm against the graph's documentation.
//
// Throws std::logic_error if myGraph_ is null, which the message below
// attributes to the matrix having been created with a constant graph.
//
// NOTE(review): this listing skips one source line between "void" and
// the method name (the embedded numbering jumps from 7863 to 7865).
// The class qualifier presumably sits there -- confirm against the
// original file.
7862  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7863  void
7865  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
7866  {
7867  TEUCHOS_TEST_FOR_EXCEPTION(
7868  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
7869  "removeEmptyProcessesInPlace: This method does not work when the matrix "
7870  "was created with a constant graph (that is, when it was created using "
7871  "the version of its constructor that takes an RCP<const CrsGraph>). "
7872  "This is because the matrix is not allowed to modify the graph in that "
7873  "case, but removing empty processes requires modifying the graph.");
7874  myGraph_->removeEmptyProcessesInPlace (newMap);
7875  // Even though CrsMatrix's row Map (as returned by getRowMap())
7876  // comes from its CrsGraph, CrsMatrix still implements DistObject,
7877  // so we also have to change the DistObject's Map.
7878  this->map_ = this->getRowMap ();
7879  // In the nonconst graph case, staticGraph_ is just a const
7880  // pointer to myGraph_. This assignment is probably redundant,
7881  // but it doesn't hurt.
7882  staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
7883  }
7884 
// Compute and return C = alpha*A + beta*B, where B is this matrix.
//
// alpha, beta: scalar coefficients.  If a coefficient equals zero, the
//   corresponding matrix's entries are neither counted nor inserted
//   into C.
// domainMap, rangeMap: Maps passed to C's fillComplete.  When one is
//   null, B's corresponding Map is used if nonnull, otherwise A's.
// params: optional parameter list.  Entries read here:
//   - "Call fillComplete" (bool, default true): whether to call
//     fillComplete on C before returning.
//   - "Constructor parameters" sublist: passed to C's constructor.
//   - "fillComplete parameters" sublist: passed to C's fillComplete.
//
// Returns C, cast to an RCP of row_matrix_type.  C's row Map is B's
// row Map.
//
// Throws std::invalid_argument if a domain (or range) Map was not
// supplied and neither A nor B has one.  Builds with HAVE_TPETRA_DEBUG
// perform additional Map consistency checks that also throw
// std::invalid_argument.
//
// NOTE(review): this listing skips several source lines (the embedded
// numbering jumps 7886 -> 7888, 7888 -> 7890, and 7904 -> 7907).  The
// class qualifier, the declaration of the parameter A, and the local
// typedefs of row_matrix_type and crs_matrix_type presumably sit on
// those lines -- confirm against the original file.
7885  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7886  Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7888  add (const Scalar& alpha,
7890  const Scalar& beta,
7891  const Teuchos::RCP<const map_type>& domainMap,
7892  const Teuchos::RCP<const map_type>& rangeMap,
7893  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7894  {
7895  using Teuchos::Array;
7896  using Teuchos::ArrayRCP;
7897  using Teuchos::ArrayView;
7898  using Teuchos::ParameterList;
7899  using Teuchos::RCP;
7900  using Teuchos::rcp;
7901  using Teuchos::rcp_implicit_cast;
7902  using Teuchos::sublist;
7903  typedef LocalOrdinal LO;
7904  typedef GlobalOrdinal GO;
7907 
7908  const crs_matrix_type& B = *this; // a convenient abbreviation
7909  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
7910  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
7911 
7912  // If the user didn't supply a domain or range Map, then try to
7913  // get one from B first (if it has them), then from A (if it has
7914  // them). If we don't have any domain or range Maps, scold the
7915  // user.
7916  RCP<const map_type> A_domainMap = A.getDomainMap ();
7917  RCP<const map_type> A_rangeMap = A.getRangeMap ();
7918  RCP<const map_type> B_domainMap = B.getDomainMap ();
7919  RCP<const map_type> B_rangeMap = B.getRangeMap ();
7920 
7921  RCP<const map_type> theDomainMap = domainMap;
7922  RCP<const map_type> theRangeMap = rangeMap;
7923 
7924  if (domainMap.is_null ()) {
7925  if (B_domainMap.is_null ()) {
7926  TEUCHOS_TEST_FOR_EXCEPTION(
7927  A_domainMap.is_null (), std::invalid_argument,
7928  "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7929  "then you must supply a nonnull domain Map to this method.");
7930  theDomainMap = A_domainMap;
7931  } else {
7932  theDomainMap = B_domainMap;
7933  }
7934  }
7935  if (rangeMap.is_null ()) {
7936  if (B_rangeMap.is_null ()) {
7937  TEUCHOS_TEST_FOR_EXCEPTION(
7938  A_rangeMap.is_null (), std::invalid_argument,
7939  "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7940  "then you must supply a nonnull range Map to this method.");
7941  theRangeMap = A_rangeMap;
7942  } else {
7943  theRangeMap = B_rangeMap;
7944  }
7945  }
7946 
7947 #ifdef HAVE_TPETRA_DEBUG
7948  // In a debug build, check that A and B have matching domain and
7949  // range Maps, if they have domain and range Maps at all. (If
7950  // they aren't fill complete, then they may not yet have them.)
      //
      // NOTE(review): when A has both Maps but B lacks one of them,
      // the first branch is taken and its inner 'if' is false, so none
      // of the checks below run (the user-supplied Maps are not
      // compared against A's).  Confirm that this gap is intended.
7951  if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) {
7952  if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
7953  TEUCHOS_TEST_FOR_EXCEPTION(
7954  ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument,
7955  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a domain Map "
7956  "which is the same as (isSameAs) this RowMatrix's domain Map.");
7957  TEUCHOS_TEST_FOR_EXCEPTION(
7958  ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument,
7959  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a range Map "
7960  "which is the same as (isSameAs) this RowMatrix's range Map.");
7961  TEUCHOS_TEST_FOR_EXCEPTION(
7962  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
7963  std::invalid_argument,
7964  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
7965  "(isSameAs) this RowMatrix's domain Map.");
7966  TEUCHOS_TEST_FOR_EXCEPTION(
7967  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
7968  std::invalid_argument,
7969  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
7970  "(isSameAs) this RowMatrix's range Map.");
7971  }
7972  }
7973  else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
7974  TEUCHOS_TEST_FOR_EXCEPTION(
7975  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
7976  std::invalid_argument,
7977  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
7978  "(isSameAs) this RowMatrix's domain Map.");
7979  TEUCHOS_TEST_FOR_EXCEPTION(
7980  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
7981  std::invalid_argument,
7982  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
7983  "(isSameAs) this RowMatrix's range Map.");
7984  }
7985  else {
7986  TEUCHOS_TEST_FOR_EXCEPTION(
7987  domainMap.is_null () || rangeMap.is_null (), std::invalid_argument,
7988  "Tpetra::CrsMatrix::add: If neither A nor B have a domain and range "
7989  "Map, then you must supply a nonnull domain and range Map to this "
7990  "method.");
7991  }
7992 #endif // HAVE_TPETRA_DEBUG
7993 
7994  // What parameters do we pass to C's constructor? Do we call
7995  // fillComplete on C after filling it? And if so, what parameters
7996  // do we pass to C's fillComplete call?
7997  bool callFillComplete = true;
7998  RCP<ParameterList> constructorSublist;
7999  RCP<ParameterList> fillCompleteSublist;
8000  if (! params.is_null ()) {
8001  callFillComplete = params->get ("Call fillComplete", callFillComplete);
8002  constructorSublist = sublist (params, "Constructor parameters");
8003  fillCompleteSublist = sublist (params, "fillComplete parameters");
8004  }
8005 
8006  RCP<const map_type> A_rowMap = A.getRowMap ();
8007  RCP<const map_type> B_rowMap = B.getRowMap ();
8008  RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
8009  RCP<crs_matrix_type> C; // The result matrix.
8010 
8011  // If A and B's row Maps are the same, we can compute an upper
8012  // bound on the number of entries in each row of C, before
8013  // actually computing the sum. A reasonable upper bound is the
8014  // sum of the two entry counts in each row. If we choose this as
8015  // the actual per-row upper bound, we can use static profile.
8016  if (A_rowMap->isSameAs (*B_rowMap)) {
8017  const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8018  ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
8019 
      // A matrix whose coefficient is zero contributes nothing to the
      // per-row bound; the insertion loops further down skip that
      // matrix under the same condition.
8020  // Get the number of entries in each row of A.
8021  if (alpha != ZERO) {
8022  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8023  const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8024  C_maxNumEntriesPerRow[localRow] += A_numEntries;
8025  }
8026  }
8027  // Get the number of entries in each row of B.
8028  if (beta != ZERO) {
8029  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8030  const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8031  C_maxNumEntriesPerRow[localRow] += B_numEntries;
8032  }
8033  }
8034  // Construct the result matrix C.
8035  if (constructorSublist.is_null ()) {
8036  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
8037  StaticProfile));
8038  } else {
8039  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
8040  StaticProfile, constructorSublist));
8041  }
8042  // Since A and B have the same row Maps, we could add them
8043  // together all at once and merge values before we call
8044  // insertGlobalValues. However, we don't really need to, since
8045  // we've already allocated enough space in each row of C for C
8046  // to do the merge itself.
8047  }
8048  else { // the row Maps of A and B are not the same
      // No per-row bound is computed in this case, so C is created
      // with zero entries per row and DynamicProfile.
8049  // Construct the result matrix C.
8050  if (constructorSublist.is_null ()) {
8051  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile));
8052  } else {
8053  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile,
8054  constructorSublist));
8055  }
8056  }
8057 
      // NOTE(review): the message below is prefixed "Tpetra::RowMatrix::add"
      // whereas every other message in this method uses
      // "Tpetra::CrsMatrix::add" -- confirm which is intended.
8058 #ifdef HAVE_TPETRA_DEBUG
8059  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
8060  "Tpetra::RowMatrix::add: C should not be null at this point. "
8061  "Please report this bug to the Tpetra developers.");
8062 #endif // HAVE_TPETRA_DEBUG
8063  //
8064  // Compute C = alpha*A + beta*B.
8065  //
      // ind and val are scratch buffers shared by both loops below.
      // They are only ever grown, and each row works on a view of
      // exactly that row's entry count.
8066  Array<GO> ind;
8067  Array<Scalar> val;
8068 
8069  if (alpha != ZERO) {
8070  const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8071  for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
8072  size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8073  const GO globalRow = A_rowMap->getGlobalElement (localRow);
8074  if (A_numEntries > static_cast<size_t> (ind.size ())) {
8075  ind.resize (A_numEntries);
8076  val.resize (A_numEntries);
8077  }
8078  ArrayView<GO> indView = ind (0, A_numEntries);
8079  ArrayView<Scalar> valView = val (0, A_numEntries);
8080  A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
8081 
      // Skip the scaling pass when alpha equals one.
8082  if (alpha != ONE) {
8083  for (size_t k = 0; k < A_numEntries; ++k) {
8084  valView[k] *= alpha;
8085  }
8086  }
8087  C->insertGlobalValues (globalRow, indView, valView);
8088  }
8089  }
8090 
8091  if (beta != ZERO) {
8092  const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
8093  for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
8094  size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8095  const GO globalRow = B_rowMap->getGlobalElement (localRow);
8096  if (B_numEntries > static_cast<size_t> (ind.size ())) {
8097  ind.resize (B_numEntries);
8098  val.resize (B_numEntries);
8099  }
8100  ArrayView<GO> indView = ind (0, B_numEntries);
8101  ArrayView<Scalar> valView = val (0, B_numEntries);
8102  B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
8103 
      // Skip the scaling pass when beta equals one.
8104  if (beta != ONE) {
8105  for (size_t k = 0; k < B_numEntries; ++k) {
8106  valView[k] *= beta;
8107  }
8108  }
8109  C->insertGlobalValues (globalRow, indView, valView);
8110  }
8111  }
8112 
      // fillComplete is optional (see "Call fillComplete" above); if it
      // is skipped, C is returned without fillComplete having been
      // called here.
8113  if (callFillComplete) {
8114  if (fillCompleteSublist.is_null ()) {
8115  C->fillComplete (theDomainMap, theRangeMap);
8116  } else {
8117  C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
8118  }
8119  }
8120  return rcp_implicit_cast<row_matrix_type> (C);
8121  }
8122 
8123  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8124  void
8127  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
8128  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
8129  const Teuchos::RCP<const map_type>& domainMap,
8130  const Teuchos::RCP<const map_type>& rangeMap,
8131  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8132  {
8137  using Teuchos::ArrayRCP;
8138  using Teuchos::ArrayView;
8139  using Teuchos::Comm;
8140  using Teuchos::ParameterList;
8141  using Teuchos::RCP;
8142  typedef LocalOrdinal LO;
8143  typedef GlobalOrdinal GO;
8144  typedef node_type NT;
8145  typedef CrsMatrix<Scalar, LO, GO, NT> this_type;
8146  typedef Vector<int, LO, GO, NT> IntVectorType;
8147 
8148 #ifdef HAVE_TPETRA_MMM_TIMINGS
8149  std::string label;
8150  if(!params.is_null())
8151  label = params->get("Timer Label",label);
8152  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
8153  using Teuchos::TimeMonitor;
8154  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-1"))));
8155 #endif
8156 
8157  // Make sure that the input argument rowTransfer is either an
8158  // Import or an Export. Import and Export are the only two
8159  // subclasses of Transfer that we defined, but users might
8160  // (unwisely, for now at least) decide to implement their own
8161  // subclasses. Exclude this possibility.
8162  const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
8163  const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
8164  TEUCHOS_TEST_FOR_EXCEPTION(
8165  xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument,
8166  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
8167  "argument must be either an Import or an Export, and its template "
8168  "parameters must match the corresponding template parameters of the "
8169  "CrsMatrix.");
8170 
8171  // Make sure that the input argument domainTransfer is either an
8172  // Import or an Export. Import and Export are the only two
8173  // subclasses of Transfer that we defined, but users might
8174  // (unwisely, for now at least) decide to implement their own
8175  // subclasses. Exclude this possibility.
8176  Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
8177  Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
8178 
8179  if(! domainTransfer.is_null()) {
8180  TEUCHOS_TEST_FOR_EXCEPTION(
8181  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
8182  "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
8183  "argument must be either an Import or an Export, and its template "
8184  "parameters must match the corresponding template parameters of the "
8185  "CrsMatrix.");
8186 
8187  TEUCHOS_TEST_FOR_EXCEPTION(
8188  ( xferAsImport != NULL || ! xferDomainAsImport.is_null() ) &&
8189  (( xferAsImport != NULL && xferDomainAsImport.is_null() ) ||
8190  ( xferAsImport == NULL && ! xferDomainAsImport.is_null() )), std::invalid_argument,
8191  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8192  "arguments must be of the same type (either Import or Export).");
8193 
8194  TEUCHOS_TEST_FOR_EXCEPTION(
8195  ( xferAsExport != NULL || ! xferDomainAsExport.is_null() ) &&
8196  (( xferAsExport != NULL && xferDomainAsExport.is_null() ) ||
8197  ( xferAsExport == NULL && ! xferDomainAsExport.is_null() )), std::invalid_argument,
8198  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8199  "arguments must be of the same type (either Import or Export).");
8200  } // domainTransfer != null
8201 
8202 
8203  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
8204  // if the source Map is not distributed but the target Map is?
8205  const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
8206 
8207  //
8208  // Get the caller's parameters
8209  //
8210 
8211  bool reverseMode = false; // Are we in reverse mode?
8212  bool restrictComm = false; // Do we need to restrict the communicator?
8213  RCP<ParameterList> matrixparams; // parameters for the destination matrix
8214  if (! params.is_null ()) {
8215  reverseMode = params->get ("Reverse Mode", reverseMode);
8216  restrictComm = params->get ("Restrict Communicator", restrictComm);
8217  matrixparams = sublist (params, "CrsMatrix");
8218  }
8219 
8220  // Get the new domain and range Maps. We need some of them for
8221  // error checking, now that we have the reverseMode parameter.
8222  RCP<const map_type> MyRowMap = reverseMode ?
8223  rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
8224  RCP<const map_type> MyColMap; // create this below
8225  RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
8226  domainMap : getDomainMap ();
8227  RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
8228  rangeMap : getRangeMap ();
8229  RCP<const map_type> BaseRowMap = MyRowMap;
8230  RCP<const map_type> BaseDomainMap = MyDomainMap;
8231 
8232  // If the user gave us a nonnull destMat, then check whether it's
8233  // "pristine." That means that it has no entries.
8234  //
8235  // FIXME (mfh 15 May 2014) If this is not true on all processes,
8236  // then this exception test may hang. It would be better to
8237  // forward an error flag to the next communication phase.
8238  if (! destMat.is_null ()) {
8239  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
8240  // whether a graph or matrix has no entries on the calling
8241  // process, is that it is neither locally nor globally indexed.
8242  // This may change eventually with the Kokkos refactor version
8243  // of Tpetra, so it would be better just to check the quantity
8244  // of interest directly. Note that with the Kokkos refactor
8245  // version of Tpetra, asking for the total number of entries in
8246  // a graph or matrix that is not fill complete might require
8247  // computation (kernel launch), since it is not thread scalable
8248  // to update a count every time an entry is inserted.
8249  const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
8250  ! destMat->getGraph ()->isGloballyIndexed ();
8251  TEUCHOS_TEST_FOR_EXCEPTION(
8252  ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
8253  "transferAndFillComplete: The input argument 'destMat' is only allowed "
8254  "to be nonnull, if its graph is empty (neither locally nor globally "
8255  "indexed).");
8256  // FIXME (mfh 15 May 2014) At some point, we want to change
8257  // graphs and matrices so that their DistObject Map
8258  // (this->getMap()) may differ from their row Map. This will
8259  // make redistribution for 2-D distributions more efficient. I
8260  // hesitate to change this check, because I'm not sure how much
8261  // the code here depends on getMap() and getRowMap() being the
8262  // same.
8263  TEUCHOS_TEST_FOR_EXCEPTION(
8264  ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
8265  "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
8266  "input argument 'destMat' is not the same as the (row) Map specified "
8267  "by the input argument 'rowTransfer'.");
8268  TEUCHOS_TEST_FOR_EXCEPTION(
8269  ! destMat->checkSizes (*this), std::invalid_argument,
8270  "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
8271  "destination matrix, but checkSizes() indicates that it is not a legal "
8272  "legal target for redistribution from the source matrix (*this). This "
8273  "may mean that they do not have the same dimensions.");
8274  }
8275 
8276  // If forward mode (the default), then *this's (row) Map must be
8277  // the same as the source Map of the Transfer. If reverse mode,
8278  // then *this's (row) Map must be the same as the target Map of
8279  // the Transfer.
8280  //
8281  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
8282  // and matrices so that their DistObject Map (this->getMap()) may
8283  // differ from their row Map. This will make redistribution for
8284  // 2-D distributions more efficient. I hesitate to change this
8285  // check, because I'm not sure how much the code here depends on
8286  // getMap() and getRowMap() being the same.
8287  TEUCHOS_TEST_FOR_EXCEPTION(
8288  ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
8289  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8290  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
8291  TEUCHOS_TEST_FOR_EXCEPTION(
8292  ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
8293  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8294  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
8295 
8296  // checks for domainTransfer
8297  TEUCHOS_TEST_FOR_EXCEPTION(
8298  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
8299  std::invalid_argument,
8300  "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
8301  "argument must be the same as the rebalanced domain map 'domainMap'");
8302 
8303  TEUCHOS_TEST_FOR_EXCEPTION(
8304  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
8305  std::invalid_argument,
8306  "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
8307  "argument must be the same as the rebalanced domain map 'domainMap'");
8308 
8309  // The basic algorithm here is:
8310  //
8311  // 1. Call the moral equivalent of "distor.do" to handle the import.
8312  // 2. Copy all the Imported and Copy/Permuted data into the raw
8313  // CrsMatrix / CrsGraphData pointers, still using GIDs.
8314  // 3. Call an optimized version of MakeColMap that avoids the
8315  // Directory lookups (since the importer knows who owns all the
8316  // GIDs) AND reindexes to LIDs.
8317  // 4. Call expertStaticFillComplete()
8318 
8319  // Get information from the Importer
8320  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
8321  ArrayView<const LO> ExportLIDs = reverseMode ?
8322  rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
8323  ArrayView<const LO> RemoteLIDs = reverseMode ?
8324  rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
8325  ArrayView<const LO> PermuteToLIDs = reverseMode ?
8326  rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
8327  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
8328  rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
8329  Distributor& Distor = rowTransfer.getDistributor ();
8330 
8331  // Owning PIDs
8332  Teuchos::Array<int> SourcePids;
8333  Teuchos::Array<int> TargetPids;
8334  int MyPID = getComm ()->getRank ();
8335 
8336  // Temp variables for sub-communicators
8337  RCP<const map_type> ReducedRowMap, ReducedColMap,
8338  ReducedDomainMap, ReducedRangeMap;
8339  RCP<const Comm<int> > ReducedComm;
8340 
8341  // If the user gave us a null destMat, then construct the new
8342  // destination matrix. We will replace its column Map later.
8343  if (destMat.is_null ()) {
8344  destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
8345  }
8346 
8347  /***************************************************/
8348  /***** 1) First communicator restriction phase ****/
8349  /***************************************************/
8350  if (restrictComm) {
8351  ReducedRowMap = MyRowMap->removeEmptyProcesses ();
8352  ReducedComm = ReducedRowMap.is_null () ?
8353  Teuchos::null :
8354  ReducedRowMap->getComm ();
8355  destMat->removeEmptyProcessesInPlace (ReducedRowMap);
8356 
8357  ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
8358  ReducedRowMap :
8359  MyDomainMap->replaceCommWithSubset (ReducedComm);
8360  ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
8361  ReducedRowMap :
8362  MyRangeMap->replaceCommWithSubset (ReducedComm);
8363 
8364  // Reset the "my" maps
8365  MyRowMap = ReducedRowMap;
8366  MyDomainMap = ReducedDomainMap;
8367  MyRangeMap = ReducedRangeMap;
8368 
8369  // Update my PID, if we've restricted the communicator
8370  if (! ReducedComm.is_null ()) {
8371  MyPID = ReducedComm->getRank ();
8372  }
8373  else {
8374  MyPID = -2; // For debugging
8375  }
8376  }
8377  else {
8378  ReducedComm = MyRowMap->getComm ();
8379  }
8380 
8381  /***************************************************/
8382  /***** 2) From Tpera::DistObject::doTransfer() ****/
8383  /***************************************************/
8384 #ifdef HAVE_TPETRA_MMM_TIMINGS
8385  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ImportSetup"))));
8386 #endif
8387  // Get the owning PIDs
8388  RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8389 
8390  // check whether domain maps of source matrix and base domain map is the same
8391  bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8392 
8393  if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8394  // Same domain map as source matrix
8395  //
8396  // NOTE: This won't work for restrictComm (because the Import
8397  // doesn't know the restricted PIDs), though writing an
8398  // optimized version for that case would be easy (Import an
8399  // IntVector of the new PIDs). Might want to add this later.
8400  Import_Util::getPids (*MyImporter, SourcePids, false);
8401  }
8402  else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8403  // Same domain map as source matrix (restricted communicator)
8404  // We need one import from the domain to the column map
8405  IntVectorType SourceDomain_pids(getDomainMap (),true);
8406  IntVectorType SourceCol_pids(getColMap());
8407  // SourceDomain_pids contains the restricted pids
8408  SourceDomain_pids.putScalar(MyPID);
8409 
8410  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8411  SourcePids.resize (getColMap ()->getNodeNumElements ());
8412  SourceCol_pids.get1dCopy (SourcePids ());
8413  }
8414  else if (MyImporter.is_null () && bSameDomainMap) {
8415  // Matrix has no off-process entries
8416  SourcePids.resize (getColMap ()->getNodeNumElements ());
8417  SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
8418  }
8419  else if ( ! MyImporter.is_null () &&
8420  ! domainTransfer.is_null () ) {
8421  // general implementation for rectangular matrices with
8422  // domain map different than SourceMatrix domain map.
8423  // User has to provide a DomainTransfer object. We need
8424  // to communications (import/export)
8425 
8426  // TargetDomain_pids lives on the rebalanced new domain map
8427  IntVectorType TargetDomain_pids (domainMap);
8428  TargetDomain_pids.putScalar (MyPID);
8429 
8430  // SourceDomain_pids lives on the non-rebalanced old domain map
8431  IntVectorType SourceDomain_pids (getDomainMap ());
8432 
8433  // SourceCol_pids lives on the non-rebalanced old column map
8434  IntVectorType SourceCol_pids (getColMap ());
8435 
8436  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8437  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8438  }
8439  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8440  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8441  }
8442  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8443  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8444  }
8445  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8446  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8447  }
8448  else {
8449  TEUCHOS_TEST_FOR_EXCEPTION(
8450  true, std::logic_error, "Tpetra::CrsMatrix::"
8451  "transferAndFillComplete: Should never get here! "
8452  "Please report this bug to a Tpetra developer.");
8453  }
8454  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8455  SourcePids.resize (getColMap ()->getNodeNumElements ());
8456  SourceCol_pids.get1dCopy (SourcePids ());
8457  }
8458  else if (BaseDomainMap->isSameAs (*BaseRowMap) &&
8459  getDomainMap ()->isSameAs (*getRowMap ())) {
8460  // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
8461  IntVectorType TargetRow_pids (domainMap);
8462  IntVectorType SourceRow_pids (getRowMap ());
8463  IntVectorType SourceCol_pids (getColMap ());
8464 
8465  TargetRow_pids.putScalar (MyPID);
8466  if (! reverseMode && xferAsImport != NULL) {
8467  SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
8468  }
8469  else if (reverseMode && xferAsExport != NULL) {
8470  SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
8471  }
8472  else if (! reverseMode && xferAsExport != NULL) {
8473  SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
8474  }
8475  else if (reverseMode && xferAsImport != NULL) {
8476  SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
8477  }
8478  else {
8479  TEUCHOS_TEST_FOR_EXCEPTION(
8480  true, std::logic_error, "Tpetra::CrsMatrix::"
8481  "transferAndFillComplete: Should never get here! "
8482  "Please report this bug to a Tpetra developer.");
8483  }
8484  SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
8485  SourcePids.resize (getColMap ()->getNodeNumElements ());
8486  SourceCol_pids.get1dCopy (SourcePids ());
8487  }
8488  else {
8489  TEUCHOS_TEST_FOR_EXCEPTION(
8490  true, std::invalid_argument, "Tpetra::CrsMatrix::"
8491  "transferAndFillComplete: This method only allows either domainMap == "
8492  "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8493  "getDomainMap () == getRowMap ()).");
8494  }
8495 #ifdef HAVE_TPETRA_MMM_TIMINGS
8496  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-2"))));
8497 #endif
8498 
8499  // Tpetra-specific stuff
8500  size_t constantNumPackets = destMat->constantNumberOfPackets ();
8501  if (constantNumPackets == 0) {
8502  destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8503  RemoteLIDs.size ());
8504  }
8505  else {
8506  // There are a constant number of packets per element. We
8507  // already know (from the number of "remote" (incoming)
8508  // elements) how many incoming elements we expect, so we can
8509  // resize the buffer accordingly.
8510  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
8511  destMat->reallocImportsIfNeeded (rbufLen);
8512  }
8513 
8514  // Pack & Prepare w/ owning PIDs
8515 #ifdef HAVE_TPETRA_DEBUG
8516  {
8517  using Teuchos::outArg;
8518  using Teuchos::REDUCE_MAX;
8519  using Teuchos::reduceAll;
8520  using std::cerr;
8521  using std::endl;
8522  RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8523  const int myRank = comm->getRank ();
8524  const int numProcs = comm->getSize ();
8525 
8526  std::ostringstream os;
8527  int lclErr = 0;
8528  try {
8529  // packAndPrepare* methods modify numExportPacketsPerLID_.
8530  destMat->numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
8531  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8532  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8533  packCrsMatrixWithOwningPIDs (*this, destMat->exports_,
8534  numExportPacketsPerLID, ExportLIDs,
8535  SourcePids, constantNumPackets, Distor);
8536  }
8537  catch (std::exception& e) {
8538  os << "Proc " << myRank << ": " << e.what ();
8539  lclErr = 1;
8540  }
8541  int gblErr = 0;
8542  if (! comm.is_null ()) {
8543  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8544  }
8545  if (gblErr != 0) {
8546  if (myRank == 0) {
8547  cerr << "packCrsMatrixWithOwningPIDs threw an exception: " << endl;
8548  }
8549  std::ostringstream err;
8550  for (int r = 0; r < numProcs; ++r) {
8551  if (r == myRank && lclErr != 0) {
8552  cerr << os.str () << endl;
8553  }
8554  comm->barrier ();
8555  comm->barrier ();
8556  comm->barrier ();
8557  }
8558 
8559  TEUCHOS_TEST_FOR_EXCEPTION(
8560  true, std::logic_error, "packCrsMatrixWithOwningPIDs threw an "
8561  "exception.");
8562  }
8563  }
8564 
8565 #else
8566  {
8567  // packAndPrepare* methods modify numExportPacketsPerLID_.
8568  destMat->numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
8569  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8570  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8571  packCrsMatrixWithOwningPIDs (*this, destMat->exports_,
8572  numExportPacketsPerLID, ExportLIDs,
8573  SourcePids, constantNumPackets, Distor);
8574  }
8575 #endif // HAVE_TPETRA_DEBUG
8576 
8577  // Do the exchange of remote data.
8578 #ifdef HAVE_TPETRA_MMM_TIMINGS
8579  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Transfer"))));
8580 #endif
8581 
8582  if (communication_needed) {
8583  if (reverseMode) {
8584  if (constantNumPackets == 0) { // variable number of packets per LID
8585  // Make sure that host has the latest version, since we're
8586  // using the version on host. If host has the latest
8587  // version, syncing to host does nothing.
8588  destMat->numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
8589  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8590  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8591  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
8592  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8593  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
8594  Distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
8595  numImportPacketsPerLID);
8596  size_t totalImportPackets = 0;
8597  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8598  totalImportPackets += numImportPacketsPerLID[i];
8599  }
8600 
8601  // Reallocation MUST go before setting the modified flag,
8602  // because it may clear out the flags.
8603  destMat->reallocImportsIfNeeded (totalImportPackets);
8604  destMat->imports_.template modify<Kokkos::HostSpace> ();
8605  Teuchos::ArrayView<char> hostImports =
8606  getArrayViewFromDualView (destMat->imports_);
8607  // This is a legacy host pack/unpack path, so use the host
8608  // version of exports_.
8609  destMat->exports_.template sync<Kokkos::HostSpace> ();
8610  Teuchos::ArrayView<const char> hostExports =
8611  getArrayViewFromDualView (destMat->exports_);
8612  Distor.doReversePostsAndWaits (hostExports,
8613  numExportPacketsPerLID,
8614  hostImports,
8615  numImportPacketsPerLID);
8616  }
8617  else { // constant number of packets per LI
8618  destMat->imports_.template modify<Kokkos::HostSpace> ();
8619  Teuchos::ArrayView<char> hostImports =
8620  getArrayViewFromDualView (destMat->imports_);
8621  // This is a legacy host pack/unpack path, so use the host
8622  // version of exports_.
8623  destMat->exports_.template sync<Kokkos::HostSpace> ();
8624  Teuchos::ArrayView<const char> hostExports =
8625  getArrayViewFromDualView (destMat->exports_);
8626  Distor.doReversePostsAndWaits (hostExports,
8627  constantNumPackets,
8628  hostImports);
8629  }
8630  }
8631  else { // forward mode (the default)
8632  if (constantNumPackets == 0) { // variable number of packets per LID
8633  // Make sure that host has the latest version, since we're
8634  // using the version on host. If host has the latest
8635  // version, syncing to host does nothing.
8636  destMat->numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
8637  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8638  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8639  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
8640  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8641  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
8642  Distor.doPostsAndWaits (numExportPacketsPerLID, 1,
8643  numImportPacketsPerLID);
8644  size_t totalImportPackets = 0;
8645  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8646  totalImportPackets += numImportPacketsPerLID[i];
8647  }
8648 
8649  // Reallocation MUST go before setting the modified flag,
8650  // because it may clear out the flags.
8651  destMat->reallocImportsIfNeeded (totalImportPackets);
8652  destMat->imports_.template modify<Kokkos::HostSpace> ();
8653  Teuchos::ArrayView<char> hostImports =
8654  getArrayViewFromDualView (destMat->imports_);
8655  // This is a legacy host pack/unpack path, so use the host
8656  // version of exports_.
8657  destMat->exports_.template sync<Kokkos::HostSpace> ();
8658  Teuchos::ArrayView<const char> hostExports =
8659  getArrayViewFromDualView (destMat->exports_);
8660  Distor.doPostsAndWaits (hostExports,
8661  numExportPacketsPerLID,
8662  hostImports,
8663  numImportPacketsPerLID);
8664  }
8665  else { // constant number of packets per LID
8666  destMat->imports_.template modify<Kokkos::HostSpace> ();
8667  Teuchos::ArrayView<char> hostImports =
8668  getArrayViewFromDualView (destMat->imports_);
8669  // This is a legacy host pack/unpack path, so use the host
8670  // version of exports_.
8671  destMat->exports_.template sync<Kokkos::HostSpace> ();
8672  Teuchos::ArrayView<const char> hostExports =
8673  getArrayViewFromDualView (destMat->exports_);
8674  Distor.doPostsAndWaits (hostExports,
8675  constantNumPackets,
8676  hostImports);
8677  }
8678  }
8679  }
8680 
8681  /*********************************************************************/
8682  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
8683  /*********************************************************************/
8684 
8685 #ifdef HAVE_TPETRA_MMM_TIMINGS
8686  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-1"))));
8687 #endif
8688 
8689  // Backwards compatibility measure. We'll use this again below.
8690  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
8691  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
8692  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
8693  destMat->imports_.template sync<Kokkos::HostSpace> ();
8694  Teuchos::ArrayView<const char> hostImports =
8695  getArrayViewFromDualView (destMat->imports_);
8696  size_t mynnz =
8697  unpackAndCombineWithOwningPIDsCount (*this, RemoteLIDs, hostImports,
8698  numImportPacketsPerLID,
8699  constantNumPackets, Distor, INSERT,
8700  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
8701  size_t N = BaseRowMap->getNodeNumElements ();
8702 
8703  // Allocations
8704  ArrayRCP<size_t> CSR_rowptr(N+1);
8705  ArrayRCP<GO> CSR_colind_GID;
8706  ArrayRCP<LO> CSR_colind_LID;
8707  ArrayRCP<Scalar> CSR_vals;
8708  CSR_colind_GID.resize (mynnz);
8709  CSR_vals.resize (mynnz);
8710 
8711  // If LO and GO are the same, we can reuse memory when
8712  // converting the column indices from global to local indices.
8713  if (typeid (LO) == typeid (GO)) {
8714  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8715  }
8716  else {
8717  CSR_colind_LID.resize (mynnz);
8718  }
8719 
8720  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
8721  // unpackAndCombine method on a "CrsArrays" object? This passing
8722  // in a huge list of arrays is icky. Can't we have a bit of an
8723  // abstraction? Implementing a concrete DistObject subclass only
8724  // takes five methods.
8725  unpackAndCombineIntoCrsArrays (*this, RemoteLIDs, hostImports,
8726  numImportPacketsPerLID, constantNumPackets,
8727  Distor, INSERT, NumSameIDs, PermuteToLIDs,
8728  PermuteFromLIDs, N, mynnz, MyPID,
8729  CSR_rowptr (), CSR_colind_GID (),
8730  Teuchos::av_reinterpret_cast<impl_scalar_type> (CSR_vals ()),
8731  SourcePids (), TargetPids);
8732 
8733  /**************************************************************/
8734  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
8735  /**************************************************************/
8736 #ifdef HAVE_TPETRA_MMM_TIMINGS
8737  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-2"))));
8738 #endif
8739  // Call an optimized version of makeColMap that avoids the
8740  // Directory lookups (since the Import object knows who owns all
8741  // the GIDs).
8742  Teuchos::Array<int> RemotePids;
8743  Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
8744  CSR_colind_LID (),
8745  CSR_colind_GID (),
8746  BaseDomainMap,
8747  TargetPids, RemotePids,
8748  MyColMap);
8749 
8750  /*******************************************************/
8751  /**** 4) Second communicator restriction phase ****/
8752  /*******************************************************/
8753  if (restrictComm) {
8754  ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8755  ReducedRowMap :
8756  MyColMap->replaceCommWithSubset (ReducedComm);
8757  MyColMap = ReducedColMap; // Reset the "my" maps
8758  }
8759 
8760  // Replace the col map
8761  destMat->replaceColMap (MyColMap);
8762 
8763  // Short circuit if the processor is no longer in the communicator
8764  //
8765  // NOTE: Epetra modifies all "removed" processes so they
8766  // have a dummy (serial) Map that doesn't touch the original
8767  // communicator. Duplicating that here might be a good idea.
8768  if (ReducedComm.is_null ()) {
8769  return;
8770  }
8771 
8772  /***************************************************/
8773  /**** 5) Sort ****/
8774  /***************************************************/
8775 #ifdef HAVE_TPETRA_MMM_TIMINGS
8776  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-3"))));
8777 #endif
8778  if ((! reverseMode && xferAsImport != NULL) ||
8779  (reverseMode && xferAsExport != NULL)) {
8780  Import_Util::sortCrsEntries (CSR_rowptr (),
8781  CSR_colind_LID (),
8782  CSR_vals ());
8783  }
8784  else if ((! reverseMode && xferAsExport != NULL) ||
8785  (reverseMode && xferAsImport != NULL)) {
8786  Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
8787  CSR_colind_LID (),
8788  CSR_vals ());
8789  if (CSR_rowptr[N] != mynnz) {
8790  CSR_colind_LID.resize (CSR_rowptr[N]);
8791  CSR_vals.resize (CSR_rowptr[N]);
8792  }
8793  }
8794  else {
8795  TEUCHOS_TEST_FOR_EXCEPTION(
8796  true, std::logic_error, "Tpetra::CrsMatrix::"
8797  "transferAndFillComplete: Should never get here! "
8798  "Please report this bug to a Tpetra developer.");
8799  }
8800  /***************************************************/
8801  /**** 6) Reset the colmap and the arrays ****/
8802  /***************************************************/
8803 
8804  // Call constructor for the new matrix (restricted as needed)
8805  //
8806  // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
8807  // refactor version of CrsMatrix, though it reserves the right to
8808  // make a deep copy of the arrays.
8809  destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8810 
8811  /***************************************************/
8812  /**** 7) Build Importer & Call ESFC ****/
8813  /***************************************************/
8814  // Pre-build the importer using the existing PIDs
8815  Teuchos::ParameterList esfc_params;
8816 #ifdef HAVE_TPETRA_MMM_TIMINGS
8817  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC CreateImporter"))));
8818 #endif
8819  RCP<import_type> MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids));
8820 #ifdef HAVE_TPETRA_MMM_TIMINGS
8821  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ESFC"))));
8822 
8823  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
8824 #endif
8825  if(!params.is_null())
8826  esfc_params.set("compute global constants",params->get("compute global constants",true));
8827 
8828  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(&esfc_params,false));
8829  }
8830 
// Import-based entry point for the transfer-and-fill-complete path.
//
// Forwards destMatrix, importer, domainMap, rangeMap and params
// unchanged to transferAndFillComplete, supplying Teuchos::null as
// the third argument.  All of the work happens in that call; this
// method returns nothing and is declared const.
8831  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8832  void
8833  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
8834  importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& destMatrix,
8835  const import_type& importer,
8836  const Teuchos::RCP<const map_type>& domainMap,
8837  const Teuchos::RCP<const map_type>& rangeMap,
8838  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8839  {
8840  transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
8841  }
8842 
// NOTE(review): the listing lines that carry the qualified function
// name and the first parameter are absent here (the embedded
// numbering jumps from 8844 to 8847).  Judging by the parameter
// names this is presumably the two-Import overload of
// importAndFillComplete -- confirm against the original header.
//
// The body forwards to transferAndFillComplete with rowImporter as
// the transfer object and domainImporter, wrapped by
// Teuchos::rcpFromRef, as the third argument.
// NOTE(review): rcpFromRef is understood to wrap the reference
// without taking ownership -- confirm against the Teuchos docs.
8843  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8844  void
8847  const import_type& rowImporter,
8848  const import_type& domainImporter,
8849  const Teuchos::RCP<const map_type>& domainMap,
8850  const Teuchos::RCP<const map_type>& rangeMap,
8851  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8852  {
8853  transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
8854  }
8855 
// NOTE(review): the listing lines that carry the qualified function
// name and the first parameter are absent here (the embedded
// numbering jumps from 8857 to 8860).  Judging by the parameter
// names this is presumably the single-Export overload of
// exportAndFillComplete -- confirm against the original header.
//
// The body forwards to transferAndFillComplete with exporter as the
// transfer object and Teuchos::null as the third argument.
8856  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8857  void
8860  const export_type& exporter,
8861  const Teuchos::RCP<const map_type>& domainMap,
8862  const Teuchos::RCP<const map_type>& rangeMap,
8863  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8864  {
8865  transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
8866  }
8867 
// NOTE(review): the listing lines that carry the qualified function
// name and the first parameter are absent here (the embedded
// numbering jumps from 8869 to 8872).  Judging by the parameter
// names this is presumably the two-Export overload of
// exportAndFillComplete -- confirm against the original header.
//
// The body forwards to transferAndFillComplete with rowExporter as
// the transfer object and domainExporter, wrapped by
// Teuchos::rcpFromRef, as the third argument.
// NOTE(review): rcpFromRef is understood to wrap the reference
// without taking ownership -- confirm against the Teuchos docs.
8868  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8869  void
8872  const export_type& rowExporter,
8873  const export_type& domainExporter,
8874  const Teuchos::RCP<const map_type>& domainMap,
8875  const Teuchos::RCP<const map_type>& rangeMap,
8876  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8877  {
8878  transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
8879  }
8880 
8881 } // namespace Classes
8882 } // namespace Tpetra
8883 
8884 //
8885 // Explicit instantiation macro
8886 //
8887 // Must be expanded from within the Tpetra namespace!
8888 //
8889 
// Explicitly instantiates the class template
// Classes::CrsMatrix<SCALAR, LO, GO, NODE> (inside a reopened
// "namespace Classes" block) and then, outside that block, its
// member template convert<SCALAR>(), i.e. the conversion whose
// output Scalar type equals the matrix's own Scalar type.
8890 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
8891  \
8892  namespace Classes { \
8893  template class CrsMatrix< SCALAR , LO , GO , NODE >; \
8894  } \
8895  template Teuchos::RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
8896  CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
8897 
// Explicitly instantiates CrsMatrix<SI, LO, GO, NODE>::convert<SO>(),
// the member template that returns an RCP to a
// CrsMatrix<SO, LO, GO, NODE>.  SI is the Scalar type of the matrix
// being converted; SO is the Scalar type of the result.
8898 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
8899  \
8900  template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
8901  CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
8902 
// Expands to a body-less "template<>" declaration of the nonmember
// function importAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>: the overload that takes the
// source matrix, a single Import, the domain and range Maps, and a
// ParameterList.  The Import and Map template arguments are spelled
// through the matrix's local_ordinal_type / global_ordinal_type /
// node_type typedefs rather than LO / GO / NODE directly.
// NOTE(review): "template<>" introduces an explicit specialization
// declaration, not an explicit instantiation (which would be plain
// "template"), despite the _INSTANT suffix -- confirm this is
// intended.
8903 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8904  template<> \
8905  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
8906  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
8907  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8908  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8909  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
8910  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8911  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8912  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
8913  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8914  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8915  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
8916  const Teuchos::RCP<Teuchos::ParameterList>& params);
8917 
// Expands to a body-less "template<>" declaration of the nonmember
// function importAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>: the overload that takes the
// source matrix, two Imports (rowImporter and domainImporter), the
// domain and range Maps, and a ParameterList.
// NOTE(review): "template<>" introduces an explicit specialization
// declaration, not an explicit instantiation, despite the _INSTANT
// in the macro name -- confirm this is intended.
8918 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8919  template<> \
8920  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
8921  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
8922  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8923  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8924  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
8925  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8926  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8927  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
8928  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8929  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8930  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
8931  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8932  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8933  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
8934  const Teuchos::RCP<Teuchos::ParameterList>& params);
8935 
8936 
// Expands to a body-less "template<>" declaration of the nonmember
// function exportAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>: the overload that takes the
// source matrix, a single Export, the domain and range Maps, and a
// ParameterList.
// NOTE(review): "template<>" introduces an explicit specialization
// declaration, not an explicit instantiation, despite the _INSTANT
// suffix -- confirm this is intended.
8937 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8938  template<> \
8939  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
8940  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
8941  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8942  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8943  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
8944  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8945  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8946  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
8947  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8948  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8949  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
8950  const Teuchos::RCP<Teuchos::ParameterList>& params);
8951 
// Expands to a body-less "template<>" declaration of the nonmember
// function exportAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>: the overload that takes the
// source matrix, two Exports (rowExporter and domainExporter), the
// domain and range Maps, and a ParameterList.
// NOTE(review): "template<>" introduces an explicit specialization
// declaration, not an explicit instantiation, despite the _INSTANT
// in the macro name -- confirm this is intended.
8952 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8953  template<> \
8954  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
8955  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
8956  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8957  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8958  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
8959  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8960  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8961  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
8962  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8963  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8964  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
8965  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8966  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8967  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
8968  const Teuchos::RCP<Teuchos::ParameterList>& params);
8969 
8970 
// Umbrella macro for one (SCALAR, LO, GO, NODE) combination.  It
// expands, in this order: the matrix instantiation macro, the
// single-Import and single-Export fill-complete macros, then the
// two-Import and two-Export (_TWO) variants.  It does not expand
// TPETRA_CRSMATRIX_CONVERT_INSTANT, which takes a different
// parameter list.
8971 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
8972  TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
8973  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8974  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8975  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8976  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
8977 
8978 #endif // TPETRA_CRSMATRIX_DEF_HPP
Tpetra::Classes::MultiVector::isConstantStride
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Definition: Tpetra_MultiVector_def.hpp:741
Tpetra::Classes::CrsGraph::getRangeMap
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
Definition: Tpetra_CrsGraph_def.hpp:931
Tpetra::Classes::CrsMatrix::getNodeNumCols
size_t getNodeNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:725
Tpetra_Import_Util.hpp
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
TPETRA_ABUSE_WARNING
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
Definition: Tpetra_Util.hpp:166
Tpetra::Classes::CrsMatrix::CrsMatrix
friend class CrsMatrix
Alias for Tpetra::Classes::CrsMatrix.
Definition: Tpetra_CrsMatrix_decl.hpp:748
Tpetra::ProfileType
ProfileType
Definition: Tpetra_ConfigDefs.hpp:130
Tpetra::Details::AbsMax
Functor for the ABSMAX CombineMode of Import and Export operations.
Definition: Tpetra_CrsMatrix_def.hpp:121
Tpetra::Classes::RowMatrix::isLocallyIndexed
virtual bool isLocallyIndexed() const =0
Whether matrix indices are locally indexed.
Tpetra::MatrixMatrix::add
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
Definition: TpetraExt_MatrixMatrix_def.hpp:572
Tpetra::ESweepDirection
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
Definition: Tpetra_ConfigDefs.hpp:245
Tpetra::Details::computeOffsetsFromCounts
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Definition: Tpetra_Details_computeOffsets.hpp:284
Tpetra::StaticProfile
Definition: Tpetra_ConfigDefs.hpp:131
Tpetra::Details::unpackAndCombineIntoCrsArrays
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Tpetra::Classes::CrsMatrix::getNodeMaxNumRowEntries
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
Definition: Tpetra_CrsMatrix_def.hpp:785
Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers
Mix-in to avoid spurious deprecation warnings due to #2630.
Definition: Tpetra_CrsGraph_decl.hpp:180
Tpetra::combineModeToString
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Definition: Tpetra_CombineMode.cpp:89
Tpetra::Classes::Map::getLocalMap
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Definition: Tpetra_Map_def.hpp:1165
Tpetra::Classes::CrsMatrix::getRowMap
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:799
Tpetra::Details::getLocalDiagCopyWithoutOffsetsNotFillComplete
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Definition: Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp:192
Tpetra::Classes::Map::getGlobalElement
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Definition: Tpetra_Map_def.hpp:1114
Tpetra::Classes::CrsGraph::getDomainMap
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Definition: Tpetra_CrsGraph_def.hpp:922
Tpetra::Details::packCrsMatrixWithOwningPIDs
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Definition: Tpetra_Details_packCrsMatrix_def.hpp:1031
Tpetra::REPLACE
Replace existing values with new values.
Definition: Tpetra_CombineMode.hpp:97
Tpetra::Details::getDiagCopyWithoutOffsets
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps,...
Definition: Tpetra_Details_getDiagCopyWithoutOffsets_decl.hpp:182
Tpetra::RowInfo
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
Definition: Tpetra_CrsGraph_decl.hpp:112
Tpetra::Classes::CrsMatrix::getColMap
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:806
Tpetra::Classes::RowMatrix::getGlobalRowCopy
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackRow
KOKKOS_FUNCTION int unpackRow(typename Kokkos::View< GO *, Device, Kokkos::MemoryUnmanaged > &gids_out, typename Kokkos::View< int *, Device, Kokkos::MemoryUnmanaged > &pids_out, const Kokkos::View< const Packet *, BufferDevice > &imports, const size_t offset, const size_t num_ent)
Unpack a single row of a CrsGraph.
Definition: Tpetra_Details_unpackCrsGraphAndCombine_def.hpp:103
Tpetra::sort2
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
Definition: Tpetra_Util.hpp:532
Tpetra_Details_copyOffsets.hpp
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Tpetra::Details::create_mirror_view_from_raw_host_array
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Definition: Tpetra_Details_createMirrorView.hpp:201
Tpetra::Classes::CrsMatrix::getGlobalNumRows
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:704
Tpetra_Details_Behavior.hpp
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
TPETRA_EFFICIENCY_WARNING
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
Definition: Tpetra_Util.hpp:148
Tpetra::Classes::CrsMatrix::replaceColMap
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
Definition: Tpetra_CrsMatrix_def.hpp:4252
Tpetra::Classes::Map::getNode
Teuchos::RCP< Node > getNode() const
Get this Map's Node object.
Definition: Tpetra_Map_def.hpp:1973
Tpetra::Classes::CrsMatrix::getNodeNumEntries
size_t getNodeNumEntries() const override
The local number of entries in this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:697
Tpetra::Classes::CrsMatrix::hasColMap
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Definition: Tpetra_CrsMatrix_def.hpp:683
Tpetra::Classes::CrsMatrix< SC, LO, GO, NT >::execution_space
device_type::execution_space execution_space
The Kokkos execution space.
Definition: Tpetra_CrsMatrix_decl.hpp:456
Tpetra::Details::ProfilingRegion
Profile the given scope.
Definition: Tpetra_Details_Profiling.hpp:100
Tpetra::Classes::CrsMatrix::getGlobalNumCols
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Definition: Tpetra_CrsMatrix_def.hpp:711
Tpetra::Details::getArrayViewFromDualView
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Definition: Tpetra_Util.hpp:878
Tpetra::Classes::CrsMatrix::isLocallyIndexed
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Definition: Tpetra_CrsMatrix_def.hpp:669
Tpetra::Classes::MultiVector::scale
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Definition: Tpetra_MultiVector_def.hpp:2742
Tpetra::Classes::CrsMatrix::mag_type
Kokkos::Details::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
Definition: Tpetra_CrsMatrix_decl.hpp:463
Details
Implementation details of Tpetra.
Tpetra::Details::gathervPrint
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Definition: Tpetra_Details_gathervPrint.cpp:52
Tpetra::Classes::CrsMatrix::getNodeNumRows
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
Definition: Tpetra_CrsMatrix_def.hpp:718
Tpetra::Classes::MultiVector::putScalar
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
Definition: Tpetra_MultiVector_def.hpp:2596
Tpetra::Details::Behavior::debug
static bool debug()
Whether Tpetra is in debug mode.
Definition: Tpetra_Details_Behavior.cpp:245
Tpetra::ZERO
Replace old values with zero.
Definition: Tpetra_CombineMode.hpp:99
Tpetra::Classes::CrsMatrix::getNumEntriesInGlobalRow
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process.
Definition: Tpetra_CrsMatrix_def.hpp:764
Tpetra::Classes::CrsMatrix::getRangeMap
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:820
Tpetra::Classes::DistObject< Scalar, LocalOrdinal, GlobalOrdinal, Node >::doExport
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
Definition: Tpetra_DistObject_def.hpp:294
Tpetra::Classes::CrsGraph::getRowInfo
RowInfo getRowInfo(const LocalOrdinal myRow) const
Get information about the locally owned row with local index myRow.
Definition: Tpetra_CrsGraph_def.hpp:1784
Tpetra::Classes::Operator::getDomainMap
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X....
Tpetra::Classes::DistObject< Scalar, LocalOrdinal, GlobalOrdinal, Node >::getMap
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Definition: Tpetra_DistObject_decl.hpp:510
Tpetra::Classes::CrsMatrix::resumeFill
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
Definition: Tpetra_CrsMatrix_def.hpp:4543
Tpetra::Classes::CrsGraph::isGloballyIndexed
bool isGloballyIndexed() const override
If graph indices are in the global range, this function returns true. Otherwise, this function return...
Definition: Tpetra_CrsGraph_def.hpp:1156
Tpetra_Import_Util2.hpp
Utility functions for packing and unpacking sparse matrix entries.
Tpetra::ADD
Sum new values into existing values.
Definition: Tpetra_CombineMode.hpp:95
Tpetra::Details::PackTraits
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
Definition: Tpetra_Details_PackTraits.hpp:63
Tpetra::Classes::CrsMatrix::getDomainMap
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:813
Tpetra::DynamicProfile
Definition: Tpetra_ConfigDefs.hpp:132
Tpetra::Classes::Map::getLocalElement
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Definition: Tpetra_Map_def.hpp:1091
Tpetra::Classes::CrsMatrix
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Definition: Tpetra_CrsMatrix_decl.hpp:424
Tpetra::ABSMAX
Replace old value with maximum of magnitudes of old and new values.
Definition: Tpetra_CombineMode.hpp:98
Tpetra::Classes::RowMatrix::getRowMap
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Tpetra::Classes::DistObject< char, LO, GO, NT >
Tpetra::Classes::CrsMatrix::haveGlobalConstants
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
Definition: Tpetra_CrsMatrix_def.hpp:4569
Tpetra::Classes::CrsMatrix::replaceDomainMapAndImporter
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
Definition: Tpetra_CrsMatrix_def.hpp:4303
Tpetra_Details_Profiling.hpp
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Tpetra::Details::getGlobalNumDiags
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
Definition: Tpetra_Details_getNumDiags.hpp:406
Tpetra::Classes::CrsMatrix::getComm
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Definition: Tpetra_CrsMatrix_def.hpp:627
Tpetra::Details::dualViewStatusToString
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
Definition: Tpetra_Util.hpp:942
Tpetra_Details_castAwayConstDualView.hpp
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Tpetra::Classes::Import
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Definition: Tpetra_Import_decl.hpp:115
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:188
Tpetra::Classes::RowMatrix::getNumEntriesInGlobalRow
virtual size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const =0
The current number of entries on the calling process in the specified global row.
Tpetra::Classes::CrsMatrix::getIndexBase
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:792
Tpetra::Details::Behavior::verbose
static bool verbose()
Whether Tpetra is in verbose mode.
Definition: Tpetra_Details_Behavior.cpp:260
Tpetra::Classes::CrsMatrix< SC, LO, GO, NT >::impl_scalar_type
Kokkos::Details::ArithTraits< SC >::val_type impl_scalar_type
The type used internally in place of Scalar.
Definition: Tpetra_CrsMatrix_decl.hpp:445
Tpetra::Details::AbsMax::operator()
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
Definition: Tpetra_CrsMatrix_def.hpp:123
Tpetra::createOneToOne
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
Tpetra::Classes::CrsGraph::isFillComplete
bool isFillComplete() const override
Returns true if fillComplete() has been called and the graph is in compute mode.
Definition: Tpetra_CrsGraph_def.hpp:1093
Tpetra::Classes::MultiVector
One or more distributed dense vectors.
Definition: Tpetra_MultiVector_decl.hpp:389
Tpetra::Classes::CrsMatrix::getNode
Teuchos::RCP< node_type > getNode() const override
The Kokkos Node instance.
Definition: Tpetra_CrsMatrix_def.hpp:634
Tpetra::Classes::CrsGraph::getRowInfoFromGlobalRowIndex
RowInfo getRowInfoFromGlobalRowIndex(const GlobalOrdinal gblRow) const
Get information about the locally owned row with global index gblRow.
Definition: Tpetra_CrsGraph_def.hpp:1852
Tpetra::Classes::CrsGraph::getNumEntriesInLocalRow
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const override
Get the number of entries in the given row (local index).
Definition: Tpetra_CrsGraph_def.hpp:2763
Tpetra::Classes::CrsGraph::indicesAreSorted_
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Definition: Tpetra_CrsGraph_decl.hpp:2304
Tpetra::Classes::CrsGraph::k_numRowEntries_
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Definition: Tpetra_CrsGraph_decl.hpp:2279
Tpetra::Classes::RowMatrix::getGlobalRowView
virtual void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const =0
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
Tpetra::Classes::CrsMatrix< SC, LO, GO, NT >::local_matrix_type
KokkosSparse::CrsMatrix< impl_scalar_type, LO, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Definition: Tpetra_CrsMatrix_decl.hpp:483
Tpetra_Details_getEntryOnHost.hpp
Declaration and definition of Tpetra::Details::getEntryOnHost.
Tpetra::removeEmptyProcessesInPlace
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
Definition: Tpetra_DistObject_def.hpp:1643
Tpetra::Classes::MultiVector::getNumVectors
size_t getNumVectors() const
Number of columns in the multivector.
Definition: Tpetra_MultiVector_def.hpp:1739
Tpetra::Classes::CrsMatrix::getNumEntriesInLocalRow
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process.
Definition: Tpetra_CrsMatrix_def.hpp:771
Tpetra::Classes::CrsMatrix::isStorageOptimized
bool isStorageOptimized() const
Returns true if storage has been optimized.
Definition: Tpetra_CrsMatrix_def.hpp:662
Tpetra::Classes::CrsMatrix::isGloballyIndexed
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
Definition: Tpetra_CrsMatrix_def.hpp:676
Tpetra::Classes::CrsGraph::rowMap_
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Definition: Tpetra_CrsGraph_decl.hpp:2071
Tpetra::Classes::CrsMatrix< SC, LO, GO, NT >::device_type
NT ::device_type device_type
The Kokkos device type.
Definition: Tpetra_CrsMatrix_decl.hpp:454
Tpetra::Classes::Map< LO, GO, NT >
Tpetra::Classes::CrsGraph::colMap_
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
Definition: Tpetra_CrsGraph_decl.hpp:2073
Tpetra::Classes::Export::isLocallyComplete
bool isLocallyComplete() const
Do all source Map indices on the calling process exist on at least one process (not necessarily this ...
Definition: Tpetra_Export_def.hpp:381
Tpetra::Classes::Map::isNodeGlobalElement
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Definition: Tpetra_Map_def.hpp:1146
Tpetra::CrsMatrix
Classes::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > CrsMatrix
Alias for Tpetra::Classes::CrsMatrix.
Definition: Tpetra_CrsMatrix_fwd.hpp:72
Tpetra::Classes::CrsGraph::lclInds2D_
Teuchos::ArrayRCP< Teuchos::Array< LocalOrdinal > > lclInds2D_
Local column indices for all rows.
Definition: Tpetra_CrsGraph_decl.hpp:2239
Tpetra::Classes::CrsMatrix::getProfileType
ProfileType getProfileType() const
The profile type of the matrix, indicating whether it was allocated with static or dynamic data structures.
Definition: Tpetra_CrsMatrix_def.hpp:641
Tpetra_Details_computeOffsets.hpp
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Tpetra::Classes::CrsMatrix::getGlobalNumEntries
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.
Definition: Tpetra_CrsMatrix_def.hpp:690
Tpetra::Classes::CrsGraph::local_graph_type
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Definition: Tpetra_CrsGraph_decl.hpp:292
Tpetra::Classes::CrsGraph::getRowMap
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
Definition: Tpetra_CrsGraph_def.hpp:904
Tpetra::Classes::CrsGraph::reindexColumns
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Definition: Tpetra_CrsGraph_def.hpp:4454
Tpetra::Details::unpackAndCombineWithOwningPIDsCount
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Tpetra::global_size_t
size_t global_size_t
Global size_t object.
Definition: Tpetra_ConfigDefs.hpp:109
Tpetra::merge2
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2 valEnd)
Merge values in place, additively, with the same index.
Definition: Tpetra_Util.hpp:633
Tpetra::Classes::RowMatrix::getNumEntriesInLocalRow
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Details::castAwayConstDualView
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
Definition: Tpetra_Details_castAwayConstDualView.hpp:64
Tpetra::Classes::Map::isNodeLocalElement
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
Definition: Tpetra_Map_def.hpp:1134
Tpetra_Details_createMirrorView.hpp
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Tpetra::Classes::Export
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Definition: Tpetra_Export_decl.hpp:124
Tpetra::Classes::MultiVector::reduce
void reduce()
Sum values of a locally replicated multivector across all processes.
Definition: Tpetra_MultiVector_def.hpp:4243
Tpetra::SrcDistObject
Abstract base class for objects that can be the source of an Import or Export operation.
Definition: Tpetra_SrcDistObject.hpp:89
Tpetra::Details::copyOffsets
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
Definition: Tpetra_Details_copyOffsets.hpp:407
Tpetra::Classes::CrsMatrix< SC, LO, GO, NT >::local_ordinal_type
LO local_ordinal_type
This class' second template parameter; the type of local indices.
Definition: Tpetra_CrsMatrix_decl.hpp:447
Tpetra::Classes::DistObject< Scalar, LocalOrdinal, GlobalOrdinal, Node >::isDistributed
bool isDistributed() const
Whether this is a globally distributed object.
Definition: Tpetra_DistObject_def.hpp:420
Tpetra::INSERT
Insert new values that don't currently exist.
Definition: Tpetra_CombineMode.hpp:96
Tpetra::Classes::CrsGraph
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Definition: Tpetra_CrsGraph_decl.hpp:259
Tpetra::Classes::Operator::getRangeMap
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y....
Tpetra::Classes::CrsMatrix::getGlobalMaxNumRowEntries
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator.
Definition: Tpetra_CrsMatrix_def.hpp:778
Tpetra::Classes::CrsGraph::noRedundancies_
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
Definition: Tpetra_CrsGraph_decl.hpp:2307
Tpetra::CombineMode
CombineMode
Rule for combining data in an Import or Export.
Definition: Tpetra_CombineMode.hpp:94
Tpetra::Classes::RowMatrix
A read-only, row-oriented interface to a sparse matrix.
Definition: Tpetra_RowMatrix_decl.hpp:85
Tpetra::Classes::MultiVector::getDualView
dual_view_type getDualView() const
Get the Kokkos::DualView which implements local storage.
Definition: Tpetra_MultiVector_def.hpp:4547
Tpetra::Classes::Vector
A distributed dense vector.
Definition: Tpetra_Vector_decl.hpp:82