Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_packCrsMatrix_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
44 
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
54 #include <memory>
55 #include <string>
56 
79 
80 namespace Tpetra {
81 
82 #ifndef DOXYGEN_SHOULD_SKIP_THIS
83 // Forward declaration of Distributor
84 class Distributor;
85 #endif // DOXYGEN_SHOULD_SKIP_THIS
86 
87 //
88 // Users must never rely on anything in the Details namespace.
89 //
90 namespace Details {
91 
92 namespace PackCrsMatrixImpl {
100 template<class OutputOffsetsViewType,
101  class CountsViewType,
102  class InputOffsetsViewType,
103  class InputLocalRowIndicesViewType,
104  class InputLocalRowPidsViewType,
105  const bool debug =
106 #ifdef HAVE_TPETRA_DEBUG
107  true
108 #else
109  false
110 #endif // HAVE_TPETRA_DEBUG
111  >
113 public:
114  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
115  typedef typename CountsViewType::non_const_value_type count_type;
116  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
117  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
118  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
119  // output Views drive where execution happens.
120  typedef typename OutputOffsetsViewType::device_type device_type;
121  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
122  typename device_type::execution_space>::value,
123  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
124  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
125  "OutputOffsetsViewType must be a Kokkos::View.");
126  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
127  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
128  static_assert (std::is_integral<output_offset_type>::value,
129  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
130  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
131  "CountsViewType must be a Kokkos::View.");
132  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
133  "CountsViewType must be a nonconst Kokkos::View.");
134  static_assert (std::is_integral<count_type>::value,
135  "The type of each entry of CountsViewType must be a built-in integer type.");
136  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
137  "InputOffsetsViewType must be a Kokkos::View.");
138  static_assert (std::is_integral<input_offset_type>::value,
139  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
140  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
141  "InputLocalRowIndicesViewType must be a Kokkos::View.");
142  static_assert (std::is_integral<local_row_index_type>::value,
143  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
144 
145  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
146  const CountsViewType& counts,
147  const InputOffsetsViewType& rowOffsets,
148  const InputLocalRowIndicesViewType& lclRowInds,
149  const InputLocalRowPidsViewType& lclRowPids,
150  const count_type sizeOfLclCount,
151  const count_type sizeOfGblColInd,
152  const count_type sizeOfPid,
153  const count_type sizeOfValue) :
154  outputOffsets_ (outputOffsets),
155  counts_ (counts),
156  rowOffsets_ (rowOffsets),
157  lclRowInds_ (lclRowInds),
158  lclRowPids_ (lclRowPids),
159  sizeOfLclCount_ (sizeOfLclCount),
160  sizeOfGblColInd_ (sizeOfGblColInd),
161  sizeOfPid_ (sizeOfPid),
162  sizeOfValue_ (sizeOfValue),
163  error_ ("error") // don't forget this, or you'll get segfaults!
164  {
165  if (debug) {
166  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
167 
168  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
169  std::ostringstream os;
170  os << "lclRowInds.extent(0) = " << numRowsToPack
171  << " != counts.extent(0) = " << counts_.extent (0)
172  << ".";
173  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
174  }
175  if (static_cast<size_t> (numRowsToPack + 1) !=
176  static_cast<size_t> (outputOffsets_.extent (0))) {
177  std::ostringstream os;
178  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
179  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
180  << ".";
181  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
182  }
183  }
184  }
185 
186  KOKKOS_INLINE_FUNCTION void
187  operator() (const local_row_index_type& curInd,
188  output_offset_type& update,
189  const bool final) const
190  {
191  if (debug) {
192  if (curInd < static_cast<local_row_index_type> (0)) {
193  error_ () = 1;
194  return;
195  }
196  }
197 
198  if (final) {
199  if (debug) {
200  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
201  error_ () = 2;
202  return;
203  }
204  }
205  outputOffsets_(curInd) = update;
206  }
207 
208  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
209  const auto lclRow = lclRowInds_(curInd);
210  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
211  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
212  error_ () = 3;
213  return;
214  }
215  // count_type could differ from the type of each row offset.
216  // For example, row offsets might each be 64 bits, but if their
217  // difference always fits in 32 bits, we may then safely use a
218  // 32-bit count_type.
219  const count_type count =
220  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
221 
222  // We pack first the number of entries in the row, then that
223  // many global column indices, then that many pids (if any),
224  // then that many values. However, if the number of entries in
225  // the row is zero, we pack nothing.
226  const count_type numBytes = (count == 0) ?
227  static_cast<count_type> (0) :
228  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
229  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
230  sizeOfValue_);
231 
232  if (final) {
233  counts_(curInd) = numBytes;
234  }
235  update += numBytes;
236  }
237  }
238 
239  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
240  // have join both with and without volatile! Otherwise intrawarp
241  // joins are really slow on GPUs.
242 
244  int getError () const {
245  auto error_h = Kokkos::create_mirror_view (error_);
246  Kokkos::deep_copy (error_h, error_);
247  return error_h ();
248  }
249 
250 private:
251  OutputOffsetsViewType outputOffsets_;
252  CountsViewType counts_;
253  typename InputOffsetsViewType::const_type rowOffsets_;
254  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
255  typename InputLocalRowPidsViewType::const_type lclRowPids_;
256  count_type sizeOfLclCount_;
257  count_type sizeOfGblColInd_;
258  count_type sizeOfPid_;
259  count_type sizeOfValue_;
260  Kokkos::View<int, device_type> error_;
261 };
262 
272 template<class OutputOffsetsViewType,
273  class CountsViewType,
274  class InputOffsetsViewType,
275  class InputLocalRowIndicesViewType,
276  class InputLocalRowPidsViewType>
277 typename CountsViewType::non_const_value_type
278 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
279  const CountsViewType& counts,
280  const InputOffsetsViewType& rowOffsets,
281  const InputLocalRowIndicesViewType& lclRowInds,
282  const InputLocalRowPidsViewType& lclRowPids,
283  const typename CountsViewType::non_const_value_type sizeOfLclCount,
284  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
285  const typename CountsViewType::non_const_value_type sizeOfPid,
286  const typename CountsViewType::non_const_value_type sizeOfValue)
287 {
288  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
289  CountsViewType, typename InputOffsetsViewType::const_type,
290  typename InputLocalRowIndicesViewType::const_type,
291  typename InputLocalRowPidsViewType::const_type> functor_type;
292  typedef typename CountsViewType::non_const_value_type count_type;
293  typedef typename OutputOffsetsViewType::size_type size_type;
294  typedef typename OutputOffsetsViewType::execution_space execution_space;
295  typedef typename functor_type::local_row_index_type LO;
296  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
297  const char prefix[] = "computeNumPacketsAndOffsets: ";
298 
299  count_type count = 0;
300  const count_type numRowsToPack = lclRowInds.extent (0);
301 
302  if (numRowsToPack == 0) {
303  return count;
304  }
305  else {
306  TEUCHOS_TEST_FOR_EXCEPTION
307  (rowOffsets.extent (0) <= static_cast<size_type> (1),
308  std::invalid_argument, prefix << "There is at least one row to pack, "
309  "but the matrix has no rows. lclRowInds.extent(0) = " <<
310  numRowsToPack << ", but rowOffsets.extent(0) = " <<
311  rowOffsets.extent (0) << " <= 1.");
312  TEUCHOS_TEST_FOR_EXCEPTION
313  (outputOffsets.extent (0) !=
314  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
315  prefix << "Output dimension does not match number of rows to pack. "
316  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
317  << " != lclRowInds.extent(0) + 1 = "
318  << static_cast<size_type> (numRowsToPack + 1) << ".");
319  TEUCHOS_TEST_FOR_EXCEPTION
320  (counts.extent (0) != numRowsToPack, std::invalid_argument,
321  prefix << "counts.extent(0) = " << counts.extent (0)
322  << " != numRowsToPack = " << numRowsToPack << ".");
323 
324  functor_type f (outputOffsets, counts, rowOffsets,
325  lclRowInds, lclRowPids, sizeOfLclCount,
326  sizeOfGblColInd, sizeOfPid, sizeOfValue);
327  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
328 
329  // At least in debug mode, this functor checks for errors.
330  const int errCode = f.getError ();
331  TEUCHOS_TEST_FOR_EXCEPTION
332  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
333  << errCode << " != 0.");
334 
335 #if 0
336  size_t total = 0;
337  for (LO k = 0; k < numRowsToPack; ++k) {
338  total += counts[k];
339  }
340  if (outputOffsets(numRowsToPack) != total) {
341  if (errStr.get () == NULL) {
342  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
343  }
344  std::ostringstream& os = *errStr;
345  os << prefix
346  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
347  << outputOffsets(numRowsToPack) << " != sum of counts = "
348  << total << "." << std::endl;
349  if (numRowsToPack != 0) {
350  // Only print the array if it's not too long.
351  if (numRowsToPack < static_cast<LO> (10)) {
352  os << "outputOffsets: [";
353  for (LO i = 0; i <= numRowsToPack; ++i) {
354  os << outputOffsets(i);
355  if (static_cast<LO> (i + 1) <= numRowsToPack) {
356  os << ",";
357  }
358  }
359  os << "]" << std::endl;
360  os << "counts: [";
361  for (LO i = 0; i < numRowsToPack; ++i) {
362  os << counts(i);
363  if (static_cast<LO> (i + 1) < numRowsToPack) {
364  os << ",";
365  }
366  }
367  os << "]" << std::endl;
368  }
369  else {
370  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
371  << outputOffsets(numRowsToPack-1) << "." << std::endl;
372  }
373  }
374  count = outputOffsets(numRowsToPack);
375  return {false, errStr};
376  }
377 #endif // HAVE_TPETRA_DEBUG
378 
379  // Get last entry of outputOffsets, which is the sum of the entries
380  // of counts. Don't assume UVM.
381  using Tpetra::Details::getEntryOnHost;
382  return static_cast<count_type> (getEntryOnHost (outputOffsets,
383  numRowsToPack));
384  }
385 }
386 
/// \brief Pack a single row of a local CrsMatrix into the byte buffer
///   \c exports, starting at byte position \c offset.
///
/// Layout written for a nonempty row, in order: the entry count
/// (packed as an LO), then one global column index per entry, then
/// (only if \c pack_pids) one process rank per entry, then the values.
/// A row with \c num_ent == 0 writes nothing.
///
/// \param col_map [in] Used to convert each local column index to a
///   global one via getGlobalElement.
/// \param exports [out] Destination byte buffer.
/// \param offset [in] Byte position in \c exports where this row starts.
/// \param num_ent [in] Number of entries in the row.
/// \param num_bytes_per_value [in] Bytes assumed per packed value when
///   computing the expected total.
/// \param pack_pids [in] Whether to pack process ranks.
///
/// \return (error code, number of bytes written).  The error code is 0
///   on success (including the empty-row case, which returns (0, 0)),
///   10 if packing the values reported an error, and 11 if the number
///   of bytes written differs from the expected total.
///
/// NOTE(review): the body also reads lids_in, pids_in, and vals_in,
/// but their parameter declarations are not present in this listing
/// (the embedded numbering skips from 406 to 410).  Restore them from
/// the authoritative file before compiling.
402 template<class ST, class ColumnMap, class BufferDeviceType>
403 KOKKOS_FUNCTION
404 Kokkos::pair<int, size_t>
405 packCrsMatrixRow (const ColumnMap& col_map,
406  const Kokkos::View<char*, BufferDeviceType>& exports,
  // NOTE(review): declarations of lids_in, pids_in, vals_in are missing here.
410  const size_t offset,
411  const size_t num_ent,
412  const size_t num_bytes_per_value,
413  const bool pack_pids)
414 {
415  using Kokkos::subview;
416  typedef typename ColumnMap::local_ordinal_type LO;
417  typedef typename ColumnMap::global_ordinal_type GO;
418  typedef BufferDeviceType BDT;
419  typedef Kokkos::pair<int, size_t> return_type;
420 
421  if (num_ent == 0) {
422  // Empty rows always take zero bytes, to ensure sparsity.
423  return return_type (0, 0);
424  }
425 
  // Byte ranges of the four sections, each beginning where the
  // previous one ends: [count][gids][pids (optional)][values].
426  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
427  const size_t num_ent_beg = offset;
428  const size_t num_ent_len = PackTraits<LO, BDT>::packValueCount (num_ent_LO);
429 
430  const size_t gids_beg = num_ent_beg + num_ent_len;
431  const size_t gids_len = num_ent * PackTraits<GO, BDT>::packValueCount (GO (0));
432 
433  const size_t pids_beg = gids_beg + gids_len;
434  const size_t pids_len = pack_pids ?
435  num_ent * PackTraits<int, BDT>::packValueCount (int (0)) :
436  static_cast<size_t> (0);
437 
438  const size_t vals_beg = gids_beg + gids_len + pids_len;
439  const size_t vals_len = num_ent * num_bytes_per_value;
440 
  // Raw destination pointers for each section; pids_out stays NULL
  // when PIDs are not being packed.
441  char* const num_ent_out = exports.data () + num_ent_beg;
442  char* const gids_out = exports.data () + gids_beg;
443  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
444  char* const vals_out = exports.data () + vals_beg;
445 
446  size_t num_bytes_out = 0;
447  int error_code = 0;
448  num_bytes_out += PackTraits<LO, BDT>::packValue (num_ent_out, num_ent_LO);
449 
450  {
451  // Copy column indices one at a time, so that we don't need
452  // temporary storage.
453  for (size_t k = 0; k < num_ent; ++k) {
454  const LO lid = lids_in[k];
455  const GO gid = col_map.getGlobalElement (lid);
456  num_bytes_out += PackTraits<GO, BDT>::packValue (gids_out, k, gid);
457  }
458  // Copy PIDs one at a time, so that we don't need temporary storage.
459  if (pack_pids) {
460  for (size_t k = 0; k < num_ent; ++k) {
      // pids_in is indexed by the entry's local column index, not by
      // its position k within the row.
461  const LO lid = lids_in[k];
462  const int pid = pids_in[lid];
463  num_bytes_out += PackTraits<int, BDT>::packValue (pids_out, k, pid);
464  }
465  }
      // packArray returns a pair; .first is accumulated as an error
      // code and .second as a byte count.
466  const auto p =
467  PackTraits<ST, BDT>::packArray (vals_out, vals_in.data (), num_ent);
468  error_code += p.first;
469  num_bytes_out += p.second;
470  }
471 
472  if (error_code != 0) {
473  return return_type (10, num_bytes_out);
474  }
475 
  // Cross-check the bytes actually written against the section sizes
  // computed up front.
476  const size_t expected_num_bytes =
477  num_ent_len + gids_len + pids_len + vals_len;
478  if (num_bytes_out != expected_num_bytes) {
479  return return_type (11, num_bytes_out);
480  }
481  return return_type (0, num_bytes_out);
482 }
483 
484 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
485 struct PackCrsMatrixFunctor {
486  typedef LocalMatrix local_matrix_type;
487  typedef LocalMap local_map_type;
488  typedef typename local_matrix_type::value_type ST;
489  typedef typename local_map_type::local_ordinal_type LO;
490  typedef typename local_map_type::global_ordinal_type GO;
491  typedef typename local_matrix_type::device_type DT;
492 
493  typedef Kokkos::View<const size_t*, BufferDeviceType>
494  num_packets_per_lid_view_type;
495  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
496  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
497  typedef typename PackTraits<LO, DT>::input_array_type
498  export_lids_view_type;
500  source_pids_view_type;
501 
502  typedef typename num_packets_per_lid_view_type::non_const_value_type
503  count_type;
504  typedef typename offsets_view_type::non_const_value_type
505  offset_type;
506  typedef Kokkos::pair<int, LO> value_type;
507 
508  static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
509  "local_map_type::local_ordinal_type and "
510  "local_matrix_type::ordinal_type must be the same.");
511 
512  local_matrix_type local_matrix;
513  local_map_type local_col_map;
514  exports_view_type exports;
515  num_packets_per_lid_view_type num_packets_per_lid;
516  export_lids_view_type export_lids;
517  source_pids_view_type source_pids;
518  offsets_view_type offsets;
519  size_t num_bytes_per_value;
520  bool pack_pids;
521 
522  PackCrsMatrixFunctor (const local_matrix_type& local_matrix_in,
523  const local_map_type& local_col_map_in,
524  const exports_view_type& exports_in,
525  const num_packets_per_lid_view_type& num_packets_per_lid_in,
526  const export_lids_view_type& export_lids_in,
527  const source_pids_view_type& source_pids_in,
528  const offsets_view_type& offsets_in,
529  const size_t num_bytes_per_value_in,
530  const bool pack_pids_in) :
531  local_matrix (local_matrix_in),
532  local_col_map (local_col_map_in),
533  exports (exports_in),
534  num_packets_per_lid (num_packets_per_lid_in),
535  export_lids (export_lids_in),
536  source_pids (source_pids_in),
537  offsets (offsets_in),
538  num_bytes_per_value (num_bytes_per_value_in),
539  pack_pids (pack_pids_in)
540  {
541  const LO numRows = local_matrix_in.numRows ();
542  const LO rowMapDim =
543  static_cast<LO> (local_matrix.graph.row_map.extent (0));
544  TEUCHOS_TEST_FOR_EXCEPTION
545  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
546  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
547  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
548  }
549 
550  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
551  {
552  using ::Tpetra::Details::OrdinalTraits;
553  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
554  }
555 
556  KOKKOS_INLINE_FUNCTION void
557  join (volatile value_type& dst, const volatile value_type& src) const
558  {
559  // `dst` should reflect the first (least) bad index and all other
560  // associated error codes and data, so prefer keeping it.
561  if (src.first != 0 && dst.first == 0) {
562  dst = src;
563  }
564  }
565 
566  KOKKOS_INLINE_FUNCTION
567  void operator() (const LO i, value_type& dst) const
568  {
569  const size_t offset = offsets[i];
570  const LO export_lid = export_lids[i];
571  const size_t buf_size = exports.size();
572  const size_t num_bytes = num_packets_per_lid(i);
573  const size_t num_ent =
574  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
575  - local_matrix.graph.row_map[export_lid]);
576 
577  // Only pack this row's data if it has a nonzero number of
578  // entries. We can do this because receiving processes get the
579  // number of packets, and will know that zero packets means zero
580  // entries.
581  if (num_ent == 0) {
582  return;
583  }
584 
585  if (export_lid >= local_matrix.numRows ()) {
586  if (dst.first != 0) { // keep only the first error
587  dst = Kokkos::make_pair (1, i); // invalid row
588  }
589  return;
590  }
591  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
592  if (dst.first != 0) { // keep only the first error
593  dst = Kokkos::make_pair (2, i); // out of bounds
594  }
595  return;
596  }
597 
598  // We can now pack this row
599 
600  // Since the matrix is locally indexed on the calling process, we
601  // have to use its column Map (which it _must_ have in this case)
602  // to convert to global indices.
603  const auto row_beg = local_matrix.graph.row_map[export_lid];
604  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
605  auto vals_in = subview (local_matrix.values,
606  Kokkos::make_pair (row_beg, row_end));
607  auto lids_in = subview (local_matrix.graph.entries,
608  Kokkos::make_pair (row_beg, row_end));
609  typedef local_map_type LMT;
610  typedef BufferDeviceType BDT;
611  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
612  source_pids, vals_in, offset,
613  num_ent, num_bytes_per_value,
614  pack_pids);
615  int error_code_this_row = p.first;
616  size_t num_bytes_packed_this_row = p.second;
617  if (error_code_this_row != 0) {
618  if (dst.first != 0) { // keep only the first error
619  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
620  }
621  }
622  else if (num_bytes_packed_this_row != num_bytes) {
623  if (dst.first != 0) { // keep only the first error
624  dst = Kokkos::make_pair (3, i);
625  }
626  }
627  }
628 };
629 
637 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
638 void
639 do_pack (const LocalMatrix& local_matrix,
640  const LocalMap& local_map,
641  const Kokkos::View<char*, BufferDeviceType>& exports,
642  const typename PackTraits<
643  size_t,
644  BufferDeviceType
645  >::input_array_type& num_packets_per_lid,
646  const typename PackTraits<
647  typename LocalMap::local_ordinal_type,
648  typename LocalMatrix::device_type
649  >::input_array_type& export_lids,
650  const typename PackTraits<
651  int,
652  typename LocalMatrix::device_type
653  >::input_array_type& source_pids,
654  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
655  const size_t num_bytes_per_value,
656  const bool pack_pids)
657 {
658  typedef typename LocalMap::local_ordinal_type LO;
659  typedef typename LocalMatrix::device_type DT;
660  typedef Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
661  const char prefix[] = "Tpetra::Details::do_pack: ";
662 
663  if (export_lids.extent (0) != 0) {
664  TEUCHOS_TEST_FOR_EXCEPTION
665  (static_cast<size_t> (offsets.extent (0)) !=
666  static_cast<size_t> (export_lids.extent (0) + 1),
667  std::invalid_argument, prefix << "offsets.extent(0) = "
668  << offsets.extent (0) << " != export_lids.extent(0) (= "
669  << export_lids.extent (0) << ") + 1.");
670  TEUCHOS_TEST_FOR_EXCEPTION
671  (export_lids.extent (0) != num_packets_per_lid.extent (0),
672  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
673  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
674  << num_packets_per_lid.extent (0) << ".");
675  // If exports has nonzero length at this point, then the matrix
676  // has at least one entry to pack. Thus, if packing process
677  // ranks, we had better have at least one process rank to pack.
678  TEUCHOS_TEST_FOR_EXCEPTION
679  (pack_pids && exports.extent (0) != 0 &&
680  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
681  "pack_pids is true, and exports.extent(0) = " <<
682  exports.extent (0) << " != 0, meaning that we need to pack at "
683  "least one matrix entry, but source_pids.extent(0) = 0.");
684  }
685 
686  typedef PackCrsMatrixFunctor<LocalMatrix, LocalMap,
687  BufferDeviceType> pack_functor_type;
688  pack_functor_type f (local_matrix, local_map, exports,
689  num_packets_per_lid, export_lids,
690  source_pids, offsets, num_bytes_per_value,
691  pack_pids);
692 
693  typename pack_functor_type::value_type result;
694  range_type range (0, num_packets_per_lid.extent (0));
695  Kokkos::parallel_reduce (range, f, result);
696 
697  if (result.first != 0) {
698  std::ostringstream os;
699 
700  if (result.first == 1) { // invalid local row index
701  auto export_lids_h = Kokkos::create_mirror_view (export_lids);
702  Kokkos::deep_copy (export_lids_h, export_lids);
703  const auto firstBadLid = export_lids_h(result.second);
704  os << "First bad export LID: export_lids(i=" << result.second << ") = "
705  << firstBadLid;
706  }
707  else if (result.first == 2) { // invalid offset
708  auto offsets_h = Kokkos::create_mirror_view (offsets);
709  Kokkos::deep_copy (offsets_h, offsets);
710  const auto firstBadOffset = offsets_h(result.second);
711 
712  auto num_packets_per_lid_h =
713  Kokkos::create_mirror_view (num_packets_per_lid);
714  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid);
715  os << "First bad offset: offsets(i=" << result.second << ") = "
716  << firstBadOffset << ", num_packets_per_lid(i) = "
717  << num_packets_per_lid_h(result.second) << ", buf_size = "
718  << exports.size ();
719  }
720 
721  TEUCHOS_TEST_FOR_EXCEPTION
722  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor reported "
723  "error code " << result.first << " for the first bad row "
724  << result.second << ". " << os.str ());
725  }
726 }
727 
/// \brief Pack the rows \c export_lids of \c sourceMatrix into the
///   byte buffer \c exports (Kokkos implementation).
///
/// Steps, in order: validate the inputs; determine the number of bytes
/// per packed matrix value; compute per-row byte counts and buffer
/// offsets with computeNumPacketsAndOffsets; reallocate \c exports if
/// it is too small; then pack on device with do_pack.
///
/// \param sourceMatrix [in] Matrix to pack; its local matrix and the
///   local view of its column Map are used.
/// \param exports [in/out] Pack buffer.  Replaced by a new allocation
///   if the required size exceeds its current extent, or by a
///   zero-length buffer if there are no rows to pack.
/// \param num_packets_per_lid [out] Bytes packed for each row.
/// \param export_lids [in] Local indices of the rows to pack.
/// \param export_pids [in] Process ranks to pack if \c pack_pids.
/// \param constant_num_packets [out] Always set to zero.
/// \param pack_pids [in] Whether to pack process ranks.
/// \param dist Unused (the parameter is unnamed).
///
/// Throws std::invalid_argument on inconsistent inputs; exceptions
/// from computeNumPacketsAndOffsets and do_pack propagate.
757 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
758 void
759 packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
760  Kokkos::DualView<char*, BufferDeviceType>& exports,
761  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
762  const Kokkos::View<const LO*, typename NT::device_type>& export_lids,
763  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
764  size_t& constant_num_packets,
765  const bool pack_pids,
766  Distributor& /* dist */)
767 {
768  using Kokkos::View;
769  typedef BufferDeviceType DT;
770  typedef typename DT::execution_space execution_space;
771  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
772  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
  // Compile-time switch for the diagnostic output to std::cerr below;
  // it is hard-wired off here.
773  constexpr bool debug = false;
774 
775  auto local_matrix = sourceMatrix.getLocalMatrix ();
776  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
777 
778  // Setting this to zero tells the caller to expect a possibly
779  // different ("nonconstant") number of packets per local index
780  // (i.e., a possibly different number of entries per row).
781  constant_num_packets = 0;
782 
783  const size_t num_export_lids =
784  static_cast<size_t> (export_lids.extent (0));
  // NOTE(review): the message below labels the value
  // "num_export_lids.extent(0)", but what it prints is
  // export_lids.extent(0) (stored in num_export_lids).
785  TEUCHOS_TEST_FOR_EXCEPTION
786  (num_export_lids !=
787  static_cast<size_t> (num_packets_per_lid.extent (0)),
788  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
789  << num_export_lids << " != num_packets_per_lid.extent(0) = "
790  << num_packets_per_lid.extent (0) << ".");
791  if (num_export_lids != 0) {
792  TEUCHOS_TEST_FOR_EXCEPTION
793  (num_packets_per_lid.data () == NULL, std::invalid_argument,
794  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
795  "num_packets_per_lid.data() = "
796  << num_packets_per_lid.data () << " == NULL.");
797  }
798 
  // Sizes (in bytes) of one packed local index, global index, and
  // process rank.
799  const size_t num_bytes_per_lid = PackTraits<LO, DT>::packValueCount (LO (0));
800  const size_t num_bytes_per_gid = PackTraits<GO, DT>::packValueCount (GO (0));
801  const size_t num_bytes_per_pid = PackTraits<int, DT>::packValueCount (int (0));
802 
803  size_t num_bytes_per_value = 0;
  // NOTE(review): the `if (...) {` that opens this branch is missing
  // from this listing (the embedded numbering skips 804).  Restore it
  // from the authoritative file; its condition is not visible here.
805  // Assume ST is default constructible; packValueCount wants an instance.
806  num_bytes_per_value = PackTraits<ST,DT>::packValueCount (ST ());
807  }
808  else {
809  // Since the packed data come from the source matrix, we can use
810  // the source matrix to get the number of bytes per Scalar value
811  // stored in the matrix. This assumes that all Scalar values in
812  // the source matrix require the same number of bytes. If the
813  // source matrix has no entries on the calling process, then we
814  // hope that some process does have some idea how big a Scalar
815  // value is. Of course, if no processes have any entries, then no
816  // values should be packed (though this does assume that in our
817  // packing scheme, rows with zero entries take zero bytes).
818  size_t num_bytes_per_value_l = 0;
819  if (local_matrix.values.extent(0) > 0) {
820  const ST& val = local_matrix.values(0);
821  num_bytes_per_value_l = PackTraits<ST, DT>::packValueCount (val);
822  }
  // Take the maximum over all processes in the matrix's communicator.
823  using Teuchos::reduceAll;
824  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
825  Teuchos::REDUCE_MAX,
826  num_bytes_per_value_l,
827  Teuchos::outArg (num_bytes_per_value));
828  }
829 
  // Nothing to pack: hand back an empty buffer.  This return comes
  // after the reduceAll above, so processes with no rows to pack
  // still reach that call when the else branch is taken.
830  if (num_export_lids == 0) {
831  // FIXME (26 Apr 2016) Fences around (UVM) allocations only
832  // temporarily needed for #227 debugging. Should be able to
833  // remove them after that's fixed.
834  execution_space::fence ();
835  exports = exports_view_type ("exports", 0);
836  execution_space::fence ();
837  return;
838  }
839 
840  // Array of offsets into the pack buffer.
841  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
842 
843  // Compute number of packets per LID (row to send), as well as
844  // corresponding offsets (the prefix sum of the packet counts).
845  const size_t count =
846  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
847  local_matrix.graph.row_map, export_lids,
848  export_pids,
849  num_bytes_per_lid, num_bytes_per_gid,
850  num_bytes_per_pid, num_bytes_per_value);
851 
  // The buffer only ever grows here; growing replaces it with a new
  // allocation rather than resizing in place.
852  // Resize the output pack buffer if needed.
853  if (count > static_cast<size_t> (exports.extent (0))) {
854  // FIXME (26 Apr 2016) Fences around (UVM) allocations only
855  // temporarily needed for #227 debugging. Should be able to
856  // remove them after that's fixed.
857  execution_space::fence ();
858  exports = exports_view_type ("exports", count);
859  if (debug) {
860  std::ostringstream os;
861  os << "*** exports resized to " << count << std::endl;
862  std::cerr << os.str ();
863  }
864  execution_space::fence ();
865  }
866  if (debug) {
867  std::ostringstream os;
868  os << "*** count: " << count << ", exports.extent(0): "
869  << exports.extent (0) << std::endl;
870  std::cerr << os.str ();
871  }
872 
873  // If exports has nonzero length at this point, then the matrix has
874  // at least one entry to pack. Thus, if packing process ranks, we
875  // had better have at least one process rank to pack.
876  TEUCHOS_TEST_FOR_EXCEPTION
877  (pack_pids && exports.extent (0) != 0 &&
878  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
879  "pack_pids is true, and exports.extent(0) = " <<
880  exports.extent (0) << " != 0, meaning that we need to pack at least "
881  "one matrix entry, but export_pids.extent(0) = 0.");
882 
883  typedef typename std::decay<decltype (local_matrix)>::type
884  local_matrix_type;
885  typedef typename std::decay<decltype (local_col_map)>::type
886  local_map_type;
887  typedef typename exports_view_type::t_dev dev_exports_view_type;
888  typedef typename dev_exports_view_type::memory_space buf_mem_space;
  // Mark the device side of the DualView as modified, then pack into
  // the device view.
889  exports.template modify<buf_mem_space> ();
890  auto exports_d = exports.template view<buf_mem_space> ();
891  do_pack<local_matrix_type, local_map_type, DT>
892  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
893  export_lids, export_pids, offsets, num_bytes_per_value,
894  pack_pids);
895  // If we got this far, we succeeded.
896 }
897 
898 } // namespace PackCrsMatrixImpl
899 
900 template<typename ST, typename LO, typename GO, typename NT>
901 void
903  Teuchos::Array<char>& exports,
904  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
905  const Teuchos::ArrayView<const LO>& exportLIDs,
906  size_t& constantNumPackets,
907  Distributor& distor)
908 {
909  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type local_matrix_type;
910  typedef typename local_matrix_type::device_type device_type;
911  typedef typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space host_exec_space;
912  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
913 
914  // mfh 23 Aug 2017: Fix for #1088 requires pack / unpack buffers to
915  // have a possibly different memory space (CudaSpace) than the
916  // default CUDA memory space (currently CudaUVMSpace).
917  typedef typename device_type::execution_space buffer_exec_space;
918 #ifdef KOKKOS_ENABLE_CUDA
919  typedef typename std::conditional<
920  std::is_same<
921  buffer_exec_space, Kokkos::Cuda
922  >::value,
923  Kokkos::CudaSpace,
924  typename device_type::memory_space
925  >::type buffer_memory_space;
926 #else
927  typedef typename device_type::memory_space buffer_memory_space;
928 #endif // KOKKOS_ENABLE_CUDA
929  typedef Kokkos::Device<buffer_exec_space,
930  buffer_memory_space> buffer_device_type;
931 
932  // Convert all Teuchos::Array to Kokkos::View
933 
934  // This is an output array, so we don't have to copy to device here.
935  // However, we'll have to remember to copy back to host when done.
936  typename local_matrix_type::device_type outputDevice;
937  auto num_packets_per_lid_d =
939  numPacketsPerLID.getRawPtr (),
940  numPacketsPerLID.size (), false,
941  "num_packets_per_lid");
942  // This is an input array, so we have to copy to device here.
943  // However, we never need to copy it back to host.
944  auto export_lids_d =
946  exportLIDs.getRawPtr (),
947  exportLIDs.size (), true,
948  "export_lids");
949  // Create an empty array of PIDs
950  Kokkos::View<int*, device_type> export_pids_d ("export_pids", 0);
951 
952  Kokkos::DualView<char*, buffer_device_type> exports_dv ("exports", 0);
953  constexpr bool pack_pids = false;
954  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
955  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
956  export_pids_d, constantNumPackets, pack_pids, distor);
957 
958  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
959  // copy them back to host.
960  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
961  (numPacketsPerLID.getRawPtr (),
962  numPacketsPerLID.size ());
963  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
964 
965  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
966  // exports_dv above, then we have two host copies for exports_h.
967 
968  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
969  // to copy them back to host.
970  if (static_cast<size_t> (exports.size ()) !=
971  static_cast<size_t> (exports_dv.extent (0))) {
972  exports.resize (exports_dv.extent (0));
973  }
974  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
975  exports.size ());
976  Kokkos::deep_copy (exports_h, exports_dv.d_view);
977 }
978 
979 template<typename ST, typename LO, typename GO, typename NT>
980 void
982  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
983  const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
984  const Kokkos::DualView<const LO*, typename NT::device_type>& exportLIDs,
985  size_t& constantNumPackets,
986  Distributor& distor)
987 {
988  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type local_matrix_type;
989  typedef typename local_matrix_type::device_type device_type;
990 
991  // mfh 23 Aug 2017: Fix for #1088 requires pack / unpack buffers to
992  // have a possibly different memory space (CudaSpace) than the
993  // default CUDA memory space (currently CudaUVMSpace).
994  typedef typename device_type::execution_space buffer_exec_space;
995 #ifdef KOKKOS_ENABLE_CUDA
996  typedef typename std::conditional<
997  std::is_same<
998  buffer_exec_space, Kokkos::Cuda
999  >::value,
1000  Kokkos::CudaSpace,
1001  typename device_type::memory_space
1002  >::type buffer_memory_space;
1003 #else
1004  typedef typename device_type::memory_space buffer_memory_space;
1005 #endif // KOKKOS_ENABLE_CUDA
1006  typedef Kokkos::Device<buffer_exec_space,
1007  buffer_memory_space> buffer_device_type;
1008 
1009  // Create an empty array of PIDs, since the interface needs it.
1010  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
1011  constexpr bool pack_pids = false;
1012 
1013  // Write-only device access
1014  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
1015  numPacketsPerLID_nc.modified_host() = 0;
1016  numPacketsPerLID_nc.modified_device() = 1;
1017  auto numPacketsPerLID_d = numPacketsPerLID.template view<buffer_memory_space> ();
1018 
1019  // Read-only device access
1020  auto exportLIDs_nc = Tpetra::Details::castAwayConstDualView (exportLIDs);
1021  exportLIDs_nc.template sync<typename device_type::memory_space> ();
1022  auto exportLIDs_d = exportLIDs.template view<typename device_type::memory_space> ();
1023 
1024  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
1025  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
1026  exportPIDs_d, constantNumPackets, pack_pids, distor);
1027 }
1028 
1029 template<typename ST, typename LO, typename GO, typename NT>
1030 void
1032  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
1033  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
1034  const Teuchos::ArrayView<const LO>& exportLIDs,
1035  const Teuchos::ArrayView<const int>& sourcePIDs,
1036  size_t& constantNumPackets,
1037  Distributor& distor)
1038 {
1039  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type local_matrix_type;
1040  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
1041  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
1042  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
1043 
1044  typename local_matrix_type::device_type outputDevice;
1045 
1046  // Convert all Teuchos::Array to Kokkos::View
1047 
1048  // This is an output array, so we don't have to copy to device here.
1049  // However, we'll have to remember to copy back to host when done.
1050  auto num_packets_per_lid_d =
1051  create_mirror_view_from_raw_host_array (buffer_device_type (),
1052  numPacketsPerLID.getRawPtr (),
1053  numPacketsPerLID.size (), false,
1054  "num_packets_per_lid");
1055 
1056  // This is an input array, so we have to copy to device here.
1057  // However, we never need to copy it back to host.
1058  auto export_lids_d =
1060  exportLIDs.getRawPtr (),
1061  exportLIDs.size (), true,
1062  "export_lids");
1063  // This is an input array, so we have to copy to device here.
1064  // However, we never need to copy it back to host.
1065  auto export_pids_d =
1067  sourcePIDs.getRawPtr (),
1068  sourcePIDs.size (), true,
1069  "export_pids");
1070  constexpr bool pack_pids = true;
1071  PackCrsMatrixImpl::packCrsMatrix(
1072  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1073  export_pids_d, constantNumPackets, pack_pids, distor);
1074 
1075  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
1076  // copy them back to host.
1077  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1078  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1079  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1080 }
1081 
1082 } // namespace Details
1083 } // namespace Tpetra
1084 
// Explicit instantiation macro for the three pack functions defined in
// this file: Details::packCrsMatrix, Details::packCrsMatrixNew, and
// Details::packCrsMatrixWithOwningPIDs, for the given Scalar (ST),
// LocalOrdinal (LO), GlobalOrdinal (GO), and Node (NT) types.
//
// The expansion already ends with a semicolon.  The names Details,
// CrsMatrix, DistObject, and Distributor are not namespace-qualified.
// NOTE(review): presumably the macro must therefore be expanded inside
// namespace Tpetra; confirm at the use sites.
#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
  template void \
  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Teuchos::Array<char>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    size_t&, \
    Distributor&); \
  template void \
  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<const LO*, NT::device_type>&, \
    size_t&, \
    Distributor&); \
  template void \
  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    const Teuchos::ArrayView<const int>&, \
    size_t&, \
    Distributor&);
1108 
1109 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Tpetra_Details_OrdinalTraits.hpp
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Tpetra::Details::packCrsMatrixWithOwningPIDs
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Definition: Tpetra_Details_packCrsMatrix_def.hpp:1031
Tpetra::Details::create_mirror_view_from_raw_host_array
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Definition: Tpetra_Details_createMirrorView.hpp:201
Tpetra::Details::PackTraits::input_array_type
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
Definition: Tpetra_Details_PackTraits.hpp:89
Tpetra_Details_PackTraits.hpp
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Details
Implementation details of Tpetra.
Tpetra::Details::PackTraits::packArray
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
Definition: Tpetra_Details_PackTraits.hpp:181
Tpetra::Classes::CrsMatrix
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Definition: Tpetra_CrsMatrix_decl.hpp:424
Tpetra::Details::PackTraits::compileTimeSize
static const bool compileTimeSize
Whether the number of bytes required to pack one instance of value_type is fixed at compile time.
Definition: Tpetra_Details_PackTraits.hpp:80
Tpetra_CrsMatrix_decl.hpp
Declaration of the Tpetra::CrsMatrix class.
Tpetra::Classes::DistObject
Base class for distributed Tpetra objects that support data redistribution.
Definition: Tpetra_DistObject_decl.hpp:349
Tpetra_Details_castAwayConstDualView.hpp
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:188
Tpetra::Details::packCrsMatrixNew
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename NT::device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
Definition: Tpetra_Details_packCrsMatrix_def.hpp:981
Tpetra::Details::packCrsMatrix
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Definition: Tpetra_Details_packCrsMatrix_def.hpp:902
Tpetra::Details::PackCrsMatrixImpl::NumPacketsAndOffsetsFunctor
Compute the number of packets and offsets for the pack procedure.
Definition: Tpetra_Details_packCrsMatrix_def.hpp:112
Tpetra::Classes::CrsMatrix::local_matrix_type
KokkosSparse::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Definition: Tpetra_CrsMatrix_decl.hpp:483
Tpetra_Details_getEntryOnHost.hpp
Declaration and definition of Tpetra::Details::getEntryOnHost.
Tpetra::Details::LocalMap
::Tpetra::Details::Classes::LocalMap< LocalOrdinal, GlobalOrdinal, DeviceType > LocalMap
Alias for Tpetra::Details::Classes::LocalMap.
Definition: Tpetra_Details_LocalMap_fwd.hpp:72
Tpetra::Details::PackTraits::packValue
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Definition: Tpetra_Details_PackTraits.hpp:303
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Details::castAwayConstDualView
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
Definition: Tpetra_Details_castAwayConstDualView.hpp:64
Tpetra_Details_createMirrorView.hpp
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Tpetra::Details::PackTraits::packValueCount
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Definition: Tpetra_Details_PackTraits.hpp:287
Tpetra::Details::PackCrsMatrixImpl::NumPacketsAndOffsetsFunctor::getError
int getError() const
Host function for getting the error.
Definition: Tpetra_Details_packCrsMatrix_def.hpp:244