42 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
82 #ifndef DOXYGEN_SHOULD_SKIP_THIS
85 #endif // DOXYGEN_SHOULD_SKIP_THIS
92 namespace PackCrsMatrixImpl {
/// Functor that fills an offsets view and a per-row byte-count view for the
/// rows to pack. computeNumPacketsAndOffsets instantiates it (as
/// NumPacketsAndOffsetsFunctor) and runs it with Kokkos::parallel_scan.
///
/// The static_asserts below require the output-offsets and counts views to be
/// Kokkos::Views with built-in integer entries, and the input offsets and
/// local-row-index views to be Kokkos::Views with built-in integer entries.
100 template<
class OutputOffsetsViewType,
101 class CountsViewType,
102 class InputOffsetsViewType,
103 class InputLocalRowIndicesViewType,
104 class InputLocalRowPidsViewType,
106 #ifdef HAVE_TPETRA_DEBUG
110 #endif // HAVE_TPETRA_DEBUG
// Entry types of each view, with const stripped.
114 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
115 typedef typename CountsViewType::non_const_value_type count_type;
116 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
117 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
118 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The functor's device type is taken from the output offsets view.
120 typedef typename OutputOffsetsViewType::device_type device_type;
121 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
122 typename device_type::execution_space>::value,
123 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
124 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
125 "OutputOffsetsViewType must be a Kokkos::View.");
126 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
127 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
128 static_assert (std::is_integral<output_offset_type>::value,
129 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
130 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
131 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this compares CountsViewType::value_type against
// output_offset_type rather than count_type, so it also forces the two entry
// types to match. Presumably count_type was intended -- confirm.
132 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
133 "CountsViewType must be a nonconst Kokkos::View.");
134 static_assert (std::is_integral<count_type>::value,
135 "The type of each entry of CountsViewType must be a built-in integer type.");
136 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
137 "InputOffsetsViewType must be a Kokkos::View.");
138 static_assert (std::is_integral<input_offset_type>::value,
139 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
140 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
141 "InputLocalRowIndicesViewType must be a Kokkos::View.");
142 static_assert (std::is_integral<local_row_index_type>::value,
143 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor arguments: the views are stored as members; the four sizeOf*
// values are the byte sizes used by the per-row byte count in operator().
146 const CountsViewType& counts,
147 const InputOffsetsViewType& rowOffsets,
148 const InputLocalRowIndicesViewType& lclRowInds,
149 const InputLocalRowPidsViewType& lclRowPids,
150 const count_type sizeOfLclCount,
151 const count_type sizeOfGblColInd,
152 const count_type sizeOfPid,
153 const count_type sizeOfValue) :
154 outputOffsets_ (outputOffsets),
156 rowOffsets_ (rowOffsets),
157 lclRowInds_ (lclRowInds),
158 lclRowPids_ (lclRowPids),
159 sizeOfLclCount_ (sizeOfLclCount),
160 sizeOfGblColInd_ (sizeOfGblColInd),
161 sizeOfPid_ (sizeOfPid),
162 sizeOfValue_ (sizeOfValue),
// Dimension checks: counts must have one entry per row to pack, and
// outputOffsets one more than that; otherwise std::invalid_argument is thrown.
166 const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
168 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
169 std::ostringstream os;
170 os <<
"lclRowInds.extent(0) = " << numRowsToPack
171 <<
" != counts.extent(0) = " << counts_.extent (0)
173 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
175 if (static_cast<size_t> (numRowsToPack + 1) !=
176 static_cast<size_t> (outputOffsets_.extent (0))) {
177 std::ostringstream os;
178 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
179 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
181 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body with the (index, running value, final flag) signature used with
// Kokkos::parallel_scan. Parts of the body are not visible in this excerpt.
186 KOKKOS_INLINE_FUNCTION
void
187 operator() (
const local_row_index_type& curInd,
188 output_offset_type& update,
189 const bool final)
const
// Range guards on curInd (negative, or past the end of outputOffsets_); what
// happens inside them is not shown here.
192 if (curInd < static_cast<local_row_index_type> (0)) {
200 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// Store the running value as the offset for this index.
// NOTE(review): presumably guarded by `final` -- the guard is not visible.
205 outputOffsets_(curInd) = update;
// Only indices that have a counts_ entry contribute a row; the last scan
// index (== counts_.extent(0)) only receives an offset.
208 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
209 const auto lclRow = lclRowInds_(curInd);
// Reject a local row index outside the range covered by rowOffsets_.
210 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
211 static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
// Number of entries in the row, from consecutive row offsets.
219 const count_type count =
220 static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// Bytes for this row: zero for an empty row; otherwise the count header plus
// count times the per-entry size. The PID size is included only when
// lclRowPids_ is nonempty. The tail of the expression is not visible here.
226 const count_type numBytes = (count == 0) ?
227 static_cast<count_type> (0) :
228 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
229 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
233 counts_(curInd) = numBytes;
// Host mirror of the error_ view declared below.
245 auto error_h = Kokkos::create_mirror_view (error_);
// Stored state: the two output views, const views of the inputs, the byte
// sizes, and a rank-0 int view holding the error code.
251 OutputOffsetsViewType outputOffsets_;
252 CountsViewType counts_;
253 typename InputOffsetsViewType::const_type rowOffsets_;
254 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
255 typename InputLocalRowPidsViewType::const_type lclRowPids_;
256 count_type sizeOfLclCount_;
257 count_type sizeOfGblColInd_;
258 count_type sizeOfPid_;
259 count_type sizeOfValue_;
260 Kokkos::View<int, device_type> error_;
/// Compute the per-row byte counts and the packing offsets for the rows listed
/// in lclRowInds, by running NumPacketsAndOffsetsFunctor in a parallel_scan.
///
/// \param outputOffsets [out] Must have lclRowInds.extent(0) + 1 entries.
/// \param counts [out] Must have lclRowInds.extent(0) entries.
/// \param rowOffsets [in] Row offsets of the matrix; must have more than one
///   entry when there is at least one row to pack.
/// \param lclRowInds [in] Local indices of the rows to pack.
/// \param lclRowPids [in] Forwarded to the functor.
/// \param sizeOfLclCount, sizeOfGblColInd, sizeOfPid, sizeOfValue [in] Byte
///   sizes forwarded to the functor.
///
/// \return A count_type value read from outputOffsets on host (the final
///   return expression is cut off in this excerpt).
///
/// Throws std::invalid_argument on a dimension mismatch, and
/// std::runtime_error if the functor reports a nonzero error code.
272 template<
class OutputOffsetsViewType,
273 class CountsViewType,
274 class InputOffsetsViewType,
275 class InputLocalRowIndicesViewType,
276 class InputLocalRowPidsViewType>
277 typename CountsViewType::non_const_value_type
278 computeNumPacketsAndOffsets (
const OutputOffsetsViewType& outputOffsets,
279 const CountsViewType& counts,
280 const InputOffsetsViewType& rowOffsets,
281 const InputLocalRowIndicesViewType& lclRowInds,
282 const InputLocalRowPidsViewType& lclRowPids,
283 const typename CountsViewType::non_const_value_type sizeOfLclCount,
284 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
285 const typename CountsViewType::non_const_value_type sizeOfPid,
286 const typename CountsViewType::non_const_value_type sizeOfValue)
// The functor is instantiated on the const versions of the input view types.
288 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
289 CountsViewType,
typename InputOffsetsViewType::const_type,
290 typename InputLocalRowIndicesViewType::const_type,
291 typename InputLocalRowPidsViewType::const_type> functor_type;
292 typedef typename CountsViewType::non_const_value_type count_type;
293 typedef typename OutputOffsetsViewType::size_type size_type;
294 typedef typename OutputOffsetsViewType::execution_space execution_space;
295 typedef typename functor_type::local_row_index_type LO;
296 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
297 const char prefix[] =
"computeNumPacketsAndOffsets: ";
299 count_type count = 0;
300 const count_type numRowsToPack = lclRowInds.extent (0);
// Special case for no rows to pack; its body is not visible in this excerpt.
302 if (numRowsToPack == 0) {
// Argument validation for the nonempty case.
306 TEUCHOS_TEST_FOR_EXCEPTION
307 (rowOffsets.extent (0) <= static_cast<size_type> (1),
308 std::invalid_argument, prefix <<
"There is at least one row to pack, "
309 "but the matrix has no rows. lclRowInds.extent(0) = " <<
310 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
311 rowOffsets.extent (0) <<
" <= 1.");
312 TEUCHOS_TEST_FOR_EXCEPTION
313 (outputOffsets.extent (0) !=
314 static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
315 prefix <<
"Output dimension does not match number of rows to pack. "
316 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
317 <<
" != lclRowInds.extent(0) + 1 = "
318 << static_cast<size_type> (numRowsToPack + 1) <<
".");
319 TEUCHOS_TEST_FOR_EXCEPTION
320 (counts.extent (0) != numRowsToPack, std::invalid_argument,
321 prefix <<
"counts.extent(0) = " << counts.extent (0)
322 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// The scan runs over numRowsToPack + 1 indices, matching the length of
// outputOffsets.
324 functor_type f (outputOffsets, counts, rowOffsets,
325 lclRowInds, lclRowPids, sizeOfLclCount,
326 sizeOfGblColInd, sizeOfPid, sizeOfValue);
327 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// Turn a nonzero functor error code into an exception.
330 const int errCode = f.getError ();
331 TEUCHOS_TEST_FOR_EXCEPTION
332 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
333 << errCode <<
" != 0.");
// Consistency check: the last offset should equal the sum of the counts. On a
// mismatch, a diagnostic is built that prints the arrays when there are fewer
// than 10 rows.
// NOTE(review): the opening preprocessor guard for this region is not visible
// here (only the "#endif // HAVE_TPETRA_DEBUG" below) -- confirm whether this
// code is compiled.
337 for (LO k = 0; k < numRowsToPack; ++k) {
340 if (outputOffsets(numRowsToPack) != total) {
341 if (errStr.get () == NULL) {
342 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
344 std::ostringstream& os = *errStr;
346 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
347 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
348 << total <<
"." << std::endl;
349 if (numRowsToPack != 0) {
351 if (numRowsToPack < static_cast<LO> (10)) {
352 os <<
"outputOffsets: [";
353 for (LO i = 0; i <= numRowsToPack; ++i) {
354 os << outputOffsets(i);
355 if (static_cast<LO> (i + 1) <= numRowsToPack) {
359 os <<
"]" << std::endl;
361 for (LO i = 0; i < numRowsToPack; ++i) {
363 if (static_cast<LO> (i + 1) < numRowsToPack) {
367 os <<
"]" << std::endl;
370 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
371 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
374 count = outputOffsets(numRowsToPack);
// NOTE(review): a braced {bool, pointer} return does not match this
// function's declared return type (count_type); looks like a leftover from a
// different signature -- confirm.
375 return {
false, errStr};
377 #endif // HAVE_TPETRA_DEBUG
// Read the result entry from outputOffsets on host (which entry is cut off
// in this excerpt).
381 using Tpetra::Details::getEntryOnHost;
382 return static_cast<count_type> (getEntryOnHost (outputOffsets,
/// Pack one matrix row into the byte buffer `exports`, starting at `offset`.
///
/// Buffer layout for the row, in order: the entry count, the global column
/// indices, the process IDs (only when pack_pids is true), then the values.
///
/// \return Kokkos::pair (error code, number of bytes written). The error code
///   is 0 on success, 10 if any individual pack step reported an error, and
///   11 if the number of bytes written differs from the expected total.
402 template<
class ST,
class ColumnMap,
class BufferDeviceType>
404 Kokkos::pair<int, size_t>
405 packCrsMatrixRow (
const ColumnMap& col_map,
406 const Kokkos::View<char*, BufferDeviceType>& exports,
411 const size_t num_ent,
412 const size_t num_bytes_per_value,
413 const bool pack_pids)
415 using Kokkos::subview;
416 typedef typename ColumnMap::local_ordinal_type LO;
417 typedef typename ColumnMap::global_ordinal_type GO;
418 typedef BufferDeviceType BDT;
419 typedef Kokkos::pair<int, size_t> return_type;
// Early exit with no error and zero bytes; the condition is not visible here
// (presumably an empty row -- confirm).
423 return return_type (0, 0);
// Byte ranges of the four sections; each section begins where the previous
// one ends.
426 const LO num_ent_LO = static_cast<LO> (num_ent);
427 const size_t num_ent_beg = offset;
430 const size_t gids_beg = num_ent_beg + num_ent_len;
433 const size_t pids_beg = gids_beg + gids_len;
// The PID section has zero length when PIDs are not packed.
434 const size_t pids_len = pack_pids ?
436 static_cast<size_t> (0);
438 const size_t vals_beg = gids_beg + gids_len + pids_len;
439 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw output pointers for each section; pids_out is NULL when PIDs are not
// packed.
441 char*
const num_ent_out = exports.data () + num_ent_beg;
442 char*
const gids_out = exports.data () + gids_beg;
443 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
444 char*
const vals_out = exports.data () + vals_beg;
446 size_t num_bytes_out = 0;
// Convert each local column index to a global index through the column map.
453 for (
size_t k = 0; k < num_ent; ++k) {
454 const LO lid = lids_in[k];
455 const GO gid = col_map.getGlobalElement (lid);
// PIDs are looked up by local column index.
460 for (
size_t k = 0; k < num_ent; ++k) {
461 const LO lid = lids_in[k];
462 const int pid = pids_in[lid];
// Accumulate the error code and byte count returned by a pack step.
468 error_code += p.first;
469 num_bytes_out += p.second;
472 if (error_code != 0) {
473 return return_type (10, num_bytes_out);
// Final check: bytes written must equal the sum of the section lengths.
476 const size_t expected_num_bytes =
477 num_ent_len + gids_len + pids_len + vals_len;
478 if (num_bytes_out != expected_num_bytes) {
479 return return_type (11, num_bytes_out);
481 return return_type (0, num_bytes_out);
/// Reduction functor that packs the exported rows of a local sparse matrix
/// into a byte buffer, one row per index i, by calling packCrsMatrixRow.
///
/// The reduction result (value_type) is a pair of (error code, index). Error
/// codes set in operator(): 1 when the export LID is not less than the
/// matrix's row count, 2 when the row's byte range does not fit in the
/// buffer, 3 when the number of bytes packed differs from
/// num_packets_per_lid(i), or whatever nonzero code packCrsMatrixRow returns.
484 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
485 struct PackCrsMatrixFunctor {
486 typedef LocalMatrix local_matrix_type;
488 typedef typename local_matrix_type::value_type ST;
489 typedef typename local_map_type::local_ordinal_type LO;
490 typedef typename local_map_type::global_ordinal_type GO;
491 typedef typename local_matrix_type::device_type DT;
// View types for the per-row byte counts, the offsets, and the output buffer.
493 typedef Kokkos::View<const size_t*, BufferDeviceType>
494 num_packets_per_lid_view_type;
495 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
496 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
498 export_lids_view_type;
500 source_pids_view_type;
502 typedef typename num_packets_per_lid_view_type::non_const_value_type
504 typedef typename offsets_view_type::non_const_value_type
// Reduction value: (error code, index).
506 typedef Kokkos::pair<int, LO> value_type;
508 static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
509 "local_map_type::local_ordinal_type and "
510 "local_matrix_type::ordinal_type must be the same.");
512 local_matrix_type local_matrix;
513 local_map_type local_col_map;
514 exports_view_type exports;
515 num_packets_per_lid_view_type num_packets_per_lid;
516 export_lids_view_type export_lids;
517 source_pids_view_type source_pids;
518 offsets_view_type offsets;
519 size_t num_bytes_per_value;
// Constructor: stores all arguments, then checks that a nonempty matrix has
// a row map with numRows + 1 entries (std::logic_error otherwise).
522 PackCrsMatrixFunctor (
const local_matrix_type& local_matrix_in,
523 const local_map_type& local_col_map_in,
524 const exports_view_type& exports_in,
525 const num_packets_per_lid_view_type& num_packets_per_lid_in,
526 const export_lids_view_type& export_lids_in,
527 const source_pids_view_type& source_pids_in,
528 const offsets_view_type& offsets_in,
529 const size_t num_bytes_per_value_in,
530 const bool pack_pids_in) :
531 local_matrix (local_matrix_in),
532 local_col_map (local_col_map_in),
533 exports (exports_in),
534 num_packets_per_lid (num_packets_per_lid_in),
535 export_lids (export_lids_in),
536 source_pids (source_pids_in),
537 offsets (offsets_in),
538 num_bytes_per_value (num_bytes_per_value_in),
539 pack_pids (pack_pids_in)
541 const LO numRows = local_matrix_in.numRows ();
543 static_cast<LO> (local_matrix.graph.row_map.extent (0));
544 TEUCHOS_TEST_FOR_EXCEPTION
545 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
546 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
547 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid LO value.
550 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
552 using ::Tpetra::Details::OrdinalTraits;
553 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Reduction join: the visible condition fires when src carries an error and
// dst does not (the body is not shown in this excerpt).
556 KOKKOS_INLINE_FUNCTION
void
557 join (
volatile value_type& dst,
const volatile value_type& src)
const
561 if (src.first != 0 && dst.first == 0) {
// Pack the i-th exported row and record any error in dst.
566 KOKKOS_INLINE_FUNCTION
567 void operator() (
const LO i, value_type& dst)
const
569 const size_t offset = offsets[i];
570 const LO export_lid = export_lids[i];
571 const size_t buf_size = exports.size();
572 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map is indexed with export_lid + 1 here, before the
// bounds check on export_lid below -- confirm this cannot read out of range.
573 const size_t num_ent =
574 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
575 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): init() sets dst.first to 0 and join() tests dst.first == 0,
// but every guard below tests dst.first != 0 before storing an error. As
// written, it looks like an error would never be recorded in a fresh dst;
// presumably `== 0` ("keep only the first error") was intended -- confirm.
585 if (export_lid >= local_matrix.numRows ()) {
586 if (dst.first != 0) {
587 dst = Kokkos::make_pair (1, i);
// The row's byte range [offset, offset + num_bytes) must fit in the buffer.
591 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
592 if (dst.first != 0) {
593 dst = Kokkos::make_pair (2, i);
// Take subviews of this row's values and column indices and pack them.
603 const auto row_beg = local_matrix.graph.row_map[export_lid];
604 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
605 auto vals_in = subview (local_matrix.values,
606 Kokkos::make_pair (row_beg, row_end));
607 auto lids_in = subview (local_matrix.graph.entries,
608 Kokkos::make_pair (row_beg, row_end));
609 typedef local_map_type LMT;
610 typedef BufferDeviceType BDT;
611 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
612 source_pids, vals_in, offset,
613 num_ent, num_bytes_per_value,
615 int error_code_this_row = p.first;
616 size_t num_bytes_packed_this_row = p.second;
617 if (error_code_this_row != 0) {
618 if (dst.first != 0) {
619 dst = Kokkos::make_pair (error_code_this_row, i);
// The row packed without error, but with an unexpected byte count.
622 else if (num_bytes_packed_this_row != num_bytes) {
623 if (dst.first != 0) {
624 dst = Kokkos::make_pair (3, i);
/// Validate the view extents, run PackCrsMatrixFunctor over all exported rows
/// with Kokkos::parallel_reduce, and throw if the functor reports an error.
///
/// Throws std::invalid_argument when offsets does not have
/// export_lids.extent(0) + 1 entries, when export_lids and
/// num_packets_per_lid differ in length, or when pack_pids is true and
/// exports is nonempty but source_pids is empty. Throws std::runtime_error
/// when the reduction result carries a nonzero error code.
637 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
639 do_pack (
const LocalMatrix& local_matrix,
641 const Kokkos::View<char*, BufferDeviceType>& exports,
642 const typename PackTraits<
645 >::input_array_type& num_packets_per_lid,
646 const typename PackTraits<
647 typename LocalMap::local_ordinal_type,
648 typename LocalMatrix::device_type
649 >::input_array_type& export_lids,
650 const typename PackTraits<
652 typename LocalMatrix::device_type
653 >::input_array_type& source_pids,
654 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
655 const size_t num_bytes_per_value,
656 const bool pack_pids)
658 typedef typename LocalMap::local_ordinal_type LO;
659 typedef typename LocalMatrix::device_type DT;
660 typedef Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
661 const char prefix[] =
"Tpetra::Details::do_pack: ";
// The extent checks are only made when there is at least one row to export.
663 if (export_lids.extent (0) != 0) {
664 TEUCHOS_TEST_FOR_EXCEPTION
665 (static_cast<size_t> (offsets.extent (0)) !=
666 static_cast<size_t> (export_lids.extent (0) + 1),
667 std::invalid_argument, prefix <<
"offsets.extent(0) = "
668 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
669 << export_lids.extent (0) <<
") + 1.");
670 TEUCHOS_TEST_FOR_EXCEPTION
671 (export_lids.extent (0) != num_packets_per_lid.extent (0),
672 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
673 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
674 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a nonempty buffer requires source PIDs to be supplied.
678 TEUCHOS_TEST_FOR_EXCEPTION
679 (pack_pids && exports.extent (0) != 0 &&
680 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
681 "pack_pids is true, and exports.extent(0) = " <<
682 exports.extent (0) <<
" != 0, meaning that we need to pack at "
683 "least one matrix entry, but source_pids.extent(0) = 0.");
// Build the functor and reduce over one index per num_packets_per_lid entry.
686 typedef PackCrsMatrixFunctor<LocalMatrix,
LocalMap,
687 BufferDeviceType> pack_functor_type;
688 pack_functor_type f (local_matrix, local_map, exports,
689 num_packets_per_lid, export_lids,
690 source_pids, offsets, num_bytes_per_value,
693 typename pack_functor_type::value_type result;
694 range_type range (0, num_packets_per_lid.extent (0));
695 Kokkos::parallel_reduce (range, f, result);
// result.first is the error code; result.second is the index it was
// recorded for.
697 if (result.first != 0) {
698 std::ostringstream os;
// Error code 1: report the offending export LID.
700 if (result.first == 1) {
701 auto export_lids_h = Kokkos::create_mirror_view (export_lids);
703 const auto firstBadLid = export_lids_h(result.second);
704 os <<
"First bad export LID: export_lids(i=" << result.second <<
") = "
// Error code 2: report the offending offset and byte count.
707 else if (result.first == 2) {
708 auto offsets_h = Kokkos::create_mirror_view (offsets);
710 const auto firstBadOffset = offsets_h(result.second);
712 auto num_packets_per_lid_h =
713 Kokkos::create_mirror_view (num_packets_per_lid);
715 os <<
"First bad offset: offsets(i=" << result.second <<
") = "
716 << firstBadOffset <<
", num_packets_per_lid(i) = "
717 << num_packets_per_lid_h(result.second) <<
", buf_size = "
721 TEUCHOS_TEST_FOR_EXCEPTION
722 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor reported "
723 "error code " << result.first <<
" for the first bad row "
724 << result.second <<
". " << os.str ());
/// Pack the rows of sourceMatrix listed in export_lids into the DualView
/// `exports` (the PackCrsMatrixImpl implementation, per the `prefix` string).
///
/// Steps visible in this excerpt:
///  - constant_num_packets is set to 0.
///  - export_lids and num_packets_per_lid are checked for consistent length,
///    and num_packets_per_lid for a non-NULL data pointer when there are rows
///    to export (std::invalid_argument otherwise).
///  - num_bytes_per_value is obtained by a reduceAll over the matrix's
///    communicator of a locally determined value.
///  - With no rows to export, exports is replaced by a zero-length view.
///  - Otherwise computeNumPacketsAndOffsets fills offsets and
///    num_packets_per_lid, exports is reallocated if the required byte count
///    exceeds its current extent, and do_pack writes into the exports view.
757 template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
759 packCrsMatrix (
const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
760 Kokkos::DualView<char*, BufferDeviceType>& exports,
761 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
762 const Kokkos::View<const LO*, typename NT::device_type>& export_lids,
763 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
764 size_t& constant_num_packets,
765 const bool pack_pids,
769 typedef BufferDeviceType DT;
770 typedef typename DT::execution_space execution_space;
771 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
772 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch for the "***" diagnostic output below.
773 constexpr
bool debug =
false;
775 auto local_matrix = sourceMatrix.getLocalMatrix ();
776 auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
781 constant_num_packets = 0;
// Input validation.
783 const size_t num_export_lids =
784 static_cast<size_t> (export_lids.extent (0));
785 TEUCHOS_TEST_FOR_EXCEPTION
787 static_cast<size_t> (num_packets_per_lid.extent (0)),
788 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
789 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
790 << num_packets_per_lid.extent (0) <<
".");
791 if (num_export_lids != 0) {
792 TEUCHOS_TEST_FOR_EXCEPTION
793 (num_packets_per_lid.data () == NULL, std::invalid_argument,
794 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
795 "num_packets_per_lid.data() = "
796 << num_packets_per_lid.data () <<
" == NULL.");
// Determine the number of bytes per matrix value. The local value is derived
// from the first stored value when the matrix has any (how is not visible
// here), then combined across processes with reduceAll (the reduction
// operator is not visible here).
803 size_t num_bytes_per_value = 0;
818 size_t num_bytes_per_value_l = 0;
819 if (local_matrix.values.extent(0) > 0) {
820 const ST& val = local_matrix.values(0);
823 using Teuchos::reduceAll;
824 reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
826 num_bytes_per_value_l,
827 Teuchos::outArg (num_bytes_per_value));
// Nothing to export: replace exports with an empty view, fencing before and
// after the reallocation.
830 if (num_export_lids == 0) {
834 execution_space::fence ();
835 exports = exports_view_type (
"exports", 0);
836 execution_space::fence ();
// One offset per exported row, plus one.
841 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
846 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
847 local_matrix.graph.row_map, export_lids,
849 num_bytes_per_lid, num_bytes_per_gid,
850 num_bytes_per_pid, num_bytes_per_value);
// Grow the export buffer only when the required byte count exceeds its
// current extent.
853 if (count > static_cast<size_t> (exports.extent (0))) {
857 execution_space::fence ();
858 exports = exports_view_type (
"exports", count);
860 std::ostringstream os;
861 os <<
"*** exports resized to " << count << std::endl;
862 std::cerr << os.str ();
864 execution_space::fence ();
867 std::ostringstream os;
868 os <<
"*** count: " << count <<
", exports.extent(0): "
869 << exports.extent (0) << std::endl;
870 std::cerr << os.str ();
// Packing PIDs into a nonempty buffer requires export PIDs to be supplied.
876 TEUCHOS_TEST_FOR_EXCEPTION
877 (pack_pids && exports.extent (0) != 0 &&
878 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
879 "pack_pids is true, and exports.extent(0) = " <<
880 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
881 "one matrix entry, but export_pids.extent(0) = 0.");
883 typedef typename std::decay<decltype (local_matrix)>::type
885 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the DualView as modified in the buffer memory space, then pack into
// that space's view.
887 typedef typename exports_view_type::t_dev dev_exports_view_type;
888 typedef typename dev_exports_view_type::memory_space buf_mem_space;
889 exports.template modify<buf_mem_space> ();
890 auto exports_d = exports.template view<buf_mem_space> ();
891 do_pack<local_matrix_type, local_map_type, DT>
892 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
893 export_lids, export_pids, offsets, num_bytes_per_value,
/// Teuchos::Array / Teuchos::ArrayView interface to matrix packing (the
/// function name line is not visible in this excerpt; the parameter list
/// matches the Details::packCrsMatrix entry in the instantiation macro).
///
/// Builds Kokkos views from the caller's arrays, calls
/// PackCrsMatrixImpl::packCrsMatrix with pack_pids = false and an empty PID
/// view, then wraps numPacketsPerLID and the (resized) exports array in host
/// views. NOTE(review): presumably the device results are copied back into
/// those host views -- the copy statements are not visible here.
900 template<
typename ST,
typename LO,
typename GO,
typename NT>
903 Teuchos::Array<char>& exports,
904 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
905 const Teuchos::ArrayView<const LO>& exportLIDs,
906 size_t& constantNumPackets,
910 typedef typename local_matrix_type::device_type device_type;
911 typedef typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space host_exec_space;
912 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
// Choose the memory space for the pack buffer; with CUDA enabled it is
// selected through std::conditional (the full condition is not visible here).
917 typedef typename device_type::execution_space buffer_exec_space;
918 #ifdef KOKKOS_ENABLE_CUDA
919 typedef typename std::conditional<
921 buffer_exec_space, Kokkos::Cuda
924 typename device_type::memory_space
925 >::type buffer_memory_space;
927 typedef typename device_type::memory_space buffer_memory_space;
928 #endif // KOKKOS_ENABLE_CUDA
929 typedef Kokkos::Device<buffer_exec_space,
930 buffer_memory_space> buffer_device_type;
// Views built from the caller's raw pointers and sizes (the factory call
// itself is not visible here; note the differing bool argument for the
// output array versus the input array).
936 typename local_matrix_type::device_type outputDevice;
937 auto num_packets_per_lid_d =
939 numPacketsPerLID.getRawPtr (),
940 numPacketsPerLID.size (),
false,
941 "num_packets_per_lid");
946 exportLIDs.getRawPtr (),
947 exportLIDs.size (),
true,
// This interface never packs PIDs, so an empty PID view is passed.
950 Kokkos::View<int*, device_type> export_pids_d (
"export_pids", 0);
952 Kokkos::DualView<char*, buffer_device_type> exports_dv (
"exports", 0);
953 constexpr
bool pack_pids =
false;
954 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
955 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
956 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view over the caller's numPacketsPerLID storage.
960 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
961 (numPacketsPerLID.getRawPtr (),
962 numPacketsPerLID.size ());
// Make the caller's exports array match the packed buffer's length.
970 if (static_cast<size_t> (exports.size ()) !=
971 static_cast<size_t> (exports_dv.extent (0))) {
972 exports.resize (exports_dv.extent (0));
974 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
/// Kokkos::DualView interface to matrix packing (the function name line is
/// not visible in this excerpt; the parameter list matches the
/// Details::packCrsMatrixNew entry in the instantiation macro).
///
/// Calls PackCrsMatrixImpl::packCrsMatrix with pack_pids = false and an empty
/// PID view, using views of numPacketsPerLID and exportLIDs.
979 template<
typename ST,
typename LO,
typename GO,
typename NT>
984 const Kokkos::DualView<const LO*, typename NT::device_type>& exportLIDs,
985 size_t& constantNumPackets,
989 typedef typename local_matrix_type::device_type device_type;
// Choose the memory space for the pack buffer; with CUDA enabled it is
// selected through std::conditional (the full condition is not visible here).
994 typedef typename device_type::execution_space buffer_exec_space;
995 #ifdef KOKKOS_ENABLE_CUDA
996 typedef typename std::conditional<
998 buffer_exec_space, Kokkos::Cuda
1001 typename device_type::memory_space
1002 >::type buffer_memory_space;
1004 typedef typename device_type::memory_space buffer_memory_space;
1005 #endif // KOKKOS_ENABLE_CUDA
1006 typedef Kokkos::Device<buffer_exec_space,
1007 buffer_memory_space> buffer_device_type;
// This interface never packs PIDs, so an empty PID view is passed.
1010 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
1011 constexpr
bool pack_pids =
false;
// Work on a copy of the DualView handle and set its modified flags by hand so
// that the data is marked as modified on device only.
1014 auto numPacketsPerLID_nc = numPacketsPerLID;
1015 numPacketsPerLID_nc.modified_host() = 0;
1016 numPacketsPerLID_nc.modified_device() = 1;
1017 auto numPacketsPerLID_d = numPacketsPerLID.template view<buffer_memory_space> ();
// Sync the export LIDs to the device memory space before taking the view
// (the declaration of exportLIDs_nc is not visible in this excerpt).
1021 exportLIDs_nc.template sync<typename device_type::memory_space> ();
1022 auto exportLIDs_d = exportLIDs.template view<typename device_type::memory_space> ();
1024 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
1025 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
1026 exportPIDs_d, constantNumPackets, pack_pids, distor);
/// Matrix packing that also packs process IDs (the function name line is not
/// visible in this excerpt; the parameter list matches the
/// Details::packCrsMatrixWithOwningPIDs entry in the instantiation macro).
///
/// Builds views from numPacketsPerLID, exportLIDs and sourcePIDs, then calls
/// PackCrsMatrixImpl::packCrsMatrix with pack_pids = true. Afterwards the
/// caller's numPacketsPerLID storage is wrapped in a host view.
/// NOTE(review): presumably the device counts are copied back into that host
/// view -- the copy is not visible here.
1029 template<
typename ST,
typename LO,
typename GO,
typename NT>
1033 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
1034 const Teuchos::ArrayView<const LO>& exportLIDs,
1035 const Teuchos::ArrayView<const int>& sourcePIDs,
1036 size_t& constantNumPackets,
1041 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
1042 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
1044 typename local_matrix_type::device_type outputDevice;
// Views built from the caller's raw pointers and sizes (the factory call
// itself is not visible here; note the differing bool argument for the
// output array versus the two input arrays).
1050 auto num_packets_per_lid_d =
1052 numPacketsPerLID.getRawPtr (),
1053 numPacketsPerLID.size (),
false,
1054 "num_packets_per_lid");
1058 auto export_lids_d =
1060 exportLIDs.getRawPtr (),
1061 exportLIDs.size (),
true,
1065 auto export_pids_d =
1067 sourcePIDs.getRawPtr (),
1068 sourcePIDs.size (),
true,
// Unlike the other two interfaces, this one packs PIDs.
1070 constexpr
bool pack_pids =
true;
1071 PackCrsMatrixImpl::packCrsMatrix(
1072 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1073 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view over the caller's numPacketsPerLID storage.
1077 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1078 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// Macro taking the template arguments (ST, LO, GO, NT) and naming the three
// pack entry points for them: Details::packCrsMatrix,
// Details::packCrsMatrixNew and Details::packCrsMatrixWithOwningPIDs.
// NOTE(review): presumably these are explicit template instantiations -- the
// `template` keyword / return-type lines are not visible in this excerpt.
1085 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1087 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1088 Teuchos::Array<char>&, \
1089 const Teuchos::ArrayView<size_t>&, \
1090 const Teuchos::ArrayView<const LO>&, \
1094 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1095 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1096 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1097 const Kokkos::DualView<const LO*, NT::device_type>&, \
1101 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1102 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1103 const Teuchos::ArrayView<size_t>&, \
1104 const Teuchos::ArrayView<const LO>&, \
1105 const Teuchos::ArrayView<const int>&, \
1109 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP