42 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
81 #ifndef DOXYGEN_SHOULD_SKIP_THIS
84 #endif // DOXYGEN_SHOULD_SKIP_THIS
91 namespace PackCrsGraphImpl {
// NumPacketsAndOffsetsFunctor (the name comes from its use in
// computeNumPacketsAndOffsets): a Kokkos::parallel_scan functor.  For each
// index curInd it stores the running total accumulated so far in
// outputOffsets_(curInd) and, when curInd addresses a row to pack, stores
// that row's packet count in counts_(curInd) and adds it to the total.
//
// NOTE(review): the embedded line numbering in this listing has gaps, so
// the class header, parts of the constructor and the error-code branches of
// operator() are not visible here.  The comments below describe only the
// statements that are visible.
99 template<
class OutputOffsetsViewType,
100 class CountsViewType,
101 class InputOffsetsViewType,
102 class InputLocalRowIndicesViewType,
103 class InputLocalRowPidsViewType,
105 #ifdef HAVE_TPETRA_DEBUG
109 #endif // HAVE_TPETRA_DEBUG
// Element types of the five views this functor works on.
113 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
114 typedef typename CountsViewType::non_const_value_type count_type;
115 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
116 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
117 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The output offsets view decides the device; the counts view must live in
// the same execution space.
119 typedef typename OutputOffsetsViewType::device_type device_type;
120 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
121 typename device_type::execution_space>::value,
122 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
123 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
124 "OutputOffsetsViewType must be a Kokkos::View.");
// value_type equals non_const_value_type only when the view is nonconst.
125 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
126 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
127 static_assert (std::is_integral<output_offset_type>::value,
128 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
129 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
130 "CountsViewType must be a Kokkos::View.");
// Constness check for the counts view: compare against count_type (the
// counts view's own nonconst value type).  Comparing against
// output_offset_type, as was done before, tested "same element type as the
// offsets view" instead of the constness that the message describes.
131 static_assert (std::is_same<typename CountsViewType::value_type, count_type>::value,
132 "CountsViewType must be a nonconst Kokkos::View.");
133 static_assert (std::is_integral<count_type>::value,
134 "The type of each entry of CountsViewType must be a built-in integer type.");
135 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
136 "InputOffsetsViewType must be a Kokkos::View.");
137 static_assert (std::is_integral<input_offset_type>::value,
138 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
139 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
140 "InputLocalRowIndicesViewType must be a Kokkos::View.");
141 static_assert (std::is_integral<local_row_index_type>::value,
142 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor (its first line is not visible): stores the views and then
// validates their lengths.  Throws std::invalid_argument when counts does
// not have one entry per row to pack, or when outputOffsets does not have
// exactly one more entry than that.
145 const CountsViewType& counts,
146 const InputOffsetsViewType& rowOffsets,
147 const InputLocalRowIndicesViewType& lclRowInds,
148 const InputLocalRowPidsViewType& lclRowPids) :
149 outputOffsets_ (outputOffsets),
151 rowOffsets_ (rowOffsets),
152 lclRowInds_ (lclRowInds),
153 lclRowPids_ (lclRowPids),
// The number of rows to pack is the length of the local row index list.
157 const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
159 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
160 std::ostringstream os;
161 os <<
"lclRowInds.extent(0) = " << numRowsToPack
162 <<
" != counts.extent(0) = " << counts_.extent (0)
164 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
166 if (static_cast<size_t> (numRowsToPack + 1) !=
167 static_cast<size_t> (outputOffsets_.extent (0))) {
168 std::ostringstream os;
169 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
170 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
172 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body.  curInd is the scan index, update is the running total and
// final is the flag Kokkos passes to scan functors.
// NOTE(review): the branches that test `final` and that record an error
// code are among the lines missing from this listing.
177 KOKKOS_INLINE_FUNCTION
void
178 operator() (
const local_row_index_type& curInd,
179 output_offset_type& update,
180 const bool final)
const
// Range checks on the scan index itself.
183 if (curInd < static_cast<local_row_index_type> (0)) {
191 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// The offset of entry curInd is the total accumulated before it.
196 outputOffsets_(curInd) = update;
// Only indices below counts_.extent(0) name a row; outputOffsets_ has one
// extra trailing entry (checked in the constructor).
199 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
200 const auto lclRow = lclRowInds_(curInd);
// Reject row indices for which rowOffsets_(lclRow+1) would be out of range,
// and negative row indices.
201 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
202 static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
// Number of entries in the row = difference of consecutive row offsets.
210 const count_type count =
211 static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// One packet per entry, or two per entry when lclRowPids_ is nonempty.
215 const count_type numEntToPack = (count == 0)
216 ? static_cast<count_type>(0)
217 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
220 counts_(curInd) = numEntToPack;
222 update += numEntToPack;
// Host mirror of the error flag view error_.  Presumably part of
// getError(), which computeNumPacketsAndOffsets calls; the enclosing
// lines are not visible.
232 auto error_h = Kokkos::create_mirror_view (error_);
// Data members: the two output views, const views of the three inputs, and
// a rank-0 int view on the device holding the error code.
238 OutputOffsetsViewType outputOffsets_;
239 CountsViewType counts_;
240 typename InputOffsetsViewType::const_type rowOffsets_;
241 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
242 typename InputLocalRowPidsViewType::const_type lclRowPids_;
243 Kokkos::View<int, device_type> error_;
// Fill `counts` and `outputOffsets` for the rows listed in lclRowInds by
// running NumPacketsAndOffsetsFunctor under Kokkos::parallel_scan, and
// return the last entry of outputOffsets as a count_type.
//
// Throws std::invalid_argument when the view lengths are inconsistent, and
// std::runtime_error when the functor reports a nonzero error code.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// early-return body for zero rows, the opening of the debug-only region and
// the end of the final return statement are not visible.
255 template<
class OutputOffsetsViewType,
256 class CountsViewType,
257 class InputOffsetsViewType,
258 class InputLocalRowIndicesViewType,
259 class InputLocalRowPidsViewType>
260 typename CountsViewType::non_const_value_type
261 computeNumPacketsAndOffsets(
const OutputOffsetsViewType& outputOffsets,
262 const CountsViewType& counts,
263 const InputOffsetsViewType& rowOffsets,
264 const InputLocalRowIndicesViewType& lclRowInds,
265 const InputLocalRowPidsViewType& lclRowPids)
// The functor is instantiated on the const_type of the three input views.
267 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
268 CountsViewType,
typename InputOffsetsViewType::const_type,
269 typename InputLocalRowIndicesViewType::const_type,
270 typename InputLocalRowPidsViewType::const_type> functor_type;
271 typedef typename CountsViewType::non_const_value_type count_type;
272 typedef typename OutputOffsetsViewType::size_type size_type;
273 typedef typename OutputOffsetsViewType::execution_space execution_space;
274 typedef typename functor_type::local_row_index_type LO;
275 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
// Prefix for every exception message raised below.
276 const char prefix[] =
"computeNumPacketsAndOffsets: ";
278 count_type count = 0;
279 const count_type numRowsToPack = lclRowInds.extent (0);
// Special case: nothing to pack (the body of this branch is not visible).
281 if (numRowsToPack == 0) {
// Input validation: the graph must have at least one row, outputOffsets
// must have numRowsToPack + 1 entries, and counts numRowsToPack entries.
285 TEUCHOS_TEST_FOR_EXCEPTION
286 (rowOffsets.extent (0) <= static_cast<size_type> (1),
287 std::invalid_argument, prefix <<
"There is at least one row to pack, "
288 "but the graph has no rows.  lclRowInds.extent(0) = " <<
289 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
290 rowOffsets.extent (0) <<
" <= 1.");
291 TEUCHOS_TEST_FOR_EXCEPTION
292 (outputOffsets.extent (0) !=
293 static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
294 prefix <<
"Output dimension does not match number of rows to pack.  "
295 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
296 <<
" != lclRowInds.extent(0) + 1 = "
297 << static_cast<size_type> (numRowsToPack + 1) <<
".");
298 TEUCHOS_TEST_FOR_EXCEPTION
299 (counts.extent (0) != numRowsToPack, std::invalid_argument,
300 prefix <<
"counts.extent(0) = " << counts.extent (0)
301 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// The scan runs over numRowsToPack + 1 indices so that the final entry of
// outputOffsets receives the grand total.
303 functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
304 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// Surface any error code the functor recorded during the scan.
307 const int errCode = f.getError ();
308 TEUCHOS_TEST_FOR_EXCEPTION
309 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
310 << errCode <<
" != 0.");
// Consistency check, closed by the "#endif // HAVE_TPETRA_DEBUG" below:
// compares the last offset with the sum of the counts and, on mismatch,
// builds a diagnostic message that lists both arrays when there are fewer
// than 10 rows.
// NOTE(review): the directive that opens this region, and the declarations
// of `total` and `errStr`, are not visible.  The `return {false, errStr};`
// does not match this function's declared return type, so this region may
// be disabled in the full file; confirm before relying on it.
314 for (LO k = 0; k < numRowsToPack; ++k) {
317 if (outputOffsets(numRowsToPack) != total) {
318 if (errStr.get () == NULL) {
319 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
321 std::ostringstream& os = *errStr;
323 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
324 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
325 << total <<
"." << std::endl;
326 if (numRowsToPack != 0) {
328 if (numRowsToPack < static_cast<LO> (10)) {
329 os <<
"outputOffsets: [";
330 for (LO i = 0; i <= numRowsToPack; ++i) {
331 os << outputOffsets(i);
332 if (static_cast<LO> (i + 1) <= numRowsToPack) {
336 os <<
"]" << std::endl;
338 for (LO i = 0; i < numRowsToPack; ++i) {
340 if (static_cast<LO> (i + 1) < numRowsToPack) {
344 os <<
"]" << std::endl;
347 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
348 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
351 count = outputOffsets(numRowsToPack);
352 return {
false, errStr};
354 #endif // HAVE_TPETRA_DEBUG
// Fetch one entry of outputOffsets through getEntryOnHost and return it
// (the remaining arguments of the call are on a line not shown here).
358 using Tpetra::Details::getEntryOnHost;
359 return static_cast<count_type> (getEntryOnHost (outputOffsets,
// Pack one graph row into `exports`, starting at position `offset`.
//
// Writes the global index of each of the row's num_ent column indices to
// exports(offset + k).  If pack_pids is true, it additionally writes the
// process id of each entry, cast to GO, to exports(offset + num_ent + k).
// Returns the number of packets written: num_ent, or 2 * num_ent when
// pack_pids is true.
//
// NOTE(review): the return type and the lids_in / pids_in / offset
// parameters are on lines missing from this listing, as are the conditions
// guarding the early return and the second loop.
374 template<
class Packet,
class ColumnMap,
class BufferDeviceType>
377 packRow(
const ColumnMap& col_map,
378 const Kokkos::View<Packet*, BufferDeviceType>& exports,
382 const size_t num_ent,
383 const bool pack_pids)
385 using Kokkos::subview;
386 typedef typename ColumnMap::local_ordinal_type LO;
387 typedef typename ColumnMap::global_ordinal_type GO;
// Early exit returning zero packets (presumably for an empty row; the
// guarding condition is not visible).
391 return static_cast<size_t>(0);
394 size_t num_ent_packed = num_ent;
395 if (pack_pids) num_ent_packed += num_ent;
// First segment: global column indices, translated through the column map.
399 for (
size_t k = 0; k < num_ent; ++k) {
400 const LO lid = lids_in[k];
401 const GO gid = col_map.getGlobalElement (lid);
402 exports(offset+k) = gid;
// Second segment: process ids.  pids_in is indexed by the local column
// index, not by the position k within the row.
406 for (
size_t k = 0; k < num_ent; ++k) {
407 const LO lid = lids_in[k];
408 const int pid = pids_in[lid];
409 exports(offset+num_ent+k) = static_cast<GO>(pid);
413 return num_ent_packed;
// Reduction functor that packs the rows named by export_lids into the
// exports buffer, one row per index i.  The reduction value is a pair
// (error code, index): code 0 means no error; do_pack reports code 1 as a
// bad export LID and code 2 as a bad offset, and code 3 is set here when
// packRow's packet count disagrees with num_packets_per_lid(i).
//
// NOTE(review): this listing has gaps (see the embedded line numbers); some
// typedefs, the pack_pids member declaration, and the braces / else /
// return lines of the member functions are not visible.
416 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDeviceType>
417 struct PackCrsGraphFunctor {
418 typedef LocalGraph local_graph_type;
420 typedef typename local_map_type::local_ordinal_type LO;
421 typedef typename local_map_type::global_ordinal_type GO;
422 typedef typename local_graph_type::device_type device_type;
// View types for the inputs and the output buffer.
424 typedef Kokkos::View<const size_t*, BufferDeviceType>
425 num_packets_per_lid_view_type;
426 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
427 typedef Kokkos::View<Packet*, BufferDeviceType> exports_view_type;
429 export_lids_view_type;
431 source_pids_view_type;
433 typedef typename num_packets_per_lid_view_type::non_const_value_type
435 typedef typename offsets_view_type::non_const_value_type
// Reduction value: (error code, index i at which it was recorded).
437 typedef Kokkos::pair<int, LO> value_type;
439 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
440 "local_map_type::local_ordinal_type and "
441 "local_graph_type::data_type must be the same.");
443 local_graph_type local_graph;
444 local_map_type local_col_map;
445 exports_view_type exports;
446 num_packets_per_lid_view_type num_packets_per_lid;
447 export_lids_view_type export_lids;
448 source_pids_view_type source_pids;
449 offsets_view_type offsets;
// Constructor: stores all inputs, then throws std::logic_error if the graph
// has rows but its row_map does not have numRows + 1 entries.
452 PackCrsGraphFunctor(
const local_graph_type& local_graph_in,
453 const local_map_type& local_col_map_in,
454 const exports_view_type& exports_in,
455 const num_packets_per_lid_view_type& num_packets_per_lid_in,
456 const export_lids_view_type& export_lids_in,
457 const source_pids_view_type& source_pids_in,
458 const offsets_view_type& offsets_in,
459 const bool pack_pids_in) :
460 local_graph (local_graph_in),
461 local_col_map (local_col_map_in),
462 exports (exports_in),
463 num_packets_per_lid (num_packets_per_lid_in),
464 export_lids (export_lids_in),
465 source_pids (source_pids_in),
466 offsets (offsets_in),
467 pack_pids (pack_pids_in)
469 const LO numRows = local_graph_in.numRows ();
471 static_cast<LO> (local_graph.row_map.extent (0));
472 TEUCHOS_TEST_FOR_EXCEPTION
473 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
474 std::logic_error,
"local_graph.row_map.extent(0) = "
475 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: no error, invalid index.
478 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
480 using ::Tpetra::Details::OrdinalTraits;
481 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combine two partial results: src is considered only when it carries an
// error and dst does not, i.e. an error already held by dst is kept.
484 KOKKOS_INLINE_FUNCTION
void
485 join (
volatile value_type& dst,
const volatile value_type& src)
const
489 if (src.first != 0 && dst.first == 0) {
// Pack row export_lids[i] at offsets[i]; on failure, record an error code
// in dst unless dst already holds one.
494 KOKKOS_INLINE_FUNCTION
495 void operator() (
const LO i, value_type& dst)
const
497 const size_t offset = offsets[i];
498 const LO export_lid = export_lids[i];
499 const size_t buf_size = exports.size();
500 const size_t num_packets_this_lid = num_packets_per_lid(i);
// NOTE(review): row_map is read at export_lid and export_lid + 1 here,
// before export_lid is range-checked just below; an out-of-range LID is
// therefore dereferenced before it is rejected.
501 const size_t num_ent =
502 static_cast<size_t> (local_graph.row_map[export_lid+1]
503 - local_graph.row_map[export_lid]);
// Error 1: the export LID is not a row of the local graph.
// The guard is "dst.first == 0" so that the first error gets recorded.
// init() starts dst.first at 0, so the former "!= 0" test could never be
// true and no error was ever reported back to do_pack.
513 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
514 if (dst.first == 0) {
515 dst = Kokkos::make_pair (1, i);
// Error 2: this row's packets would not fit in the exports buffer.
519 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
520 if (dst.first == 0) {
521 dst = Kokkos::make_pair (2, i);
// Normal path: take the row's column indices as a subview of the graph's
// entries and hand them to packRow.
531 const auto row_beg = local_graph.row_map[export_lid];
532 const auto row_end = local_graph.row_map[export_lid + 1];
533 auto lids_in = subview (local_graph.entries,
534 Kokkos::make_pair (row_beg, row_end));
535 typedef local_map_type LMT;
537 typedef BufferDeviceType BDT;
538 size_t num_ent_packed_this_row =
539 packRow<PT,LMT,BDT>(local_col_map, exports, lids_in,
540 source_pids, offset, num_ent, pack_pids);
// Error 3: packRow wrote a different number of packets than expected.
541 if (num_ent_packed_this_row != num_packets_this_lid) {
542 if (dst.first == 0) {
543 dst = Kokkos::make_pair (3, i);
// Validate the packing inputs, run PackCrsGraphFunctor over all export
// LIDs with Kokkos::parallel_reduce, and turn a nonzero result code into a
// std::runtime_error.
//
// Throws std::invalid_argument when offsets / export_lids /
// num_packets_per_lid have inconsistent lengths, or when pack_pids is true
// and exports is nonempty but source_pids is empty.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// return type, the local_map parameter and parts of the PackTraits
// parameter types are not visible.
556 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDeviceType>
558 do_pack(
const LocalGraph& local_graph,
560 const Kokkos::View<Packet*, BufferDeviceType>& exports,
561 const typename PackTraits<
564 >::input_array_type& num_packets_per_lid,
565 const typename PackTraits<
566 typename LocalMap::local_ordinal_type,
567 typename LocalGraph::device_type
568 >::input_array_type& export_lids,
569 const typename PackTraits<
571 typename LocalGraph::device_type
572 >::input_array_type& source_pids,
573 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
574 const bool pack_pids)
576 typedef typename LocalMap::local_ordinal_type LO;
577 typedef typename LocalGraph::device_type device_type;
578 typedef Kokkos::RangePolicy<typename device_type::execution_space, LO> range_type;
// Prefix for every exception message raised below.
579 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
// Length checks apply only when there is at least one LID to export.
581 if (export_lids.extent (0) != 0) {
582 TEUCHOS_TEST_FOR_EXCEPTION
583 (static_cast<size_t> (offsets.extent (0)) !=
584 static_cast<size_t> (export_lids.extent (0) + 1),
585 std::invalid_argument, prefix <<
"offsets.extent(0) = "
586 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
587 << export_lids.extent (0) <<
") + 1.");
588 TEUCHOS_TEST_FOR_EXCEPTION
589 (export_lids.extent (0) != num_packets_per_lid.extent (0),
590 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
591 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
592 << num_packets_per_lid.extent (0) <<
".");
// Packing process ids needs a nonempty source_pids whenever anything is
// going to be written to exports.
596 TEUCHOS_TEST_FOR_EXCEPTION
597 (pack_pids && exports.extent (0) != 0 &&
598 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
599 "pack_pids is true, and exports.extent(0) = " <<
600 exports.extent (0) <<
" != 0, meaning that we need to pack at "
601 "least one graph entry, but source_pids.extent(0) = 0.");
// One functor invocation per export LID; `result` receives the reduced
// (error code, index) pair.
604 typedef PackCrsGraphFunctor<Packet,LocalGraph,LocalMap,BufferDeviceType> pack_functor_type;
605 pack_functor_type f (local_graph, local_map, exports,
606 num_packets_per_lid, export_lids,
607 source_pids, offsets, pack_pids);
609 typename pack_functor_type::value_type result;
610 range_type range (0, num_packets_per_lid.extent (0));
611 Kokkos::parallel_reduce (range, f, result);
// Nonzero code: gather details about the offending index, then throw.
613 if (result.first != 0) {
614 std::ostringstream os;
// Code 1: report the export LID at the recorded index.
// NOTE(review): only create_mirror_view is visible here; any copy into the
// mirror happens on lines not shown.
616 if (result.first == 1) {
617 auto export_lids_h = Kokkos::create_mirror_view (export_lids);
619 const auto firstBadLid = export_lids_h(result.second);
620 os <<
"First bad export LID: export_lids(i=" << result.second <<
") = "
// Code 2: report the offset and packet count at the recorded index.
623 else if (result.first == 2) {
624 auto offsets_h = Kokkos::create_mirror_view (offsets);
626 const auto firstBadOffset = offsets_h(result.second);
628 auto num_packets_per_lid_h =
629 Kokkos::create_mirror_view (num_packets_per_lid);
631 os <<
"First bad offset: offsets(i=" << result.second <<
") = "
632 << firstBadOffset <<
", num_packets_per_lid(i) = "
633 << num_packets_per_lid_h(result.second) <<
", buf_size = "
637 TEUCHOS_TEST_FOR_EXCEPTION
638 (
true, std::runtime_error, prefix <<
"PackCrsGraphFunctor reported "
639 "error code " << result.first <<
" for the first bad row "
640 << result.second <<
". " << os.str ());
// View-based packing driver (the exception prefix names it
// Tpetra::Details::packCrsGraph, and the front ends call it as
// PackCrsGraphImpl::packCrsGraph).  It sets constant_num_packets to 0,
// computes per-LID packet counts and offsets, replaces `exports` with a
// larger DualView when the required count exceeds its current length, and
// calls do_pack on the buffer-memory-space view of exports.
//
// Throws std::invalid_argument on inconsistent input lengths, on a null
// num_packets_per_lid with a nonzero number of export LIDs, or when
// pack_pids is true and exports is nonempty but export_pids is empty.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// function name line, the sourceGraph / exports / distor parameters, the
// declaration of `count` and the guards around the debug output are not
// visible.
670 template<
typename LO,
typename GO,
typename NT>
675 const Kokkos::View<
size_t*,
677 const Kokkos::View<const LO*, typename NT::device_type>& export_lids,
678 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
679 size_t& constant_num_packets,
680 const bool pack_pids,
686 typedef typename buffer_device_type::execution_space execution_space;
687 typedef Kokkos::DualView<packet_type*,buffer_device_type> exports_view_type;
// Prefix for every exception message raised below.
688 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
// Compile-time switch, set to false here.
689 constexpr
bool debug =
false;
// Local (per-process) graph and local column map of the source graph.
691 auto local_graph = sourceGraph.getLocalGraph ();
692 auto local_col_map = sourceGraph.getColMap ()->getLocalMap ();
697 constant_num_packets = 0;
699 const size_t num_export_lids =
700 static_cast<size_t> (export_lids.extent (0));
// export_lids and num_packets_per_lid must have the same length (the first
// operand of the comparison is on a line not shown).
701 TEUCHOS_TEST_FOR_EXCEPTION
703 static_cast<size_t> (num_packets_per_lid.extent (0)),
704 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
705 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
706 << num_packets_per_lid.extent (0) <<
".");
707 if (num_export_lids != 0) {
708 TEUCHOS_TEST_FOR_EXCEPTION
709 (num_packets_per_lid.data () == NULL, std::invalid_argument,
710 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
711 "num_packets_per_lid.data() = "
712 << num_packets_per_lid.data () <<
" == NULL.");
// Nothing to export: replace exports with an empty DualView, with an
// execution-space fence on either side of the assignment.
715 if (num_export_lids == 0) {
719 execution_space::fence ();
720 exports = exports_view_type (
"exports", 0);
721 execution_space::fence ();
// offsets has one more entry than there are export LIDs.
726 Kokkos::View<size_t*,buffer_device_type> offsets (
"offsets", num_export_lids + 1);
// Fills offsets and num_packets_per_lid.  export_pids is passed as the
// lclRowPids argument, whose size decides whether each entry counts as one
// packet or two.
731 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
732 local_graph.row_map, export_lids, export_pids);
// Replace exports with a new DualView of length `count` when the current
// one is shorter, again fenced on both sides.
735 if (count > static_cast<size_t> (exports.extent (0))) {
739 execution_space::fence ();
740 exports = exports_view_type (
"exports", count);
// Diagnostic output to std::cerr (presumably guarded by `debug`; the guard
// lines are not visible).
742 std::ostringstream os;
743 os <<
"*** exports resized to " << count << std::endl;
744 std::cerr << os.str ();
746 execution_space::fence ();
749 std::ostringstream os;
750 os <<
"*** count: " << count <<
", exports.extent(0): "
751 << exports.extent (0) << std::endl;
752 std::cerr << os.str ();
// Packing process ids needs a nonempty export_pids whenever anything is
// going to be written to exports.
758 TEUCHOS_TEST_FOR_EXCEPTION
759 (pack_pids && exports.extent (0) != 0 &&
760 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
761 "pack_pids is true, and exports.extent(0) = " <<
762 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
763 "one graph entry, but export_pids.extent(0) = 0.");
765 typedef typename std::decay<decltype (local_graph)>::type
767 typedef typename std::decay<decltype (local_col_map)>::type
// Mark exports as modified in the buffer memory space, then pack into its
// view in that space.
769 typedef typename exports_view_type::t_dev dev_exports_view_type;
770 typedef typename dev_exports_view_type::memory_space buf_mem_space;
771 exports.template modify<buf_mem_space> ();
772 auto exports_d = exports.template view<buf_mem_space> ();
773 do_pack<packet_type,local_graph_type,local_map_type,buffer_device_type>
774 (local_graph, local_col_map, exports_d, num_packets_per_lid,
775 export_lids, export_pids, offsets, pack_pids);
// Front end taking Teuchos::ArrayView inputs; it matches the
// Details::packCrsGraph signature listed in the instantiation macro
// (Teuchos::Array exports, ArrayView numPacketsPerLID / exportLIDs).  It
// builds device-side views from the host arrays, calls
// PackCrsGraphImpl::packCrsGraph with pack_pids = false and an empty
// export_pids view, then resizes the caller's `exports` array to the packed
// length if it differs.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// function name line, the sourceGraph / exports / distor parameters, the
// helper calls that create the device views and the copy-back statements
// are not visible.
781 template<
typename LO,
typename GO,
typename NT>
785 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
786 const Teuchos::ArrayView<const LO>& exportLIDs,
787 size_t& constantNumPackets,
792 typedef typename local_graph_type::device_type device_type;
793 typedef typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space host_exec_space;
794 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
// Choose the memory space of the communication buffer.  With CUDA enabled
// it is picked by a std::conditional involving Kokkos::Cuda (its other
// operands are not visible); otherwise it is the graph device's memory
// space.
799 typedef typename device_type::execution_space buffer_exec_space;
800 #ifdef KOKKOS_ENABLE_CUDA
801 typedef typename std::conditional<
803 buffer_exec_space, Kokkos::Cuda
806 typename device_type::memory_space
807 >::type buffer_memory_space;
809 typedef typename device_type::memory_space buffer_memory_space;
810 #endif // KOKKOS_ENABLE_CUDA
812 typedef Kokkos::Device<buffer_exec_space,
813 buffer_memory_space> buffer_device_type;
// Device-side counterparts of the two host arrays.  The helper being
// called is on lines not shown; note the boolean argument is false for
// numPacketsPerLID and true for exportLIDs.
819 typename local_graph_type::device_type outputDevice;
820 auto num_packets_per_lid_d =
822 numPacketsPerLID.getRawPtr (),
823 numPacketsPerLID.size (),
false,
824 "num_packets_per_lid");
829 exportLIDs.getRawPtr (),
830 exportLIDs.size (),
true,
// This entry point never packs process ids: the pid view is empty and
// pack_pids is false.
833 Kokkos::View<int*, device_type> export_pids_d (
"export_pids", 0);
835 Kokkos::DualView<packet_type*,buffer_device_type> exports_dv (
"exports", 0);
836 constexpr
bool pack_pids =
false;
837 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
838 sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
839 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view wrapping the caller's numPacketsPerLID storage
// (presumably the target of a copy back from the device; that statement is
// not visible).
842 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
843 (numPacketsPerLID.getRawPtr (),
844 numPacketsPerLID.size ());
// Make the caller's exports array the same length as the packed buffer.
852 if (static_cast<size_t> (exports.size ()) !=
853 static_cast<size_t> (exports_dv.extent (0))) {
854 exports.resize (exports_dv.extent (0));
856 Kokkos::View<packet_type*, host_dev_type> exports_h (exports.getRawPtr (),
// Front end taking Kokkos::DualView inputs; it matches the
// Details::packCrsGraphNew signature listed in the instantiation macro.  It
// marks numPacketsPerLID as modified on the device side, syncs exportLIDs to
// the graph device's memory space, and calls
// PackCrsGraphImpl::packCrsGraph with pack_pids = false and an empty pid
// view.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// function name line, the sourceGraph / exports / distor parameters and
// the declaration of exportLIDs_nc are not visible.
861 template<
typename LO,
typename GO,
typename NT>
866 const Kokkos::DualView<
size_t*,
868 const Kokkos::DualView<const LO*, typename NT::device_type>& exportLIDs,
869 size_t& constantNumPackets,
873 typedef typename local_graph_type::device_type device_type;
// Choose the memory space of the communication buffer.  With CUDA enabled
// it is picked by a std::conditional involving Kokkos::Cuda (its other
// operands are not visible); otherwise it is the graph device's memory
// space.
878 #ifdef KOKKOS_ENABLE_CUDA
879 typedef typename device_type::execution_space buffer_exec_space;
880 typedef typename std::conditional<
882 buffer_exec_space, Kokkos::Cuda
885 typename device_type::memory_space
886 >::type buffer_memory_space;
888 typedef typename device_type::memory_space buffer_memory_space;
889 #endif // KOKKOS_ENABLE_CUDA
// This entry point never packs process ids.
892 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
893 constexpr
bool pack_pids =
false;
// The parameter is a const reference, so a copy of the DualView is used to
// set the modification flags: cleared for host, set for device.
896 auto numPacketsPerLID_nc = numPacketsPerLID;
897 numPacketsPerLID_nc.modified_host() = 0;
898 numPacketsPerLID_nc.modified_device() = 1;
899 auto numPacketsPerLID_d = numPacketsPerLID.template view<buffer_memory_space> ();
// Sync the export LIDs to the device memory space before taking the view.
903 exportLIDs_nc.template sync<typename device_type::memory_space> ();
904 auto exportLIDs_d = exportLIDs.template view<typename device_type::memory_space> ();
906 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
907 sourceGraph, exports, numPacketsPerLID_d, exportLIDs_d,
908 exportPIDs_d, constantNumPackets, pack_pids, distor);
// Front end that also packs process ids; it matches the
// Details::packCrsGraphWithOwningPIDs signature listed in the instantiation
// macro (it is the only entry point with a sourcePIDs argument).  It builds
// device-side views from the three host arrays and calls
// PackCrsGraphImpl::packCrsGraph with pack_pids = true.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// function name line, the sourceGraph / exports_dv / distor parameters,
// the helper calls that create the device views and any copy-back
// statements are not visible.
911 template<
typename LO,
typename GO,
typename NT>
917 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
918 const Teuchos::ArrayView<const LO>& exportLIDs,
919 const Teuchos::ArrayView<const int>& sourcePIDs,
920 size_t& constantNumPackets,
926 typedef typename Kokkos::DualView<packet_type*, buffer_device_type>::t_host::execution_space host_exec_space;
927 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
929 typename local_graph_type::device_type outputDevice;
// Device-side counterparts of the three host arrays.  The helper being
// called is on lines not shown; note the boolean argument is false for
// numPacketsPerLID and true for exportLIDs and sourcePIDs.
935 auto num_packets_per_lid_d =
937 numPacketsPerLID.getRawPtr (),
938 numPacketsPerLID.size (),
false,
939 "num_packets_per_lid");
945 exportLIDs.getRawPtr (),
946 exportLIDs.size (),
true,
952 sourcePIDs.getRawPtr (),
953 sourcePIDs.size (),
true,
// Unlike the other entry points, process ids are packed here.
955 constexpr
bool pack_pids =
true;
956 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
957 sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
958 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view wrapping the caller's numPacketsPerLID storage
// (presumably the target of a copy back from the device; that statement is
// not visible).
962 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
963 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// Explicit-instantiation macro: for a given (LO, GO, NT) it names the three
// public entry points Details::packCrsGraph, Details::packCrsGraphNew and
// Details::packCrsGraphWithOwningPIDs together with their parameter types.
// NOTE(review): several continuation lines of the macro (the "template
// void" openers and the trailing parameters) are missing from this listing.
// No comment may be inserted between the lines below, because each one ends
// in a backslash continuation.
970 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
972 Details::packCrsGraph<LO, GO, NT> ( \
973 const CrsGraph<LO, GO, NT>&, \
974 Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
975 const Teuchos::ArrayView<size_t>&, \
976 const Teuchos::ArrayView<const LO>&, \
980 Details::packCrsGraphNew<LO, GO, NT> ( \
981 const CrsGraph<LO, GO, NT>&, \
982 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
983 const Kokkos::DualView<size_t*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
984 const Kokkos::DualView<const LO*, NT::device_type>&, \
988 Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
989 const CrsGraph<LO, GO, NT>&, \
990 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
991 const Teuchos::ArrayView<size_t>&, \
992 const Teuchos::ArrayView<const LO>&, \
993 const Teuchos::ArrayView<const int>&, \
997 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP