49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
58 namespace KokkosRefactor {
// Declaration of a static `test (x, exclusiveUpperBound)` predicate returning
// bool.  The second template parameter defaults to
// std::numeric_limits<IntegerType>::is_signed, so it follows from IntegerType
// unless it is given explicitly.
// NOTE(review): the enclosing struct header is not visible in this excerpt;
// presumably this is the primary OutOfBounds template -- confirm.
74 template<
class IntegerType,
75 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
77 static KOKKOS_INLINE_FUNCTION
bool
78 test (
const IntegerType x,
79 const IntegerType exclusiveUpperBound);
// Variant of test() that checks both ends of the range: it returns true when
// x is negative or when x >= exclusiveUpperBound.
// NOTE(review): the struct header is not visible in this excerpt; presumably
// this is the specialization selected for signed IntegerType -- confirm.
83 template<
class IntegerType>
85 static KOKKOS_INLINE_FUNCTION
bool
86 test (
const IntegerType x,
87 const IntegerType exclusiveUpperBound)
89 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
// Specialization for isSigned == false.  Only the upper bound is compared;
// there is no comparison against zero in this variant.
94 template<
class IntegerType>
95 struct OutOfBounds<IntegerType, false> {
96 static KOKKOS_INLINE_FUNCTION
bool
97 test (
const IntegerType x,
98 const IntegerType exclusiveUpperBound)
// True when x is at or beyond the exclusive upper bound.
100 return x >= exclusiveUpperBound;
// Free-function form of the bounds test.  Both the value and the exclusive
// upper bound are taken as the same IntegerType.  Kernels in this file call it
// as Impl::outOfBounds<index_type> (index, view.extent (d)).
// NOTE(review): the body is not visible in this excerpt; presumably it
// forwards to OutOfBounds<IntegerType>::test -- confirm.
106 template<
class IntegerType>
107 KOKKOS_INLINE_FUNCTION
bool
108 outOfBounds (
const IntegerType x,
const IntegerType exclusiveUpperBound)
// Functor that gathers entries of one column of `src` into the buffer `dst`:
// for each k it performs dst(k) = src(idx(k), col).  No validation of idx(k)
// is visible in this functor.
118 template <
typename DstView,
typename SrcView,
typename IdxView>
119 struct PackArraySingleColumn {
// The execution space, and the index type used for k, come from DstView.
120 typedef typename DstView::execution_space execution_space;
121 typedef typename execution_space::size_type size_type;
// Constructor: stores the three views and the column index.
128 PackArraySingleColumn(
const DstView& dst_,
132 dst(dst_), src(src_), idx(idx_), col(col_) {}
// Kernel body for entry k; k indexes both idx and dst.
134 KOKKOS_INLINE_FUNCTION
135 void operator()(
const size_type k )
const {
136 dst(k) = src(idx(k), col);
// Driver: launches this functor with parallel_for over k in [0, idx.size ()).
140 pack (
const DstView& dst,
145 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
146 Kokkos::parallel_for (range_type (0, idx.size ()),
147 PackArraySingleColumn (dst,src,idx,col));
// Single-column pack functor that also validates each row index against
// [0, src.extent (0)).  It is run as a reduction whose result type is int; the
// join below yields 0 as soon as either operand is 0, and 1 otherwise.
// NOTE(review): several lines of this class are not visible in this excerpt
// (for example the body of the failing branch and the body of init);
// presumably 1 means "all indices valid" and 0 means "at least one bad
// index" -- confirm.
151 template <
typename DstView,
154 typename SizeType =
typename DstView::execution_space::size_type>
155 class PackArraySingleColumnWithBoundsCheck {
// Compile-time requirements: all three view arguments are Kokkos::Views with
// ranks 1 (DstView), 2 (SrcView) and 1 (IdxView), and SizeType is integral.
157 static_assert (Kokkos::Impl::is_view<DstView>::value,
158 "DstView must be a Kokkos::View.");
159 static_assert (Kokkos::Impl::is_view<SrcView>::value,
160 "SrcView must be a Kokkos::View.");
161 static_assert (Kokkos::Impl::is_view<IdxView>::value,
162 "IdxView must be a Kokkos::View.");
163 static_assert (static_cast<int> (DstView::rank) == 1,
164 "DstView must be a rank-1 Kokkos::View.");
165 static_assert (static_cast<int> (SrcView::rank) == 2,
166 "SrcView must be a rank-2 Kokkos::View.");
167 static_assert (static_cast<int> (IdxView::rank) == 1,
168 "IdxView must be a rank-1 Kokkos::View.");
169 static_assert (std::is_integral<SizeType>::value,
170 "SizeType must be a built-in integer type.");
172 typedef SizeType size_type;
// Type of the reduction result.
174 typedef int value_type;
// Constructor: stores the three views and the column index.
183 PackArraySingleColumnWithBoundsCheck (
const DstView& dst_,
186 const size_type col_) :
187 dst (dst_), src (src_), idx (idx_), col (col_) {}
// Reduction body for entry k; `result` is the thread-local partial result.
189 KOKKOS_INLINE_FUNCTION
void
190 operator() (
const size_type& k, value_type& result)
const {
191 typedef typename IdxView::non_const_value_type index_type;
193 const index_type lclRow = idx(k);
// The index is rejected when it is negative or >= src.extent (0).
194 if (lclRow < static_cast<index_type> (0) ||
195 lclRow >= static_cast<index_type> (src.extent (0))) {
// Copy the selected entry of column `col` into the packed buffer.
199 dst(k) = src(lclRow, col);
// Initial value of a partial result (body not visible in this excerpt).
203 KOKKOS_INLINE_FUNCTION
204 void init (value_type& initialResult)
const {
// Combine two partial results: 0 if either one is 0, otherwise 1.
208 KOKKOS_INLINE_FUNCTION
void
209 join (
volatile value_type& dstResult,
210 const volatile value_type& srcResult)
const
212 dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
// Driver: runs the functor with parallel_reduce over [0, idx.size ()) and
// contains the error-reporting path below.
216 pack (
const DstView& dst,
221 typedef typename DstView::execution_space execution_space;
222 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
223 typedef typename IdxView::non_const_value_type index_type;
226 Kokkos::parallel_reduce (range_type (0, idx.size ()),
227 PackArraySingleColumnWithBoundsCheck (dst, src,
// Error reporting: scan a host mirror of idx, collect every index outside
// [0, src.extent (0)), and throw std::runtime_error with a message built from
// them.
// NOTE(review): the condition guarding this path is not visible in this
// excerpt.
234 auto idx_h = Kokkos::create_mirror_view (idx);
237 std::vector<index_type> badIndices;
238 const size_type numInds = idx_h.extent (0);
239 for (size_type k = 0; k < numInds; ++k) {
240 if (idx_h(k) < static_cast<index_type> (0) ||
241 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
242 badIndices.push_back (idx_h(k));
246 std::ostringstream os;
247 os <<
"MultiVector single-column pack kernel had "
248 << badIndices.size () <<
" out-of bounds index/ices. "
// Loop over the collected bad indices; the test below distinguishes the last
// element from the others.
250 for (
size_t k = 0; k < badIndices.size (); ++k) {
252 if (k + 1 < badIndices.size ()) {
257 throw std::runtime_error (os.str ());
// Entry point for packing a single column of `src` into `dst`.  The `debug`
// argument defaults to true.  Both the bounds-checking implementation and the
// plain implementation are invoked below.
// NOTE(review): the branch that selects between them is not visible in this
// excerpt; presumably it tests `debug` -- confirm.
263 template <
typename DstView,
typename SrcView,
typename IdxView>
265 pack_array_single_column (
const DstView& dst,
269 const bool debug =
true)
// Compile-time requirements on the view arguments (View-ness and ranks).
271 static_assert (Kokkos::Impl::is_view<DstView>::value,
272 "DstView must be a Kokkos::View.");
273 static_assert (Kokkos::Impl::is_view<SrcView>::value,
274 "SrcView must be a Kokkos::View.");
275 static_assert (Kokkos::Impl::is_view<IdxView>::value,
276 "IdxView must be a Kokkos::View.");
277 static_assert (static_cast<int> (DstView::rank) == 1,
278 "DstView must be a rank-1 Kokkos::View.");
279 static_assert (static_cast<int> (SrcView::rank) == 2,
280 "SrcView must be a rank-2 Kokkos::View.");
281 static_assert (static_cast<int> (IdxView::rank) == 1,
282 "IdxView must be a rank-1 Kokkos::View.");
// Path using the bounds-checking functor.
285 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
286 impl_type::pack (dst, src, idx, col);
// Path using the functor without index validation.
289 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
290 impl_type::pack (dst, src, idx, col);
// Functor that packs whole rows of `src` into the flat buffer `dst`: for each
// k it reads localRow = idx(k) and writes dst(k*numCols + j) = src(localRow, j)
// for j in [0, numCols).  No validation of idx(k) is visible in this functor.
294 template <
typename DstView,
typename SrcView,
typename IdxView>
295 struct PackArrayMultiColumn {
// The execution space, and the index type used for k, come from DstView.
296 typedef typename DstView::execution_space execution_space;
297 typedef typename execution_space::size_type size_type;
// Constructor: stores the three views and the column count.
304 PackArrayMultiColumn(
const DstView& dst_,
308 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
// Kernel body for entry k.
310 KOKKOS_INLINE_FUNCTION
311 void operator()(
const size_type k )
const {
312 const typename IdxView::value_type localRow = idx(k);
// Entry k occupies numCols consecutive slots of dst starting at k*numCols.
313 const size_t offset = k*numCols;
314 for (
size_t j = 0; j < numCols; ++j)
315 dst(offset + j) = src(localRow, j);
// Driver: launches this functor with parallel_for over k in [0, idx.size ()).
318 static void pack(
const DstView& dst,
322 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
323 Kokkos::parallel_for (range_type (0, idx.size ()),
324 PackArrayMultiColumn (dst,src,idx,numCols));
// Multi-column pack functor that also validates each row index against
// [0, src.extent (0)).  It is run as a reduction whose result type is int; the
// join below yields 0 as soon as either operand is 0, and 1 otherwise.
// NOTE(review): the body of the failing branch and the body of init are not
// visible in this excerpt; presumably 0 marks "at least one bad index" --
// confirm.
328 template <
typename DstView,
331 typename SizeType =
typename DstView::execution_space::size_type>
332 class PackArrayMultiColumnWithBoundsCheck {
334 typedef SizeType size_type;
// Type of the reduction result.
336 typedef int value_type;
// Constructor: stores the three views and the column count.
345 PackArrayMultiColumnWithBoundsCheck (
const DstView& dst_,
348 const size_type numCols_) :
349 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
// Reduction body for entry k; `result` is the thread-local partial result.
351 KOKKOS_INLINE_FUNCTION
void
352 operator() (
const size_type& k, value_type& result)
const {
353 typedef typename IdxView::non_const_value_type index_type;
355 const index_type lclRow = idx(k);
// The index is rejected when it is negative or >= src.extent (0).
356 if (lclRow < static_cast<index_type> (0) ||
357 lclRow >= static_cast<index_type> (src.extent (0))) {
// Entry k occupies numCols consecutive slots of dst starting at k*numCols.
361 const size_type offset = k*numCols;
362 for (size_type j = 0; j < numCols; ++j) {
363 dst(offset + j) = src(lclRow, j);
// Initial value of a partial result (body not visible in this excerpt).
368 KOKKOS_INLINE_FUNCTION
369 void init (value_type& initialResult)
const {
// Combine two partial results: 0 if either one is 0, otherwise 1.
373 KOKKOS_INLINE_FUNCTION
void
374 join (
volatile value_type& dstResult,
375 const volatile value_type& srcResult)
const
377 dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
// Driver: runs the functor with parallel_reduce over [0, idx.size ()) and
// contains the error-reporting path below.
381 pack (
const DstView& dst,
384 const size_type numCols)
386 typedef typename DstView::execution_space execution_space;
387 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
388 typedef typename IdxView::non_const_value_type index_type;
391 Kokkos::parallel_reduce (range_type (0, idx.size ()),
392 PackArrayMultiColumnWithBoundsCheck (dst, src,
// Error reporting: scan a host mirror of idx, collect every index outside
// [0, src.extent (0)), and throw std::runtime_error with a message built from
// them.
// NOTE(review): the condition guarding this path is not visible in this
// excerpt.
399 auto idx_h = Kokkos::create_mirror_view (idx);
402 std::vector<index_type> badIndices;
403 const size_type numInds = idx_h.extent (0);
404 for (size_type k = 0; k < numInds; ++k) {
405 if (idx_h(k) < static_cast<index_type> (0) ||
406 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
407 badIndices.push_back (idx_h(k));
411 std::ostringstream os;
412 os <<
"MultiVector multiple-column pack kernel had "
413 << badIndices.size () <<
" out-of bounds index/ices. "
// Loop over the collected bad indices; the test below distinguishes the last
// element from the others.
415 for (
size_t k = 0; k < badIndices.size (); ++k) {
417 if (k + 1 < badIndices.size ()) {
422 throw std::runtime_error (os.str ());
// Entry point for packing numCols columns of `src` into `dst`.  The `debug`
// argument defaults to true.  Both the bounds-checking implementation and the
// plain implementation are invoked below.
// NOTE(review): the branch that selects between them is not visible in this
// excerpt; presumably it tests `debug` -- confirm.
428 template <
typename DstView,
432 pack_array_multi_column (
const DstView& dst,
435 const size_t numCols,
436 const bool debug =
true)
// Compile-time requirements on the view arguments (View-ness and ranks).
438 static_assert (Kokkos::Impl::is_view<DstView>::value,
439 "DstView must be a Kokkos::View.");
440 static_assert (Kokkos::Impl::is_view<SrcView>::value,
441 "SrcView must be a Kokkos::View.");
442 static_assert (Kokkos::Impl::is_view<IdxView>::value,
443 "IdxView must be a Kokkos::View.");
444 static_assert (static_cast<int> (DstView::rank) == 1,
445 "DstView must be a rank-1 Kokkos::View.");
446 static_assert (static_cast<int> (SrcView::rank) == 2,
447 "SrcView must be a rank-2 Kokkos::View.");
448 static_assert (static_cast<int> (IdxView::rank) == 1,
449 "IdxView must be a rank-1 Kokkos::View.");
// Path using the bounds-checking functor.
452 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
453 SrcView, IdxView> impl_type;
454 impl_type::pack (dst, src, idx, numCols);
// Path using the functor without index validation.
457 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
458 impl_type::pack (dst, src, idx, numCols);
// Functor that packs selected columns of selected rows of `src` into the flat
// buffer `dst`: for each k it reads localRow = idx(k) and writes
// dst(k*numCols + j) = src(localRow, col(j)) for j in [0, numCols).
// No validation of idx(k) or col(j) is visible in this functor.
462 template <
typename DstView,
typename SrcView,
typename IdxView,
464 struct PackArrayMultiColumnVariableStride {
// The execution space, and the index type used for k, come from DstView.
465 typedef typename DstView::execution_space execution_space;
466 typedef typename execution_space::size_type size_type;
// Constructor: stores the views, the column-index view and the column count.
474 PackArrayMultiColumnVariableStride(
const DstView& dst_,
479 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
// Kernel body for entry k.
481 KOKKOS_INLINE_FUNCTION
482 void operator()(
const size_type k )
const {
483 const typename IdxView::value_type localRow = idx(k);
// Entry k occupies numCols consecutive slots of dst starting at k*numCols.
484 const size_t offset = k*numCols;
485 for (
size_t j = 0; j < numCols; ++j)
// The source column for slot j is looked up through col.
486 dst(offset + j) = src(localRow, col(j));
// Driver: launches this functor with parallel_for over k in [0, idx.size ()).
489 static void pack(
const DstView& dst,
494 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
495 Kokkos::parallel_for (range_type (0, idx.size ()),
496 PackArrayMultiColumnVariableStride(
497 dst,src,idx,col,numCols) );
// Variable-stride pack functor that validates both row indices (against
// src.extent (0)) and column indices (against src.extent (1)).  It is run as a
// reduction whose result is a pair of ints; the driver below reads `.first` as
// the row status and `.second` as the column status, with 1 meaning no error.
// NOTE(review): the bodies of the failing branches are not visible in this
// excerpt.
501 template <
typename DstView,
505 typename SizeType =
typename DstView::execution_space::size_type>
506 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
508 typedef SizeType size_type;
// Type of the reduction result: (row status, column status).
510 typedef Kokkos::pair<int, int> value_type;
// Constructor: stores the views, the column-index view and the column count.
520 PackArrayMultiColumnVariableStrideWithBoundsCheck (
const DstView& dst_,
524 const size_type numCols_) :
525 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
// Reduction body for entry k; `result` is the thread-local partial result.
527 KOKKOS_INLINE_FUNCTION
void
528 operator() (
const size_type& k, value_type& result)
const {
529 typedef typename IdxView::non_const_value_type row_index_type;
530 typedef typename ColView::non_const_value_type col_index_type;
532 const row_index_type lclRow = idx(k);
// The row index is rejected when it is negative or >= src.extent (0).
533 if (lclRow < static_cast<row_index_type> (0) ||
534 lclRow >= static_cast<row_index_type> (src.extent (0))) {
// Entry k occupies numCols consecutive slots of dst starting at k*numCols.
538 const size_type offset = k*numCols;
539 for (size_type j = 0; j < numCols; ++j) {
540 const col_index_type lclCol = col(j);
// The column index is tested against src.extent (1).
541 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
545 dst(offset + j) = src(lclRow, lclCol);
// Both components of a partial result start at 1.
551 KOKKOS_INLINE_FUNCTION
void
552 init (value_type& initialResult)
const {
553 initialResult.first = 1;
554 initialResult.second = 1;
// Combine two partial results component-wise: 0 if either is 0, otherwise 1.
557 KOKKOS_INLINE_FUNCTION
void
558 join (
volatile value_type& dstResult,
559 const volatile value_type& srcResult)
const
561 dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
562 dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
// Driver: runs the functor with parallel_reduce over [0, idx.size ()), then
// derives row/column error flags from the reduced pair.
566 pack (
const DstView& dst,
570 const size_type numCols)
572 using Kokkos::parallel_reduce;
573 typedef typename DstView::execution_space execution_space;
574 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
575 typedef typename IdxView::non_const_value_type row_index_type;
576 typedef typename ColView::non_const_value_type col_index_type;
578 Kokkos::pair<int, int> result (1, 1);
579 parallel_reduce (range_type (0, idx.size ()),
580 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src,
// Any value other than 1 in a component is treated as an error for it.
584 const bool hasBadRows = (result.first != 1);
585 const bool hasBadCols = (result.second != 1);
586 const bool hasErr = hasBadRows || hasBadCols;
588 std::ostringstream os;
// Row report: scan a host mirror of idx and collect every row index that
// fails the bounds test against src.extent (0).
// NOTE(review): the conditions guarding the row and column reports are not
// visible in this excerpt.
594 auto idx_h = Kokkos::create_mirror_view (idx);
597 std::vector<row_index_type> badRows;
598 const size_type numInds = idx_h.extent (0);
599 for (size_type k = 0; k < numInds; ++k) {
600 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
601 badRows.push_back (idx_h(k));
604 os <<
"MultiVector multiple-column pack kernel had "
605 << badRows.size () <<
" out-of bounds row index/ices: [";
606 for (
size_t k = 0; k < badRows.size (); ++k) {
608 if (k + 1 < badRows.size ()) {
// Column report: same procedure on a host mirror of col, tested against
// src.extent (1).
620 auto col_h = Kokkos::create_mirror_view (col);
623 std::vector<col_index_type> badCols;
624 const size_type numInds = col_h.extent (0);
625 for (size_type k = 0; k < numInds; ++k) {
626 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
627 badCols.push_back (col_h(k));
634 os <<
"MultiVector multiple-column pack kernel had "
635 << badCols.size () <<
" out-of bounds column index/ices: [";
636 for (
size_t k = 0; k < badCols.size (); ++k) {
638 if (k + 1 < badCols.size ()) {
// The accumulated message is reported by throwing std::runtime_error.
645 throw std::runtime_error (os.str ());
// Entry point for packing numCols columns of `src`, selected through `col`,
// into `dst`.  The `debug` argument defaults to true.  Both the
// bounds-checking implementation and the plain implementation are invoked
// below.
// NOTE(review): the branch that selects between them is not visible in this
// excerpt; presumably it tests `debug` -- confirm.
650 template <
typename DstView,
655 pack_array_multi_column_variable_stride (
const DstView& dst,
659 const size_t numCols,
660 const bool debug =
true)
// Compile-time requirements on the view arguments (View-ness and ranks).
662 static_assert (Kokkos::Impl::is_view<DstView>::value,
663 "DstView must be a Kokkos::View.");
664 static_assert (Kokkos::Impl::is_view<SrcView>::value,
665 "SrcView must be a Kokkos::View.");
666 static_assert (Kokkos::Impl::is_view<IdxView>::value,
667 "IdxView must be a Kokkos::View.");
668 static_assert (Kokkos::Impl::is_view<ColView>::value,
669 "ColView must be a Kokkos::View.");
670 static_assert (static_cast<int> (DstView::rank) == 1,
671 "DstView must be a rank-1 Kokkos::View.");
672 static_assert (static_cast<int> (SrcView::rank) == 2,
673 "SrcView must be a rank-2 Kokkos::View.");
674 static_assert (static_cast<int> (IdxView::rank) == 1,
675 "IdxView must be a rank-1 Kokkos::View.");
676 static_assert (static_cast<int> (ColView::rank) == 1,
677 "ColView must be a rank-1 Kokkos::View.");
// Path using the bounds-checking functor.
680 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
681 SrcView, IdxView, ColView> impl_type;
682 impl_type::pack (dst, src, idx, col, numCols);
// Path using the functor without index validation.
685 typedef PackArrayMultiColumnVariableStride<DstView,
686 SrcView, IdxView, ColView> impl_type;
687 impl_type::pack (dst, src, idx, col, numCols);
// Binary operation, templated on an execution space, whose call operator
// overwrites `dest` with `src` through Kokkos::atomic_assign.
// NOTE(review): the struct header is not visible in this excerpt; presumably
// this is the generic InsertOp -- confirm.
699 template<
class ExecutionSpace>
701 template <
typename Scalar>
702 KOKKOS_INLINE_FUNCTION
703 void operator() (Scalar& dest,
const Scalar& src)
const {
704 Kokkos::atomic_assign(&dest, src);
// Specialization of InsertOp for ::Kokkos::Serial, compiled only when
// KOKKOS_ENABLE_SERIAL is defined.
// NOTE(review): the body of the call operator is not visible in this excerpt.
708 #ifdef KOKKOS_ENABLE_SERIAL
710 struct InsertOp< ::Kokkos::Serial > {
711 template <
typename Scalar>
712 KOKKOS_INLINE_FUNCTION
713 void operator() (Scalar& dest,
const Scalar& src)
const {
717 #endif // KOKKOS_ENABLE_SERIAL
// Binary operation, templated on an execution space, whose call operator adds
// `src` into `dest` through Kokkos::atomic_add.
// NOTE(review): the struct header is not visible in this excerpt; presumably
// this is the generic AddOp -- confirm.
719 template<
class ExecutionSpace>
721 template <
typename Scalar>
722 KOKKOS_INLINE_FUNCTION
723 void operator() (Scalar& dest,
const Scalar& src)
const {
724 Kokkos::atomic_add(&dest, src);
// Specialization of AddOp for ::Kokkos::Serial, compiled only when
// KOKKOS_ENABLE_SERIAL is defined.
// NOTE(review): the body of the call operator is not visible in this excerpt.
728 #ifdef KOKKOS_ENABLE_SERIAL
730 struct AddOp< ::Kokkos::Serial > {
731 template <
typename Scalar>
732 KOKKOS_INLINE_FUNCTION
733 void operator() (Scalar& dest,
const Scalar& src)
const {
737 #endif // KOKKOS_ENABLE_SERIAL
// Binary operation, templated on an execution space, that replaces `dest` with
// the larger of |dest| and |src|, where the absolute values come from
// Kokkos::Details::ArithTraits<Scalar>::abs.
// NOTE(review): the struct header is not visible in this excerpt; presumably
// this is the generic AbsMaxOp -- confirm.
739 template<
class ExecutionSpace>
// Local max helper: returns a when a > b, otherwise b.
747 template <
typename T>
748 KOKKOS_INLINE_FUNCTION
749 T max(
const T& a,
const T& b)
const {
return a > b ? a : b; }
751 template <
typename Scalar>
752 KOKKOS_INLINE_FUNCTION
753 void operator() (Scalar& dest,
const Scalar& src)
const {
754 typedef Kokkos::Details::ArithTraits<Scalar> SCT;
// NOTE(review): dest is read through SCT::abs(dest) before the atomic_assign
// stores the result; confirm whether a single atomic read-modify-write is
// required here.
755 Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
// Specialization of AbsMaxOp for ::Kokkos::Serial, compiled only when
// KOKKOS_ENABLE_SERIAL is defined.  It stores max(|dest|, |src|) into `dest`
// with a plain assignment.
759 #ifdef KOKKOS_ENABLE_SERIAL
761 struct AbsMaxOp< ::Kokkos::Serial > {
// Local max helper: returns a when a > b, otherwise b.
768 template <
typename T>
769 KOKKOS_INLINE_FUNCTION
770 T max(
const T& a,
const T& b)
const {
return a > b ? a : b; }
772 template <
typename Scalar>
773 KOKKOS_INLINE_FUNCTION
774 void operator() (Scalar& dest,
const Scalar& src)
const {
775 typedef Kokkos::Details::ArithTraits<Scalar> SCT;
// The result of max is cast back to Scalar before it is stored.
777 dest = static_cast<Scalar> (max (SCT::abs (dest), SCT::abs (src)));
780 #endif // KOKKOS_ENABLE_SERIAL
// Functor that applies `op` to combine a flat buffer `src` into rows of `dst`:
// for each k it reads localRow = idx(k) and calls
// op (dst(localRow, j), src(k*numCols + j)) for j in [0, numCols).
// No validation of idx(k) is visible in this functor.
782 template <
typename ExecutionSpace,
787 class UnpackArrayMultiColumn {
// Compile-time requirements: all three view arguments are Kokkos::Views with
// ranks 2 (DstView), 1 (SrcView) and 1 (IdxView).
789 static_assert (Kokkos::Impl::is_view<DstView>::value,
790 "DstView must be a Kokkos::View.");
791 static_assert (Kokkos::Impl::is_view<SrcView>::value,
792 "SrcView must be a Kokkos::View.");
793 static_assert (Kokkos::Impl::is_view<IdxView>::value,
794 "IdxView must be a Kokkos::View.");
795 static_assert (static_cast<int> (DstView::rank) == 2,
796 "DstView must be a rank-2 Kokkos::View.");
797 static_assert (static_cast<int> (SrcView::rank) == 1,
798 "SrcView must be a rank-1 Kokkos::View.");
799 static_assert (static_cast<int> (IdxView::rank) == 1,
800 "IdxView must be a rank-1 Kokkos::View.");
// The execution space, and the index type used for k, come from the
// ExecutionSpace template argument.
803 typedef typename ExecutionSpace::execution_space execution_space;
804 typedef typename execution_space::size_type size_type;
// Constructor; the execution-space argument is unnamed in this signature.
814 UnpackArrayMultiColumn (
const ExecutionSpace& ,
819 const size_t numCols_) :
// Kernel body for entry k.
827 KOKKOS_INLINE_FUNCTION
void
828 operator() (
const size_type k)
const
830 const typename IdxView::value_type localRow = idx(k);
// Entry k occupies numCols consecutive slots of src starting at k*numCols.
831 const size_t offset = k*numCols;
832 for (
size_t j = 0; j < numCols; ++j) {
833 op (dst(localRow, j), src(offset+j));
// Driver: launches this functor under the label below over k in
// [0, idx.size ()).
// NOTE(review): the name of the Kokkos dispatch call is not visible in this
// excerpt; presumably Kokkos::parallel_for -- confirm.
838 unpack (
const ExecutionSpace& execSpace,
843 const size_t numCols)
846 (
"Tpetra::MultiVector unpack (constant stride)",
847 Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
848 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
// Unpack functor that also validates each row index against
// [0, dst.extent (0)).  It is run as a reduction whose result type is int; the
// join below yields 0 as soon as either operand is 0, and 1 otherwise.
// NOTE(review): the body of the failing branch and the body of init are not
// visible in this excerpt; presumably 0 marks "at least one bad index" --
// confirm.
852 template <
typename ExecutionSpace,
857 typename SizeType =
typename ExecutionSpace::execution_space::size_type>
858 class UnpackArrayMultiColumnWithBoundsCheck {
// Compile-time requirements: all three view arguments are Kokkos::Views with
// ranks 2 (DstView), 1 (SrcView) and 1 (IdxView), and SizeType is integral.
860 static_assert (Kokkos::Impl::is_view<DstView>::value,
861 "DstView must be a Kokkos::View.");
862 static_assert (Kokkos::Impl::is_view<SrcView>::value,
863 "SrcView must be a Kokkos::View.");
864 static_assert (Kokkos::Impl::is_view<IdxView>::value,
865 "IdxView must be a Kokkos::View.");
866 static_assert (static_cast<int> (DstView::rank) == 2,
867 "DstView must be a rank-2 Kokkos::View.");
868 static_assert (static_cast<int> (SrcView::rank) == 1,
869 "SrcView must be a rank-1 Kokkos::View.");
870 static_assert (static_cast<int> (IdxView::rank) == 1,
871 "IdxView must be a rank-1 Kokkos::View.");
872 static_assert (std::is_integral<SizeType>::value,
873 "SizeType must be a built-in integer type.");
876 typedef typename ExecutionSpace::execution_space execution_space;
877 typedef SizeType size_type;
// Type of the reduction result.
879 typedef int value_type;
// Constructor; the execution-space argument is unnamed in this signature.
889 UnpackArrayMultiColumnWithBoundsCheck (
const ExecutionSpace& ,
894 const size_type numCols_) :
// Reduction body for entry k; `result` is the thread-local partial result.
902 KOKKOS_INLINE_FUNCTION
903 void operator() (
const size_type& k, value_type& result)
const
905 typedef typename IdxView::non_const_value_type index_type;
907 const index_type lclRow = idx(k);
// The index is rejected when it is negative or >= dst.extent (0).
908 if (lclRow < static_cast<index_type> (0) ||
909 lclRow >= static_cast<index_type> (dst.extent (0))) {
// Entry k occupies numCols consecutive slots of src starting at k*numCols.
913 const size_type offset = k*numCols;
914 for (size_type j = 0; j < numCols; ++j) {
915 op (dst(lclRow,j), src(offset+j));
// Initial value of a partial result (body not visible in this excerpt).
920 KOKKOS_INLINE_FUNCTION
921 void init (value_type& initialResult)
const {
// Combine two partial results: 0 if either one is 0, otherwise 1.
925 KOKKOS_INLINE_FUNCTION
void
926 join (
volatile value_type& dstResult,
927 const volatile value_type& srcResult)
const
929 dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
// Driver: runs the functor with parallel_reduce under the label below over
// [0, idx.size ()), and contains the error-reporting path.
933 unpack (
const ExecutionSpace& execSpace,
938 const size_type numCols)
940 typedef typename IdxView::non_const_value_type index_type;
943 Kokkos::parallel_reduce
944 (
"Tpetra::MultiVector unpack (constant stride) (with bounds check)",
945 Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
946 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
// Error reporting: scan a host mirror of idx, collect every index outside
// [0, dst.extent (0)), and throw std::runtime_error with a message built from
// them.
// NOTE(review): the condition guarding this path is not visible in this
// excerpt.
954 auto idx_h = Kokkos::create_mirror_view (idx);
957 std::vector<index_type> badIndices;
958 const size_type numInds = idx_h.extent (0);
959 for (size_type k = 0; k < numInds; ++k) {
960 if (idx_h(k) < static_cast<index_type> (0) ||
961 idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
962 badIndices.push_back (idx_h(k));
966 std::ostringstream os;
967 os <<
"MultiVector unpack kernel had " << badIndices.size ()
968 <<
" out-of bounds index/ices.  Here they are: [";
// Loop over the collected bad indices; the test below distinguishes the last
// element from the others.
969 for (
size_t k = 0; k < badIndices.size (); ++k) {
971 if (k + 1 < badIndices.size ()) {
976 throw std::runtime_error (os.str ());
// Entry point for unpacking a flat buffer into numCols columns of `dst` using
// the combine operation `op`.  The `debug` argument defaults to true.  Both
// the bounds-checking implementation and the plain implementation are invoked
// below.
// NOTE(review): the branch that selects between them is not visible in this
// excerpt; presumably it tests `debug` -- confirm.
981 template <
typename ExecutionSpace,
987 unpack_array_multi_column (
const ExecutionSpace& execSpace,
992 const size_t numCols,
993 const bool debug =
true)
// Compile-time requirements on the view arguments (View-ness and ranks).
995 static_assert (Kokkos::Impl::is_view<DstView>::value,
996 "DstView must be a Kokkos::View.");
997 static_assert (Kokkos::Impl::is_view<SrcView>::value,
998 "SrcView must be a Kokkos::View.");
999 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1000 "IdxView must be a Kokkos::View.");
1001 static_assert (static_cast<int> (DstView::rank) == 2,
1002 "DstView must be a rank-2 Kokkos::View.");
1003 static_assert (static_cast<int> (SrcView::rank) == 1,
1004 "SrcView must be a rank-1 Kokkos::View.");
1005 static_assert (static_cast<int> (IdxView::rank) == 1,
1006 "IdxView must be a rank-1 Kokkos::View.");
// Path using the bounds-checking functor.
1009 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1010 DstView, SrcView, IdxView, Op> impl_type;
1011 impl_type::unpack (execSpace, dst, src, idx, op, numCols);
// Path using the functor without index validation.
1014 typedef UnpackArrayMultiColumn<ExecutionSpace,
1015 DstView, SrcView, IdxView, Op> impl_type;
1016 impl_type::unpack (execSpace, dst, src, idx, op, numCols);
// Functor that applies `op` to combine a flat buffer `src` into selected
// columns of selected rows of `dst`: for each k it reads localRow = idx(k) and
// calls op (dst(localRow, col(j)), src(k*numCols + j)) for j in [0, numCols).
// No validation of idx(k) or col(j) is visible in this functor.
1020 template <
typename ExecutionSpace,
1026 class UnpackArrayMultiColumnVariableStride {
// Compile-time requirements: all four view arguments are Kokkos::Views with
// ranks 2 (DstView), 1 (SrcView), 1 (IdxView) and 1 (ColView).
1028 static_assert (Kokkos::Impl::is_view<DstView>::value,
1029 "DstView must be a Kokkos::View.");
1030 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1031 "SrcView must be a Kokkos::View.");
1032 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1033 "IdxView must be a Kokkos::View.");
1034 static_assert (Kokkos::Impl::is_view<ColView>::value,
1035 "ColView must be a Kokkos::View.");
1036 static_assert (static_cast<int> (DstView::rank) == 2,
1037 "DstView must be a rank-2 Kokkos::View.");
1038 static_assert (static_cast<int> (SrcView::rank) == 1,
1039 "SrcView must be a rank-1 Kokkos::View.");
1040 static_assert (static_cast<int> (IdxView::rank) == 1,
1041 "IdxView must be a rank-1 Kokkos::View.");
1042 static_assert (static_cast<int> (ColView::rank) == 1,
1043 "ColView must be a rank-1 Kokkos::View.");
// The execution space, and the index type used for k, come from the
// ExecutionSpace template argument.
1046 typedef typename ExecutionSpace::execution_space execution_space;
1047 typedef typename execution_space::size_type size_type;
// Constructor; the execution-space argument is unnamed in this signature.
1058 UnpackArrayMultiColumnVariableStride (
const ExecutionSpace& ,
1059 const DstView& dst_,
1060 const SrcView& src_,
1061 const IdxView& idx_,
1062 const ColView& col_,
// Kernel body for entry k.
1073 KOKKOS_INLINE_FUNCTION
void
1074 operator() (
const size_type k)
const
1076 const typename IdxView::value_type localRow = idx(k);
// Entry k occupies numCols consecutive slots of src starting at k*numCols.
1077 const size_t offset = k*numCols;
1078 for (
size_t j = 0; j < numCols; ++j) {
// The destination column for slot j is looked up through col.
1079 op (dst(localRow, col(j)), src(offset+j));
// Driver: launches this functor with parallel_for under the label below over
// k in [0, idx.size ()).
1084 unpack (
const ExecutionSpace& execSpace,
1090 const size_t numCols)
1092 Kokkos::parallel_for
1093 (
"Tpetra::MultiVector unpack (nonconstant stride)",
1094 Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
1095 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1096 idx, col, op, numCols));
// Variable-stride unpack functor that validates both row indices (against
// dst.extent (0)) and column indices (against dst.extent (1)).  It is run as a
// reduction whose result is a pair of ints; the driver below reads `.first` as
// the row status and `.second` as the column status, with 1 meaning no error.
// NOTE(review): the bodies of the failing branches are not visible in this
// excerpt.
1100 template <
typename ExecutionSpace,
1106 typename SizeType =
typename ExecutionSpace::execution_space::size_type>
1107 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
// Compile-time requirements: all four view arguments are Kokkos::Views with
// ranks 2 (DstView), 1 (SrcView), 1 (IdxView) and 1 (ColView), and SizeType
// is integral.
1109 static_assert (Kokkos::Impl::is_view<DstView>::value,
1110 "DstView must be a Kokkos::View.");
1111 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1112 "SrcView must be a Kokkos::View.");
1113 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1114 "IdxView must be a Kokkos::View.");
1115 static_assert (Kokkos::Impl::is_view<ColView>::value,
1116 "ColView must be a Kokkos::View.");
1117 static_assert (static_cast<int> (DstView::rank) == 2,
1118 "DstView must be a rank-2 Kokkos::View.");
1119 static_assert (static_cast<int> (SrcView::rank) == 1,
1120 "SrcView must be a rank-1 Kokkos::View.");
1121 static_assert (static_cast<int> (IdxView::rank) == 1,
1122 "IdxView must be a rank-1 Kokkos::View.");
1123 static_assert (static_cast<int> (ColView::rank) == 1,
1124 "ColView must be a rank-1 Kokkos::View.");
1125 static_assert (std::is_integral<SizeType>::value,
1126 "SizeType must be a built-in integer type.");
1129 typedef typename ExecutionSpace::execution_space execution_space;
1130 typedef SizeType size_type;
// Type of the reduction result: (row status, column status).
1132 typedef Kokkos::pair<int, int> value_type;
// Constructor; the execution-space argument is unnamed in this signature.
// Note that numCols_ is declared as size_t here, while unpack() below takes
// its numCols as size_type.
1143 UnpackArrayMultiColumnVariableStrideWithBoundsCheck (
const ExecutionSpace& ,
1144 const DstView& dst_,
1145 const SrcView& src_,
1146 const IdxView& idx_,
1147 const ColView& col_,
1149 const size_t numCols_) :
// Reduction body for entry k; `result` is the thread-local partial result.
1158 KOKKOS_INLINE_FUNCTION
void
1159 operator() (
const size_type& k, value_type& result)
const
1161 typedef typename IdxView::non_const_value_type row_index_type;
1162 typedef typename ColView::non_const_value_type col_index_type;
1164 const row_index_type lclRow = idx(k);
// The row index is rejected when it is negative or >= dst.extent (0).
1165 if (lclRow < static_cast<row_index_type> (0) ||
1166 lclRow >= static_cast<row_index_type> (dst.extent (0))) {
// Entry k occupies numCols consecutive slots of src starting at k*numCols.
1170 const size_type offset = k*numCols;
1171 for (size_type j = 0; j < numCols; ++j) {
1172 const col_index_type lclCol = col(j);
// The column index is tested against dst.extent (1).
1174 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
// col(j) is re-read here; it is the same view entry that lclCol was read from.
1178 op (dst(lclRow, col(j)), src(offset+j));
// Both components of a partial result start at 1.
1184 KOKKOS_INLINE_FUNCTION
void
1185 init (value_type& initialResult)
const {
1186 initialResult.first = 1;
1187 initialResult.second = 1;
// Combine two partial results component-wise: 0 if either is 0, otherwise 1.
1190 KOKKOS_INLINE_FUNCTION
void
1191 join (
volatile value_type& dstResult,
1192 const volatile value_type& srcResult)
const
1194 dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
1195 dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
// Driver: runs the functor with parallel_reduce under the label below over
// [0, idx.size ()), then derives row/column error flags from the reduced pair.
1199 unpack (
const ExecutionSpace& execSpace,
1205 const size_type numCols)
1207 typedef typename IdxView::non_const_value_type row_index_type;
1208 typedef typename ColView::non_const_value_type col_index_type;
1210 Kokkos::pair<int, int> result (1, 1);
1211 Kokkos::parallel_reduce
1212 (
"Tpetra::MultiVector unpack (nonconstant stride) (with bounds check)",
1213 Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
1214 UnpackArrayMultiColumnVariableStrideWithBoundsCheck (execSpace, dst,
// Any value other than 1 in a component is treated as an error for it.
1219 const bool hasBadRows = (result.first != 1);
1220 const bool hasBadCols = (result.second != 1);
1221 const bool hasErr = hasBadRows || hasBadCols;
1223 std::ostringstream os;
// Row report: scan a host mirror of idx and collect every row index outside
// [0, dst.extent (0)).
// NOTE(review): the conditions guarding the row and column reports are not
// visible in this excerpt.
1230 auto idx_h = Kokkos::create_mirror_view (idx);
1233 std::vector<row_index_type> badRows;
1234 const size_type numInds = idx_h.extent (0);
1235 for (size_type k = 0; k < numInds; ++k) {
1236 if (idx_h(k) < static_cast<row_index_type> (0) ||
1237 idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1238 badRows.push_back (idx_h(k));
1241 os <<
"MultiVector multiple-column unpack kernel had "
1242 << badRows.size () <<
" out-of bounds row index/ices: [";
1243 for (
size_t k = 0; k < badRows.size (); ++k) {
1245 if (k + 1 < badRows.size ()) {
// Column report: same procedure on a host mirror of col, tested against
// dst.extent (1).
1257 auto col_h = Kokkos::create_mirror_view (col);
1260 std::vector<col_index_type> badCols;
1261 const size_type numInds = col_h.extent (0);
1262 for (size_type k = 0; k < numInds; ++k) {
1263 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1264 badCols.push_back (col_h(k));
1271 os <<
"MultiVector multiple-column unpack kernel had "
1272 << badCols.size () <<
" out-of bounds column index/ices: [";
1273 for (
size_t k = 0; k < badCols.size (); ++k) {
1275 if (k + 1 < badCols.size ()) {
// The accumulated message is reported by throwing std::runtime_error.
1282 throw std::runtime_error (os.str ());
// Entry point for unpacking a flat buffer into numCols columns of `dst`,
// selected through `col`, using the combine operation `op`.  The `debug`
// argument defaults to true.  Both the bounds-checking implementation and the
// plain implementation are invoked below.
// NOTE(review): the branch that selects between them is not visible in this
// excerpt; presumably it tests `debug` -- confirm.
1287 template <
typename ExecutionSpace,
1294 unpack_array_multi_column_variable_stride (
const ExecutionSpace& execSpace,
1300 const size_t numCols,
1301 const bool debug =
true)
// Compile-time requirements on the view arguments (View-ness and ranks).
1303 static_assert (Kokkos::Impl::is_view<DstView>::value,
1304 "DstView must be a Kokkos::View.");
1305 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1306 "SrcView must be a Kokkos::View.");
1307 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1308 "IdxView must be a Kokkos::View.");
1309 static_assert (Kokkos::Impl::is_view<ColView>::value,
1310 "ColView must be a Kokkos::View.");
1311 static_assert (static_cast<int> (DstView::rank) == 2,
1312 "DstView must be a rank-2 Kokkos::View.");
1313 static_assert (static_cast<int> (SrcView::rank) == 1,
1314 "SrcView must be a rank-1 Kokkos::View.");
1315 static_assert (static_cast<int> (IdxView::rank) == 1,
1316 "IdxView must be a rank-1 Kokkos::View.");
1317 static_assert (static_cast<int> (ColView::rank) == 1,
1318 "ColView must be a rank-1 Kokkos::View.");
// Path using the bounds-checking functor.
1321 typedef UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1322 DstView, SrcView, IdxView, ColView, Op> impl_type;
1323 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols);
// Path using the functor without index validation.
1326 typedef UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1327 DstView, SrcView, IdxView, ColView, Op> impl_type;
1328 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols);
// Functor that copies rows between two rank-2 views: for each k it reads
// toRow = dst_idx(k) and fromRow = src_idx(k), then assigns
// dst(toRow, j) = src(fromRow, j) for j in [0, numCols).
1332 template <
typename DstView,
typename SrcView,
1333 typename DstIdxView,
typename SrcIdxView>
1334 struct PermuteArrayMultiColumn {
// The execution space, and the index type used for k, come from DstView.
1335 typedef typename DstView::execution_space execution_space;
1336 typedef typename execution_space::size_type size_type;
// Constructor: stores the two data views, the two index views and the column
// count.
1344 PermuteArrayMultiColumn(
const DstView& dst_,
1345 const SrcView& src_,
1346 const DstIdxView& dst_idx_,
1347 const SrcIdxView& src_idx_,
1349 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1350 numCols(numCols_) {}
// Kernel body for entry k.
1352 KOKKOS_INLINE_FUNCTION
1353 void operator()(
const size_type k )
const {
1354 const typename DstIdxView::value_type toRow = dst_idx(k);
1355 const typename SrcIdxView::value_type fromRow = src_idx(k);
1356 for (
size_t j = 0; j < numCols; ++j)
1357 dst(toRow, j) = src(fromRow, j);
// Driver: launches this functor with parallel_for over k in [0, n), where n is
// the smaller of the two index-view sizes.
1360 static void permute(
const DstView& dst,
1362 const DstIdxView& dst_idx,
1363 const SrcIdxView& src_idx,
1365 const size_type n = std::min( dst_idx.size(), src_idx.size() );
1366 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1367 Kokkos::parallel_for (range_type (0, n),
1368 PermuteArrayMultiColumn (dst,src,dst_idx,src_idx,numCols));
// Entry point for the constant-stride permute: forwards all arguments to
// PermuteArrayMultiColumn<...>::permute.
1374 template <
typename DstView,
typename SrcView,
1375 typename DstIdxView,
typename SrcIdxView>
1376 void permute_array_multi_column(
const DstView& dst,
1378 const DstIdxView& dst_idx,
1379 const SrcIdxView& src_idx,
1381 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1382 dst, src, dst_idx, src_idx, numCols);
// Functor that copies selected columns of selected rows between two rank-2
// views: for each k it reads toRow = dst_idx(k) and fromRow = src_idx(k), then
// assigns dst(toRow, dst_col(j)) = src(fromRow, src_col(j)) for j in
// [0, numCols).
1385 template <
typename DstView,
typename SrcView,
1386 typename DstIdxView,
typename SrcIdxView,
1387 typename DstColView,
typename SrcColView>
1388 struct PermuteArrayMultiColumnVariableStride {
// The execution space, and the index type used for k, come from DstView.
1389 typedef typename DstView::execution_space execution_space;
1390 typedef typename execution_space::size_type size_type;
// Constructor: stores the two data views, the row-index views, the
// column-index views and the column count.
1400 PermuteArrayMultiColumnVariableStride(
const DstView& dst_,
1401 const SrcView& src_,
1402 const DstIdxView& dst_idx_,
1403 const SrcIdxView& src_idx_,
1404 const DstColView& dst_col_,
1405 const SrcColView& src_col_,
1407 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1408 dst_col(dst_col_), src_col(src_col_),
1409 numCols(numCols_) {}
// Kernel body for entry k.
1411 KOKKOS_INLINE_FUNCTION
1412 void operator()(
const size_type k )
const {
1413 const typename DstIdxView::value_type toRow = dst_idx(k);
1414 const typename SrcIdxView::value_type fromRow = src_idx(k);
1415 for (
size_t j = 0; j < numCols; ++j)
// Source and destination columns for slot j are looked up independently.
1416 dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
// Driver: launches this functor with parallel_for over k in [0, n), where n is
// the smaller of the two row-index-view sizes.
1419 static void permute(
const DstView& dst,
1421 const DstIdxView& dst_idx,
1422 const SrcIdxView& src_idx,
1423 const DstColView& dst_col,
1424 const SrcColView& src_col,
1426 const size_type n = std::min( dst_idx.size(), src_idx.size() );
1427 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1428 Kokkos::parallel_for (range_type (0, n),
1429 PermuteArrayMultiColumnVariableStride (dst, src,
// Entry point for the variable-stride permute: forwards all arguments to
// PermuteArrayMultiColumnVariableStride<...>::permute.
1440 template <
typename DstView,
typename SrcView,
1441 typename DstIdxView,
typename SrcIdxView,
1442 typename DstColView,
typename SrcColView>
1443 void permute_array_multi_column_variable_stride(
const DstView& dst,
1445 const DstIdxView& dst_idx,
1446 const SrcIdxView& src_idx,
1447 const DstColView& dst_col,
1448 const SrcColView& src_col,
1450 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1451 DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1452 dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1459 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP