Tpetra parallel linear algebra  Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
51 
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
54 #include <sstream>
55 #include <stdexcept>
56 
57 namespace Tpetra {
58 namespace KokkosRefactor {
59 namespace Details {
60 
66 namespace Impl {
67 
74 template<class IntegerType,
75  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
76 struct OutOfBounds {
77  static KOKKOS_INLINE_FUNCTION bool
78  test (const IntegerType x,
79  const IntegerType exclusiveUpperBound);
80 };
81 
82 // Partial specialization for the case where IntegerType IS signed.
83 template<class IntegerType>
84 struct OutOfBounds<IntegerType, true> {
85  static KOKKOS_INLINE_FUNCTION bool
86  test (const IntegerType x,
87  const IntegerType exclusiveUpperBound)
88  {
89  return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
90  }
91 };
92 
93 // Partial specialization for the case where IntegerType is NOT signed.
94 template<class IntegerType>
95 struct OutOfBounds<IntegerType, false> {
96  static KOKKOS_INLINE_FUNCTION bool
97  test (const IntegerType x,
98  const IntegerType exclusiveUpperBound)
99  {
100  return x >= exclusiveUpperBound;
101  }
102 };
103 
106 template<class IntegerType>
107 KOKKOS_INLINE_FUNCTION bool
108 outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
109 {
110  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
111 }
112 
113 } // namespace Impl
114 
115  // Functors for implementing packAndPrepare and unpackAndCombine
116  // through parallel_for
117 
118  template <typename DstView, typename SrcView, typename IdxView>
119  struct PackArraySingleColumn {
120  typedef typename DstView::execution_space execution_space;
121  typedef typename execution_space::size_type size_type;
122 
123  DstView dst;
124  SrcView src;
125  IdxView idx;
126  size_t col;
127 
128  PackArraySingleColumn(const DstView& dst_,
129  const SrcView& src_,
130  const IdxView& idx_,
131  size_t col_) :
132  dst(dst_), src(src_), idx(idx_), col(col_) {}
133 
134  KOKKOS_INLINE_FUNCTION
135  void operator()( const size_type k ) const {
136  dst(k) = src(idx(k), col);
137  }
138 
139  static void
140  pack (const DstView& dst,
141  const SrcView& src,
142  const IdxView& idx,
143  size_t col)
144  {
145  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
146  Kokkos::parallel_for (range_type (0, idx.size ()),
147  PackArraySingleColumn (dst,src,idx,col));
148  }
149  };
150 
151  template <typename DstView,
152  typename SrcView,
153  typename IdxView,
154  typename SizeType = typename DstView::execution_space::size_type>
155  class PackArraySingleColumnWithBoundsCheck {
156  private:
157  static_assert (Kokkos::Impl::is_view<DstView>::value,
158  "DstView must be a Kokkos::View.");
159  static_assert (Kokkos::Impl::is_view<SrcView>::value,
160  "SrcView must be a Kokkos::View.");
161  static_assert (Kokkos::Impl::is_view<IdxView>::value,
162  "IdxView must be a Kokkos::View.");
163  static_assert (static_cast<int> (DstView::rank) == 1,
164  "DstView must be a rank-1 Kokkos::View.");
165  static_assert (static_cast<int> (SrcView::rank) == 2,
166  "SrcView must be a rank-2 Kokkos::View.");
167  static_assert (static_cast<int> (IdxView::rank) == 1,
168  "IdxView must be a rank-1 Kokkos::View.");
169  static_assert (std::is_integral<SizeType>::value,
170  "SizeType must be a built-in integer type.");
171  public:
172  typedef SizeType size_type;
174  typedef int value_type;
175 
176  private:
177  DstView dst;
178  SrcView src;
179  IdxView idx;
180  size_type col;
181 
182  public:
183  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
184  const SrcView& src_,
185  const IdxView& idx_,
186  const size_type col_) :
187  dst (dst_), src (src_), idx (idx_), col (col_) {}
188 
189  KOKKOS_INLINE_FUNCTION void
190  operator() (const size_type& k, value_type& result) const {
191  typedef typename IdxView::non_const_value_type index_type;
192 
193  const index_type lclRow = idx(k);
194  if (lclRow < static_cast<index_type> (0) ||
195  lclRow >= static_cast<index_type> (src.extent (0))) {
196  result = 0; // failed!
197  }
198  else {
199  dst(k) = src(lclRow, col);
200  }
201  }
202 
203  KOKKOS_INLINE_FUNCTION
204  void init (value_type& initialResult) const {
205  initialResult = 1; // success
206  }
207 
208  KOKKOS_INLINE_FUNCTION void
209  join (volatile value_type& dstResult,
210  const volatile value_type& srcResult) const
211  {
212  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
213  }
214 
215  static void
216  pack (const DstView& dst,
217  const SrcView& src,
218  const IdxView& idx,
219  const size_type col)
220  {
221  typedef typename DstView::execution_space execution_space;
222  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
223  typedef typename IdxView::non_const_value_type index_type;
224 
225  int result = 1;
226  Kokkos::parallel_reduce (range_type (0, idx.size ()),
227  PackArraySingleColumnWithBoundsCheck (dst, src,
228  idx, col),
229  result);
230  if (result != 1) {
231  // Go back and find the out-of-bounds entries in the index
232  // array. Performance doesn't matter since we are already in
233  // an error state, so we can do this sequentially, on host.
234  auto idx_h = Kokkos::create_mirror_view (idx);
235  Kokkos::deep_copy (idx_h, idx);
236 
237  std::vector<index_type> badIndices;
238  const size_type numInds = idx_h.extent (0);
239  for (size_type k = 0; k < numInds; ++k) {
240  if (idx_h(k) < static_cast<index_type> (0) ||
241  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
242  badIndices.push_back (idx_h(k));
243  }
244  }
245 
246  std::ostringstream os;
247  os << "MultiVector single-column pack kernel had "
248  << badIndices.size () << " out-of bounds index/ices. "
249  "Here they are: [";
250  for (size_t k = 0; k < badIndices.size (); ++k) {
251  os << badIndices[k];
252  if (k + 1 < badIndices.size ()) {
253  os << ", ";
254  }
255  }
256  os << "].";
257  throw std::runtime_error (os.str ());
258  }
259  }
260  };
261 
262 
263  template <typename DstView, typename SrcView, typename IdxView>
264  void
265  pack_array_single_column (const DstView& dst,
266  const SrcView& src,
267  const IdxView& idx,
268  const size_t col,
269  const bool debug = true)
270  {
271  static_assert (Kokkos::Impl::is_view<DstView>::value,
272  "DstView must be a Kokkos::View.");
273  static_assert (Kokkos::Impl::is_view<SrcView>::value,
274  "SrcView must be a Kokkos::View.");
275  static_assert (Kokkos::Impl::is_view<IdxView>::value,
276  "IdxView must be a Kokkos::View.");
277  static_assert (static_cast<int> (DstView::rank) == 1,
278  "DstView must be a rank-1 Kokkos::View.");
279  static_assert (static_cast<int> (SrcView::rank) == 2,
280  "SrcView must be a rank-2 Kokkos::View.");
281  static_assert (static_cast<int> (IdxView::rank) == 1,
282  "IdxView must be a rank-1 Kokkos::View.");
283 
284  if (debug) {
285  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
286  impl_type::pack (dst, src, idx, col);
287  }
288  else {
289  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
290  impl_type::pack (dst, src, idx, col);
291  }
292  }
293 
294  template <typename DstView, typename SrcView, typename IdxView>
295  struct PackArrayMultiColumn {
296  typedef typename DstView::execution_space execution_space;
297  typedef typename execution_space::size_type size_type;
298 
299  DstView dst;
300  SrcView src;
301  IdxView idx;
302  size_t numCols;
303 
304  PackArrayMultiColumn(const DstView& dst_,
305  const SrcView& src_,
306  const IdxView& idx_,
307  size_t numCols_) :
308  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
309 
310  KOKKOS_INLINE_FUNCTION
311  void operator()( const size_type k ) const {
312  const typename IdxView::value_type localRow = idx(k);
313  const size_t offset = k*numCols;
314  for (size_t j = 0; j < numCols; ++j)
315  dst(offset + j) = src(localRow, j);
316  }
317 
318  static void pack(const DstView& dst,
319  const SrcView& src,
320  const IdxView& idx,
321  size_t numCols) {
322  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
323  Kokkos::parallel_for (range_type (0, idx.size ()),
324  PackArrayMultiColumn (dst,src,idx,numCols));
325  }
326  };
327 
328  template <typename DstView,
329  typename SrcView,
330  typename IdxView,
331  typename SizeType = typename DstView::execution_space::size_type>
332  class PackArrayMultiColumnWithBoundsCheck {
333  public:
334  typedef SizeType size_type;
336  typedef int value_type;
337 
338  private:
339  DstView dst;
340  SrcView src;
341  IdxView idx;
342  size_type numCols;
343 
344  public:
345  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
346  const SrcView& src_,
347  const IdxView& idx_,
348  const size_type numCols_) :
349  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
350 
351  KOKKOS_INLINE_FUNCTION void
352  operator() (const size_type& k, value_type& result) const {
353  typedef typename IdxView::non_const_value_type index_type;
354 
355  const index_type lclRow = idx(k);
356  if (lclRow < static_cast<index_type> (0) ||
357  lclRow >= static_cast<index_type> (src.extent (0))) {
358  result = 0; // failed!
359  }
360  else {
361  const size_type offset = k*numCols;
362  for (size_type j = 0; j < numCols; ++j) {
363  dst(offset + j) = src(lclRow, j);
364  }
365  }
366  }
367 
368  KOKKOS_INLINE_FUNCTION
369  void init (value_type& initialResult) const {
370  initialResult = 1; // success
371  }
372 
373  KOKKOS_INLINE_FUNCTION void
374  join (volatile value_type& dstResult,
375  const volatile value_type& srcResult) const
376  {
377  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
378  }
379 
380  static void
381  pack (const DstView& dst,
382  const SrcView& src,
383  const IdxView& idx,
384  const size_type numCols)
385  {
386  typedef typename DstView::execution_space execution_space;
387  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
388  typedef typename IdxView::non_const_value_type index_type;
389 
390  int result = 1;
391  Kokkos::parallel_reduce (range_type (0, idx.size ()),
392  PackArrayMultiColumnWithBoundsCheck (dst, src,
393  idx, numCols),
394  result);
395  if (result != 1) {
396  // Go back and find the out-of-bounds entries in the index
397  // array. Performance doesn't matter since we are already in
398  // an error state, so we can do this sequentially, on host.
399  auto idx_h = Kokkos::create_mirror_view (idx);
400  Kokkos::deep_copy (idx_h, idx);
401 
402  std::vector<index_type> badIndices;
403  const size_type numInds = idx_h.extent (0);
404  for (size_type k = 0; k < numInds; ++k) {
405  if (idx_h(k) < static_cast<index_type> (0) ||
406  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
407  badIndices.push_back (idx_h(k));
408  }
409  }
410 
411  std::ostringstream os;
412  os << "MultiVector multiple-column pack kernel had "
413  << badIndices.size () << " out-of bounds index/ices. "
414  "Here they are: [";
415  for (size_t k = 0; k < badIndices.size (); ++k) {
416  os << badIndices[k];
417  if (k + 1 < badIndices.size ()) {
418  os << ", ";
419  }
420  }
421  os << "].";
422  throw std::runtime_error (os.str ());
423  }
424  }
425  };
426 
427 
428  template <typename DstView,
429  typename SrcView,
430  typename IdxView>
431  void
432  pack_array_multi_column (const DstView& dst,
433  const SrcView& src,
434  const IdxView& idx,
435  const size_t numCols,
436  const bool debug = true)
437  {
438  static_assert (Kokkos::Impl::is_view<DstView>::value,
439  "DstView must be a Kokkos::View.");
440  static_assert (Kokkos::Impl::is_view<SrcView>::value,
441  "SrcView must be a Kokkos::View.");
442  static_assert (Kokkos::Impl::is_view<IdxView>::value,
443  "IdxView must be a Kokkos::View.");
444  static_assert (static_cast<int> (DstView::rank) == 1,
445  "DstView must be a rank-1 Kokkos::View.");
446  static_assert (static_cast<int> (SrcView::rank) == 2,
447  "SrcView must be a rank-2 Kokkos::View.");
448  static_assert (static_cast<int> (IdxView::rank) == 1,
449  "IdxView must be a rank-1 Kokkos::View.");
450 
451  if (debug) {
452  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
453  SrcView, IdxView> impl_type;
454  impl_type::pack (dst, src, idx, numCols);
455  }
456  else {
457  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
458  impl_type::pack (dst, src, idx, numCols);
459  }
460  }
461 
462  template <typename DstView, typename SrcView, typename IdxView,
463  typename ColView>
464  struct PackArrayMultiColumnVariableStride {
465  typedef typename DstView::execution_space execution_space;
466  typedef typename execution_space::size_type size_type;
467 
468  DstView dst;
469  SrcView src;
470  IdxView idx;
471  ColView col;
472  size_t numCols;
473 
474  PackArrayMultiColumnVariableStride(const DstView& dst_,
475  const SrcView& src_,
476  const IdxView& idx_,
477  const ColView& col_,
478  size_t numCols_) :
479  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
480 
481  KOKKOS_INLINE_FUNCTION
482  void operator()( const size_type k ) const {
483  const typename IdxView::value_type localRow = idx(k);
484  const size_t offset = k*numCols;
485  for (size_t j = 0; j < numCols; ++j)
486  dst(offset + j) = src(localRow, col(j));
487  }
488 
489  static void pack(const DstView& dst,
490  const SrcView& src,
491  const IdxView& idx,
492  const ColView& col,
493  size_t numCols) {
494  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
495  Kokkos::parallel_for (range_type (0, idx.size ()),
496  PackArrayMultiColumnVariableStride(
497  dst,src,idx,col,numCols) );
498  }
499  };
500 
501  template <typename DstView,
502  typename SrcView,
503  typename IdxView,
504  typename ColView,
505  typename SizeType = typename DstView::execution_space::size_type>
506  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
507  public:
508  typedef SizeType size_type;
510  typedef Kokkos::pair<int, int> value_type;
511 
512  private:
513  DstView dst;
514  SrcView src;
515  IdxView idx;
516  ColView col;
517  size_type numCols;
518 
519  public:
520  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
521  const SrcView& src_,
522  const IdxView& idx_,
523  const ColView& col_,
524  const size_type numCols_) :
525  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
526 
527  KOKKOS_INLINE_FUNCTION void
528  operator() (const size_type& k, value_type& result) const {
529  typedef typename IdxView::non_const_value_type row_index_type;
530  typedef typename ColView::non_const_value_type col_index_type;
531 
532  const row_index_type lclRow = idx(k);
533  if (lclRow < static_cast<row_index_type> (0) ||
534  lclRow >= static_cast<row_index_type> (src.extent (0))) {
535  result.first = 0; // failed!
536  }
537  else {
538  const size_type offset = k*numCols;
539  for (size_type j = 0; j < numCols; ++j) {
540  const col_index_type lclCol = col(j);
541  if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
542  result.second = 0; // failed!
543  }
544  else { // all indices are valid; do the assignment
545  dst(offset + j) = src(lclRow, lclCol);
546  }
547  }
548  }
549  }
550 
551  KOKKOS_INLINE_FUNCTION void
552  init (value_type& initialResult) const {
553  initialResult.first = 1; // success
554  initialResult.second = 1; // success
555  }
556 
557  KOKKOS_INLINE_FUNCTION void
558  join (volatile value_type& dstResult,
559  const volatile value_type& srcResult) const
560  {
561  dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
562  dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
563  }
564 
565  static void
566  pack (const DstView& dst,
567  const SrcView& src,
568  const IdxView& idx,
569  const ColView& col,
570  const size_type numCols)
571  {
572  using Kokkos::parallel_reduce;
573  typedef typename DstView::execution_space execution_space;
574  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
575  typedef typename IdxView::non_const_value_type row_index_type;
576  typedef typename ColView::non_const_value_type col_index_type;
577 
578  Kokkos::pair<int, int> result (1, 1);
579  parallel_reduce (range_type (0, idx.size ()),
580  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src,
581  idx, col,
582  numCols),
583  result);
584  const bool hasBadRows = (result.first != 1);
585  const bool hasBadCols = (result.second != 1);
586  const bool hasErr = hasBadRows || hasBadCols;
587  if (hasErr) {
588  std::ostringstream os; // for error reporting
589 
590  if (hasBadRows) {
591  // Go back and find the out-of-bounds entries in the array of
592  // row indices. Performance doesn't matter since we are already
593  // in an error state, so we can do this sequentially, on host.
594  auto idx_h = Kokkos::create_mirror_view (idx);
595  Kokkos::deep_copy (idx_h, idx);
596 
597  std::vector<row_index_type> badRows;
598  const size_type numInds = idx_h.extent (0);
599  for (size_type k = 0; k < numInds; ++k) {
600  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
601  badRows.push_back (idx_h(k));
602  }
603  }
604  os << "MultiVector multiple-column pack kernel had "
605  << badRows.size () << " out-of bounds row index/ices: [";
606  for (size_t k = 0; k < badRows.size (); ++k) {
607  os << badRows[k];
608  if (k + 1 < badRows.size ()) {
609  os << ", ";
610  }
611  }
612  os << "].";
613  } // hasBadRows
614 
615  if (hasBadCols) {
616  // Go back and find the out-of-bounds entries in the array
617  // of column indices. Performance doesn't matter since we
618  // are already in an error state, so we can do this
619  // sequentially, on host.
620  auto col_h = Kokkos::create_mirror_view (col);
621  Kokkos::deep_copy (col_h, col);
622 
623  std::vector<col_index_type> badCols;
624  const size_type numInds = col_h.extent (0);
625  for (size_type k = 0; k < numInds; ++k) {
626  if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
627  badCols.push_back (col_h(k));
628  }
629  }
630 
631  if (hasBadRows) {
632  os << " ";
633  }
634  os << "MultiVector multiple-column pack kernel had "
635  << badCols.size () << " out-of bounds column index/ices: [";
636  for (size_t k = 0; k < badCols.size (); ++k) {
637  os << badCols[k];
638  if (k + 1 < badCols.size ()) {
639  os << ", ";
640  }
641  }
642  os << "].";
643  } // hasBadCols
644 
645  throw std::runtime_error (os.str ());
646  } // hasErr
647  }
648  };
649 
650  template <typename DstView,
651  typename SrcView,
652  typename IdxView,
653  typename ColView>
654  void
655  pack_array_multi_column_variable_stride (const DstView& dst,
656  const SrcView& src,
657  const IdxView& idx,
658  const ColView& col,
659  const size_t numCols,
660  const bool debug = true)
661  {
662  static_assert (Kokkos::Impl::is_view<DstView>::value,
663  "DstView must be a Kokkos::View.");
664  static_assert (Kokkos::Impl::is_view<SrcView>::value,
665  "SrcView must be a Kokkos::View.");
666  static_assert (Kokkos::Impl::is_view<IdxView>::value,
667  "IdxView must be a Kokkos::View.");
668  static_assert (Kokkos::Impl::is_view<ColView>::value,
669  "ColView must be a Kokkos::View.");
670  static_assert (static_cast<int> (DstView::rank) == 1,
671  "DstView must be a rank-1 Kokkos::View.");
672  static_assert (static_cast<int> (SrcView::rank) == 2,
673  "SrcView must be a rank-2 Kokkos::View.");
674  static_assert (static_cast<int> (IdxView::rank) == 1,
675  "IdxView must be a rank-1 Kokkos::View.");
676  static_assert (static_cast<int> (ColView::rank) == 1,
677  "ColView must be a rank-1 Kokkos::View.");
678 
679  if (debug) {
680  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
681  SrcView, IdxView, ColView> impl_type;
682  impl_type::pack (dst, src, idx, col, numCols);
683  }
684  else {
685  typedef PackArrayMultiColumnVariableStride<DstView,
686  SrcView, IdxView, ColView> impl_type;
687  impl_type::pack (dst, src, idx, col, numCols);
688  }
689  }
690 
691  // FIXME (mfh 16 Dec 2016) It looks like these are totally generic
692  // and don't get specialized in Stokhos. This suggests we can
693  // template them on the execution space, and specialize them for
694  // Kokkos::Serial so they don't do atomic update operations. It
695  // might be better to make the unpack kernels (the pack kernels
696  // don't use atomic updates) functions that we prebuild, in the
697  // manner of KokkosKernels, or at least handle via ETI.
698 
699  template<class ExecutionSpace>
700  struct InsertOp {
701  template <typename Scalar>
702  KOKKOS_INLINE_FUNCTION
703  void operator() (Scalar& dest, const Scalar& src) const {
704  Kokkos::atomic_assign(&dest, src);
705  }
706  };
707 
708 #ifdef KOKKOS_ENABLE_SERIAL
709  template<>
710  struct InsertOp< ::Kokkos::Serial > {
711  template <typename Scalar>
712  KOKKOS_INLINE_FUNCTION
713  void operator() (Scalar& dest, const Scalar& src) const {
714  dest = src; // no need for an atomic operation here
715  }
716  };
717 #endif // KOKKOS_ENABLE_SERIAL
718 
719  template<class ExecutionSpace>
720  struct AddOp {
721  template <typename Scalar>
722  KOKKOS_INLINE_FUNCTION
723  void operator() (Scalar& dest, const Scalar& src) const {
724  Kokkos::atomic_add(&dest, src);
725  }
726  };
727 
728 #ifdef KOKKOS_ENABLE_SERIAL
729  template<>
730  struct AddOp< ::Kokkos::Serial > {
731  template <typename Scalar>
732  KOKKOS_INLINE_FUNCTION
733  void operator() (Scalar& dest, const Scalar& src) const {
734  dest += src; // no need for an atomic operation here
735  }
736  };
737 #endif // KOKKOS_ENABLE_SERIAL
738 
739  template<class ExecutionSpace>
740  struct AbsMaxOp {
741  // ETP: Is this really what we want? This seems very odd if
742  // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
743  //
744  // mfh: I didn't write this code, but note that we don't use T =
745  // Scalar here, we use T = ArithTraits<Scalar>::mag_type. That
746  // makes this code reasonable.
747  template <typename T>
748  KOKKOS_INLINE_FUNCTION
749  T max(const T& a, const T& b) const { return a > b ? a : b; }
750 
751  template <typename Scalar>
752  KOKKOS_INLINE_FUNCTION
753  void operator() (Scalar& dest, const Scalar& src) const {
754  typedef Kokkos::Details::ArithTraits<Scalar> SCT;
755  Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
756  }
757  };
758 
759 #ifdef KOKKOS_ENABLE_SERIAL
760  template<>
761  struct AbsMaxOp< ::Kokkos::Serial > {
762  // ETP: Is this really what we want? This seems very odd if
763  // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
764  //
765  // mfh: I didn't write this code, but note that we don't use T =
766  // Scalar here, we use T = ArithTraits<Scalar>::mag_type. That
767  // makes this code reasonable.
768  template <typename T>
769  KOKKOS_INLINE_FUNCTION
770  T max(const T& a, const T& b) const { return a > b ? a : b; }
771 
772  template <typename Scalar>
773  KOKKOS_INLINE_FUNCTION
774  void operator() (Scalar& dest, const Scalar& src) const {
775  typedef Kokkos::Details::ArithTraits<Scalar> SCT;
776  // no need for an atomic operation here
777  dest = static_cast<Scalar> (max (SCT::abs (dest), SCT::abs (src)));
778  }
779  };
780 #endif // KOKKOS_ENABLE_SERIAL
781 
782  template <typename ExecutionSpace,
783  typename DstView,
784  typename SrcView,
785  typename IdxView,
786  typename Op>
787  class UnpackArrayMultiColumn {
788  private:
789  static_assert (Kokkos::Impl::is_view<DstView>::value,
790  "DstView must be a Kokkos::View.");
791  static_assert (Kokkos::Impl::is_view<SrcView>::value,
792  "SrcView must be a Kokkos::View.");
793  static_assert (Kokkos::Impl::is_view<IdxView>::value,
794  "IdxView must be a Kokkos::View.");
795  static_assert (static_cast<int> (DstView::rank) == 2,
796  "DstView must be a rank-2 Kokkos::View.");
797  static_assert (static_cast<int> (SrcView::rank) == 1,
798  "SrcView must be a rank-1 Kokkos::View.");
799  static_assert (static_cast<int> (IdxView::rank) == 1,
800  "IdxView must be a rank-1 Kokkos::View.");
801 
802  public:
803  typedef typename ExecutionSpace::execution_space execution_space;
804  typedef typename execution_space::size_type size_type;
805 
806  private:
807  DstView dst;
808  SrcView src;
809  IdxView idx;
810  Op op;
811  size_t numCols;
812 
813  public:
814  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
815  const DstView& dst_,
816  const SrcView& src_,
817  const IdxView& idx_,
818  const Op& op_,
819  const size_t numCols_) :
820  dst (dst_),
821  src (src_),
822  idx (idx_),
823  op (op_),
824  numCols (numCols_)
825  {}
826 
827  KOKKOS_INLINE_FUNCTION void
828  operator() (const size_type k) const
829  {
830  const typename IdxView::value_type localRow = idx(k);
831  const size_t offset = k*numCols;
832  for (size_t j = 0; j < numCols; ++j) {
833  op (dst(localRow, j), src(offset+j));
834  }
835  }
836 
837  static void
838  unpack (const ExecutionSpace& execSpace,
839  const DstView& dst,
840  const SrcView& src,
841  const IdxView& idx,
842  const Op& op,
843  const size_t numCols)
844  {
845  Kokkos::parallel_for
846  ("Tpetra::MultiVector unpack (constant stride)",
847  Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
848  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
849  }
850  };
851 
852  template <typename ExecutionSpace,
853  typename DstView,
854  typename SrcView,
855  typename IdxView,
856  typename Op,
857  typename SizeType = typename ExecutionSpace::execution_space::size_type>
858  class UnpackArrayMultiColumnWithBoundsCheck {
859  private:
860  static_assert (Kokkos::Impl::is_view<DstView>::value,
861  "DstView must be a Kokkos::View.");
862  static_assert (Kokkos::Impl::is_view<SrcView>::value,
863  "SrcView must be a Kokkos::View.");
864  static_assert (Kokkos::Impl::is_view<IdxView>::value,
865  "IdxView must be a Kokkos::View.");
866  static_assert (static_cast<int> (DstView::rank) == 2,
867  "DstView must be a rank-2 Kokkos::View.");
868  static_assert (static_cast<int> (SrcView::rank) == 1,
869  "SrcView must be a rank-1 Kokkos::View.");
870  static_assert (static_cast<int> (IdxView::rank) == 1,
871  "IdxView must be a rank-1 Kokkos::View.");
872  static_assert (std::is_integral<SizeType>::value,
873  "SizeType must be a built-in integer type.");
874 
875  public:
876  typedef typename ExecutionSpace::execution_space execution_space;
877  typedef SizeType size_type;
879  typedef int value_type;
880 
881  private:
882  DstView dst;
883  SrcView src;
884  IdxView idx;
885  Op op;
886  size_type numCols;
887 
888  public:
889  UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
890  const DstView& dst_,
891  const SrcView& src_,
892  const IdxView& idx_,
893  const Op& op_,
894  const size_type numCols_) :
895  dst (dst_),
896  src (src_),
897  idx (idx_),
898  op (op_),
899  numCols (numCols_)
900  {}
901 
902  KOKKOS_INLINE_FUNCTION
903  void operator() (const size_type& k, value_type& result) const
904  {
905  typedef typename IdxView::non_const_value_type index_type;
906 
907  const index_type lclRow = idx(k);
908  if (lclRow < static_cast<index_type> (0) ||
909  lclRow >= static_cast<index_type> (dst.extent (0))) {
910  result = 0; // failed!
911  }
912  else {
913  const size_type offset = k*numCols;
914  for (size_type j = 0; j < numCols; ++j) {
915  op (dst(lclRow,j), src(offset+j));
916  }
917  }
918  }
919 
920  KOKKOS_INLINE_FUNCTION
921  void init (value_type& initialResult) const {
922  initialResult = 1; // success
923  }
924 
925  KOKKOS_INLINE_FUNCTION void
926  join (volatile value_type& dstResult,
927  const volatile value_type& srcResult) const
928  {
929  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
930  }
931 
932  static void
933  unpack (const ExecutionSpace& execSpace,
934  const DstView& dst,
935  const SrcView& src,
936  const IdxView& idx,
937  const Op& op,
938  const size_type numCols)
939  {
940  typedef typename IdxView::non_const_value_type index_type;
941 
942  int result = 1;
943  Kokkos::parallel_reduce
944  ("Tpetra::MultiVector unpack (constant stride) (with bounds check)",
945  Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
946  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
947  idx, op, numCols),
948  result);
949 
950  if (result != 1) {
951  // Go back and find the out-of-bounds entries in the index
952  // array. Performance doesn't matter since we are already in
953  // an error state, so we can do this sequentially, on host.
954  auto idx_h = Kokkos::create_mirror_view (idx);
955  Kokkos::deep_copy (idx_h, idx);
956 
957  std::vector<index_type> badIndices;
958  const size_type numInds = idx_h.extent (0);
959  for (size_type k = 0; k < numInds; ++k) {
960  if (idx_h(k) < static_cast<index_type> (0) ||
961  idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
962  badIndices.push_back (idx_h(k));
963  }
964  }
965 
966  std::ostringstream os;
967  os << "MultiVector unpack kernel had " << badIndices.size ()
968  << " out-of bounds index/ices. Here they are: [";
969  for (size_t k = 0; k < badIndices.size (); ++k) {
970  os << badIndices[k];
971  if (k + 1 < badIndices.size ()) {
972  os << ", ";
973  }
974  }
975  os << "].";
976  throw std::runtime_error (os.str ());
977  }
978  }
979  };
980 
981  template <typename ExecutionSpace,
982  typename DstView,
983  typename SrcView,
984  typename IdxView,
985  typename Op>
986  void
987  unpack_array_multi_column (const ExecutionSpace& execSpace,
988  const DstView& dst,
989  const SrcView& src,
990  const IdxView& idx,
991  const Op& op,
992  const size_t numCols,
993  const bool debug = true)
994  {
995  static_assert (Kokkos::Impl::is_view<DstView>::value,
996  "DstView must be a Kokkos::View.");
997  static_assert (Kokkos::Impl::is_view<SrcView>::value,
998  "SrcView must be a Kokkos::View.");
999  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1000  "IdxView must be a Kokkos::View.");
1001  static_assert (static_cast<int> (DstView::rank) == 2,
1002  "DstView must be a rank-2 Kokkos::View.");
1003  static_assert (static_cast<int> (SrcView::rank) == 1,
1004  "SrcView must be a rank-1 Kokkos::View.");
1005  static_assert (static_cast<int> (IdxView::rank) == 1,
1006  "IdxView must be a rank-1 Kokkos::View.");
1007 
1008  if (debug) {
1009  typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1010  DstView, SrcView, IdxView, Op> impl_type;
1011  impl_type::unpack (execSpace, dst, src, idx, op, numCols);
1012  }
1013  else {
1014  typedef UnpackArrayMultiColumn<ExecutionSpace,
1015  DstView, SrcView, IdxView, Op> impl_type;
1016  impl_type::unpack (execSpace, dst, src, idx, op, numCols);
1017  }
1018  }
1019 
1020  template <typename ExecutionSpace,
1021  typename DstView,
1022  typename SrcView,
1023  typename IdxView,
1024  typename ColView,
1025  typename Op>
1026  class UnpackArrayMultiColumnVariableStride {
1027  private:
1028  static_assert (Kokkos::Impl::is_view<DstView>::value,
1029  "DstView must be a Kokkos::View.");
1030  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1031  "SrcView must be a Kokkos::View.");
1032  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1033  "IdxView must be a Kokkos::View.");
1034  static_assert (Kokkos::Impl::is_view<ColView>::value,
1035  "ColView must be a Kokkos::View.");
1036  static_assert (static_cast<int> (DstView::rank) == 2,
1037  "DstView must be a rank-2 Kokkos::View.");
1038  static_assert (static_cast<int> (SrcView::rank) == 1,
1039  "SrcView must be a rank-1 Kokkos::View.");
1040  static_assert (static_cast<int> (IdxView::rank) == 1,
1041  "IdxView must be a rank-1 Kokkos::View.");
1042  static_assert (static_cast<int> (ColView::rank) == 1,
1043  "ColView must be a rank-1 Kokkos::View.");
1044 
1045  public:
1046  typedef typename ExecutionSpace::execution_space execution_space;
1047  typedef typename execution_space::size_type size_type;
1048 
1049  private:
1050  DstView dst;
1051  SrcView src;
1052  IdxView idx;
1053  ColView col;
1054  Op op;
1055  size_t numCols;
1056 
1057  public:
1058  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1059  const DstView& dst_,
1060  const SrcView& src_,
1061  const IdxView& idx_,
1062  const ColView& col_,
1063  const Op& op_,
1064  size_t numCols_) :
1065  dst (dst_),
1066  src (src_),
1067  idx (idx_),
1068  col (col_),
1069  op (op_),
1070  numCols (numCols_)
1071  {}
1072 
1073  KOKKOS_INLINE_FUNCTION void
1074  operator() (const size_type k) const
1075  {
1076  const typename IdxView::value_type localRow = idx(k);
1077  const size_t offset = k*numCols;
1078  for (size_t j = 0; j < numCols; ++j) {
1079  op (dst(localRow, col(j)), src(offset+j));
1080  }
1081  }
1082 
1083  static void
1084  unpack (const ExecutionSpace& execSpace,
1085  const DstView& dst,
1086  const SrcView& src,
1087  const IdxView& idx,
1088  const ColView& col,
1089  const Op& op,
1090  const size_t numCols)
1091  {
1092  Kokkos::parallel_for
1093  ("Tpetra::MultiVector unpack (nonconstant stride)",
1094  Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
1095  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1096  idx, col, op, numCols));
1097  }
1098  };
1099 
1100  template <typename ExecutionSpace,
1101  typename DstView,
1102  typename SrcView,
1103  typename IdxView,
1104  typename ColView,
1105  typename Op,
1106  typename SizeType = typename ExecutionSpace::execution_space::size_type>
1107  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1108  private:
1109  static_assert (Kokkos::Impl::is_view<DstView>::value,
1110  "DstView must be a Kokkos::View.");
1111  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1112  "SrcView must be a Kokkos::View.");
1113  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1114  "IdxView must be a Kokkos::View.");
1115  static_assert (Kokkos::Impl::is_view<ColView>::value,
1116  "ColView must be a Kokkos::View.");
1117  static_assert (static_cast<int> (DstView::rank) == 2,
1118  "DstView must be a rank-2 Kokkos::View.");
1119  static_assert (static_cast<int> (SrcView::rank) == 1,
1120  "SrcView must be a rank-1 Kokkos::View.");
1121  static_assert (static_cast<int> (IdxView::rank) == 1,
1122  "IdxView must be a rank-1 Kokkos::View.");
1123  static_assert (static_cast<int> (ColView::rank) == 1,
1124  "ColView must be a rank-1 Kokkos::View.");
1125  static_assert (std::is_integral<SizeType>::value,
1126  "SizeType must be a built-in integer type.");
1127 
1128  public:
1129  typedef typename ExecutionSpace::execution_space execution_space;
1130  typedef SizeType size_type;
1132  typedef Kokkos::pair<int, int> value_type;
1133 
1134  private:
1135  DstView dst;
1136  SrcView src;
1137  IdxView idx;
1138  ColView col;
1139  Op op;
1140  size_type numCols;
1141 
1142  public:
1143  UnpackArrayMultiColumnVariableStrideWithBoundsCheck (const ExecutionSpace& /* execSpace */,
1144  const DstView& dst_,
1145  const SrcView& src_,
1146  const IdxView& idx_,
1147  const ColView& col_,
1148  const Op& op_,
1149  const size_t numCols_) :
1150  dst (dst_),
1151  src (src_),
1152  idx (idx_),
1153  col (col_),
1154  op (op_),
1155  numCols (numCols_)
1156  {}
1157 
1158  KOKKOS_INLINE_FUNCTION void
1159  operator() (const size_type& k, value_type& result) const
1160  {
1161  typedef typename IdxView::non_const_value_type row_index_type;
1162  typedef typename ColView::non_const_value_type col_index_type;
1163 
1164  const row_index_type lclRow = idx(k);
1165  if (lclRow < static_cast<row_index_type> (0) ||
1166  lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1167  result.first = 0; // failed!
1168  }
1169  else {
1170  const size_type offset = k*numCols;
1171  for (size_type j = 0; j < numCols; ++j) {
1172  const col_index_type lclCol = col(j);
1173 
1174  if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1175  result.second = 0; // failed!
1176  }
1177  else { // all indices are valid; apply the op
1178  op (dst(lclRow, col(j)), src(offset+j));
1179  }
1180  }
1181  }
1182  }
1183 
1184  KOKKOS_INLINE_FUNCTION void
1185  init (value_type& initialResult) const {
1186  initialResult.first = 1; // success
1187  initialResult.second = 1; // success
1188  }
1189 
1190  KOKKOS_INLINE_FUNCTION void
1191  join (volatile value_type& dstResult,
1192  const volatile value_type& srcResult) const
1193  {
1194  dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
1195  dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
1196  }
1197 
1198  static void
1199  unpack (const ExecutionSpace& execSpace,
1200  const DstView& dst,
1201  const SrcView& src,
1202  const IdxView& idx,
1203  const ColView& col,
1204  const Op& op,
1205  const size_type numCols)
1206  {
1207  typedef typename IdxView::non_const_value_type row_index_type;
1208  typedef typename ColView::non_const_value_type col_index_type;
1209 
1210  Kokkos::pair<int, int> result (1, 1);
1211  Kokkos::parallel_reduce
1212  ("Tpetra::MultiVector unpack (nonconstant stride) (with bounds check)",
1213  Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
1214  UnpackArrayMultiColumnVariableStrideWithBoundsCheck (execSpace, dst,
1215  src, idx, col,
1216  op, numCols),
1217  result);
1218 
1219  const bool hasBadRows = (result.first != 1);
1220  const bool hasBadCols = (result.second != 1);
1221  const bool hasErr = hasBadRows || hasBadCols;
1222  if (hasErr) {
1223  std::ostringstream os; // for error reporting
1224 
1225  if (hasBadRows) {
1226  // Go back and find the out-of-bounds entries in the array
1227  // of row indices. Performance doesn't matter since we are
1228  // already in an error state, so we can do this
1229  // sequentially, on host.
1230  auto idx_h = Kokkos::create_mirror_view (idx);
1231  Kokkos::deep_copy (idx_h, idx);
1232 
1233  std::vector<row_index_type> badRows;
1234  const size_type numInds = idx_h.extent (0);
1235  for (size_type k = 0; k < numInds; ++k) {
1236  if (idx_h(k) < static_cast<row_index_type> (0) ||
1237  idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1238  badRows.push_back (idx_h(k));
1239  }
1240  }
1241  os << "MultiVector multiple-column unpack kernel had "
1242  << badRows.size () << " out-of bounds row index/ices: [";
1243  for (size_t k = 0; k < badRows.size (); ++k) {
1244  os << badRows[k];
1245  if (k + 1 < badRows.size ()) {
1246  os << ", ";
1247  }
1248  }
1249  os << "].";
1250  } // hasBadRows
1251 
1252  if (hasBadCols) {
1253  // Go back and find the out-of-bounds entries in the array
1254  // of column indices. Performance doesn't matter since we
1255  // are already in an error state, so we can do this
1256  // sequentially, on host.
1257  auto col_h = Kokkos::create_mirror_view (col);
1258  Kokkos::deep_copy (col_h, col);
1259 
1260  std::vector<col_index_type> badCols;
1261  const size_type numInds = col_h.extent (0);
1262  for (size_type k = 0; k < numInds; ++k) {
1263  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1264  badCols.push_back (col_h(k));
1265  }
1266  }
1267 
1268  if (hasBadRows) {
1269  os << " ";
1270  }
1271  os << "MultiVector multiple-column unpack kernel had "
1272  << badCols.size () << " out-of bounds column index/ices: [";
1273  for (size_t k = 0; k < badCols.size (); ++k) {
1274  os << badCols[k];
1275  if (k + 1 < badCols.size ()) {
1276  os << ", ";
1277  }
1278  }
1279  os << "].";
1280  } // hasBadCols
1281 
1282  throw std::runtime_error (os.str ());
1283  } // hasErr
1284  }
1285  };
1286 
1287  template <typename ExecutionSpace,
1288  typename DstView,
1289  typename SrcView,
1290  typename IdxView,
1291  typename ColView,
1292  typename Op>
1293  void
1294  unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1295  const DstView& dst,
1296  const SrcView& src,
1297  const IdxView& idx,
1298  const ColView& col,
1299  const Op& op,
1300  const size_t numCols,
1301  const bool debug = true)
1302  {
1303  static_assert (Kokkos::Impl::is_view<DstView>::value,
1304  "DstView must be a Kokkos::View.");
1305  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1306  "SrcView must be a Kokkos::View.");
1307  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1308  "IdxView must be a Kokkos::View.");
1309  static_assert (Kokkos::Impl::is_view<ColView>::value,
1310  "ColView must be a Kokkos::View.");
1311  static_assert (static_cast<int> (DstView::rank) == 2,
1312  "DstView must be a rank-2 Kokkos::View.");
1313  static_assert (static_cast<int> (SrcView::rank) == 1,
1314  "SrcView must be a rank-1 Kokkos::View.");
1315  static_assert (static_cast<int> (IdxView::rank) == 1,
1316  "IdxView must be a rank-1 Kokkos::View.");
1317  static_assert (static_cast<int> (ColView::rank) == 1,
1318  "ColView must be a rank-1 Kokkos::View.");
1319 
1320  if (debug) {
1321  typedef UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1322  DstView, SrcView, IdxView, ColView, Op> impl_type;
1323  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols);
1324  }
1325  else {
1326  typedef UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1327  DstView, SrcView, IdxView, ColView, Op> impl_type;
1328  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols);
1329  }
1330  }
1331 
1332  template <typename DstView, typename SrcView,
1333  typename DstIdxView, typename SrcIdxView>
1334  struct PermuteArrayMultiColumn {
1335  typedef typename DstView::execution_space execution_space;
1336  typedef typename execution_space::size_type size_type;
1337 
1338  DstView dst;
1339  SrcView src;
1340  DstIdxView dst_idx;
1341  SrcIdxView src_idx;
1342  size_t numCols;
1343 
1344  PermuteArrayMultiColumn(const DstView& dst_,
1345  const SrcView& src_,
1346  const DstIdxView& dst_idx_,
1347  const SrcIdxView& src_idx_,
1348  size_t numCols_) :
1349  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1350  numCols(numCols_) {}
1351 
1352  KOKKOS_INLINE_FUNCTION
1353  void operator()( const size_type k ) const {
1354  const typename DstIdxView::value_type toRow = dst_idx(k);
1355  const typename SrcIdxView::value_type fromRow = src_idx(k);
1356  for (size_t j = 0; j < numCols; ++j)
1357  dst(toRow, j) = src(fromRow, j);
1358  }
1359 
1360  static void permute(const DstView& dst,
1361  const SrcView& src,
1362  const DstIdxView& dst_idx,
1363  const SrcIdxView& src_idx,
1364  size_t numCols) {
1365  const size_type n = std::min( dst_idx.size(), src_idx.size() );
1366  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1367  Kokkos::parallel_for (range_type (0, n),
1368  PermuteArrayMultiColumn (dst,src,dst_idx,src_idx,numCols));
1369  }
1370  };
1371 
1372  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1373  // SrcView::Rank == 2
1374  template <typename DstView, typename SrcView,
1375  typename DstIdxView, typename SrcIdxView>
1376  void permute_array_multi_column(const DstView& dst,
1377  const SrcView& src,
1378  const DstIdxView& dst_idx,
1379  const SrcIdxView& src_idx,
1380  size_t numCols) {
1381  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1382  dst, src, dst_idx, src_idx, numCols);
1383  }
1384 
1385  template <typename DstView, typename SrcView,
1386  typename DstIdxView, typename SrcIdxView,
1387  typename DstColView, typename SrcColView>
1388  struct PermuteArrayMultiColumnVariableStride {
1389  typedef typename DstView::execution_space execution_space;
1390  typedef typename execution_space::size_type size_type;
1391 
1392  DstView dst;
1393  SrcView src;
1394  DstIdxView dst_idx;
1395  SrcIdxView src_idx;
1396  DstColView dst_col;
1397  SrcColView src_col;
1398  size_t numCols;
1399 
1400  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1401  const SrcView& src_,
1402  const DstIdxView& dst_idx_,
1403  const SrcIdxView& src_idx_,
1404  const DstColView& dst_col_,
1405  const SrcColView& src_col_,
1406  size_t numCols_) :
1407  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1408  dst_col(dst_col_), src_col(src_col_),
1409  numCols(numCols_) {}
1410 
1411  KOKKOS_INLINE_FUNCTION
1412  void operator()( const size_type k ) const {
1413  const typename DstIdxView::value_type toRow = dst_idx(k);
1414  const typename SrcIdxView::value_type fromRow = src_idx(k);
1415  for (size_t j = 0; j < numCols; ++j)
1416  dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
1417  }
1418 
1419  static void permute(const DstView& dst,
1420  const SrcView& src,
1421  const DstIdxView& dst_idx,
1422  const SrcIdxView& src_idx,
1423  const DstColView& dst_col,
1424  const SrcColView& src_col,
1425  size_t numCols) {
1426  const size_type n = std::min( dst_idx.size(), src_idx.size() );
1427  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1428  Kokkos::parallel_for (range_type (0, n),
1429  PermuteArrayMultiColumnVariableStride (dst, src,
1430  dst_idx,
1431  src_idx,
1432  dst_col,
1433  src_col,
1434  numCols));
1435  }
1436  };
1437 
1438  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1439  // SrcView::Rank == 2
1440  template <typename DstView, typename SrcView,
1441  typename DstIdxView, typename SrcIdxView,
1442  typename DstColView, typename SrcColView>
1443  void permute_array_multi_column_variable_stride(const DstView& dst,
1444  const SrcView& src,
1445  const DstIdxView& dst_idx,
1446  const SrcIdxView& src_idx,
1447  const DstColView& dst_col,
1448  const SrcColView& src_col,
1449  size_t numCols) {
1450  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1451  DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1452  dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1453  }
1454 
1455 } // Details namespace
1456 } // KokkosRefactor namespace
1457 } // Tpetra namespace
1458 
1459 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Details
Implementation details of Tpetra.
Tpetra::KokkosRefactor::Details::Impl::OutOfBounds
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Definition: Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp:76
Tpetra::KokkosRefactor::Details::Impl::outOfBounds
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Definition: Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp:108
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453