Tpetra parallel linear algebra  Version of the Day
Tpetra_Experimental_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
44 
45 #include "Tpetra_Experimental_BlockMultiVector_decl.hpp"
46 
47 namespace { // anonymous
48 
60  template<class MultiVectorType>
61  struct RawHostPtrFromMultiVector {
62  typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
63 
64  static impl_scalar_type* getRawPtr (MultiVectorType& X) {
65  // NOTE (mfh 09 Jun 2016) This does NOT sync to host, or mark
66  // host as modified. This is on purpose, because we don't want
67  // the BlockMultiVector sync'd to host unnecessarily.
68  // Otherwise, all the MultiVector and BlockMultiVector kernels
69  // would run on host instead of device. See Github Issue #428.
70  auto X_view_host = X.template getLocalView<Kokkos::HostSpace> ();
71  impl_scalar_type* X_raw = X_view_host.data ();
72  return X_raw;
73  }
74  };
75 
88  template<class S, class LO, class GO, class N>
90  getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
92  return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
93  }
94 
95 } // namespace (anonymous)
96 
97 namespace Tpetra {
98 namespace Experimental {
99 namespace Classes {
100 
101 template<class Scalar, class LO, class GO, class Node>
105 {
106  return mv_;
107 }
108 
109 template<class Scalar, class LO, class GO, class Node>
110 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
113 {
115  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
116  TEUCHOS_TEST_FOR_EXCEPTION(
117  src_bmv == NULL, std::invalid_argument, "Tpetra::Experimental::"
118  "BlockMultiVector: The source object of an Import or Export to a "
119  "BlockMultiVector, must also be a BlockMultiVector.");
120  return Teuchos::rcp (src_bmv, false); // nonowning RCP
121 }
122 
123 template<class Scalar, class LO, class GO, class Node>
125 BlockMultiVector (const map_type& meshMap,
126  const LO blockSize,
127  const LO numVecs) :
128  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
129  meshMap_ (meshMap),
130  pointMap_ (makePointMap (meshMap, blockSize)),
131  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
132  mvData_ (getRawHostPtrFromMultiVector (mv_)),
133  blockSize_ (blockSize)
134 {
135  // Make sure that mv_ has view semantics.
136  mv_.setCopyOrView (Teuchos::View);
137 }
138 
139 template<class Scalar, class LO, class GO, class Node>
141 BlockMultiVector (const map_type& meshMap,
142  const map_type& pointMap,
143  const LO blockSize,
144  const LO numVecs) :
145  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
146  meshMap_ (meshMap),
147  pointMap_ (pointMap),
148  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
149  mvData_ (getRawHostPtrFromMultiVector (mv_)),
150  blockSize_ (blockSize)
151 {
152  // Make sure that mv_ has view semantics.
153  mv_.setCopyOrView (Teuchos::View);
154 }
155 
156 template<class Scalar, class LO, class GO, class Node>
159  const map_type& meshMap,
160  const LO blockSize) :
161  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
162  meshMap_ (meshMap),
163  mvData_ (NULL), // just for now
164  blockSize_ (blockSize)
165 {
166  using Teuchos::RCP;
167 
168  if (X_mv.getCopyOrView () == Teuchos::View) {
169  // The input MultiVector has view semantics, so assignment just
170  // does a shallow copy.
171  mv_ = X_mv;
172  }
173  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
174  // The input MultiVector has copy semantics. We can't change
175  // that, but we can make a view of the input MultiVector and
176  // change the view to have view semantics. (That sounds silly;
177  // shouldn't views always have view semantics? but remember that
178  // "view semantics" just governs the default behavior of the copy
179  // constructor and assignment operator.)
180  RCP<const mv_type> X_view_const;
181  const size_t numCols = X_mv.getNumVectors ();
182  if (numCols == 0) {
183  Teuchos::Array<size_t> cols (0); // view will have zero columns
184  X_view_const = X_mv.subView (cols ());
185  } else { // Range1D is an inclusive range
186  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
187  }
188  TEUCHOS_TEST_FOR_EXCEPTION(
189  X_view_const.is_null (), std::logic_error, "Tpetra::Experimental::"
190  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
191  "should never happen. Please report this bug to the Tpetra developers.");
192 
193  // It's perfectly OK to cast away const here. Those view methods
194  // should be marked const anyway, because views can't change the
195  // allocation (just the entries themselves).
196  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
197  X_view->setCopyOrView (Teuchos::View);
198  TEUCHOS_TEST_FOR_EXCEPTION(
199  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
200  "Experimental::BlockMultiVector constructor: We just set a MultiVector "
201  "to have view semantics, but it claims that it doesn't have view "
202  "semantics. This should never happen. "
203  "Please report this bug to the Tpetra developers.");
204  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
205  }
206 
207  // At this point, mv_ has been assigned, so we can ignore X_mv.
208  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
209  if (! pointMap.is_null ()) {
210  pointMap_ = *pointMap; // Map::operator= also does a shallow copy
211  }
212  mvData_ = getRawHostPtrFromMultiVector (mv_);
213 }
214 
215 template<class Scalar, class LO, class GO, class Node>
218  const map_type& newMeshMap,
219  const map_type& newPointMap,
220  const size_t offset) :
221  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
222  meshMap_ (newMeshMap),
223  pointMap_ (newPointMap),
224  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
225  mvData_ (getRawHostPtrFromMultiVector (mv_)),
226  blockSize_ (X.getBlockSize ())
227 {
228  // Make sure that mv_ has view semantics.
229  mv_.setCopyOrView (Teuchos::View);
230 }
231 
232 template<class Scalar, class LO, class GO, class Node>
235  const map_type& newMeshMap,
236  const size_t offset) :
237  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
238  meshMap_ (newMeshMap),
239  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
240  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
241  mvData_ (getRawHostPtrFromMultiVector (mv_)),
242  blockSize_ (X.getBlockSize ())
243 {
244  // Make sure that mv_ has view semantics.
245  mv_.setCopyOrView (Teuchos::View);
246 }
247 
248 template<class Scalar, class LO, class GO, class Node>
251  dist_object_type (Teuchos::null),
252  mvData_ (NULL),
253  blockSize_ (0)
254 {
255  // Make sure that mv_ has view semantics.
256  mv_.setCopyOrView (Teuchos::View);
257 }
258 
259 template<class Scalar, class LO, class GO, class Node>
262 makePointMap (const map_type& meshMap, const LO blockSize)
263 {
264  typedef Tpetra::global_size_t GST;
265  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
266 
267  const GST gblNumMeshMapInds =
268  static_cast<GST> (meshMap.getGlobalNumElements ());
269  const size_t lclNumMeshMapIndices =
270  static_cast<size_t> (meshMap.getNodeNumElements ());
271  const GST gblNumPointMapInds =
272  gblNumMeshMapInds * static_cast<GST> (blockSize);
273  const size_t lclNumPointMapInds =
274  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
275  const GO indexBase = meshMap.getIndexBase ();
276 
277  if (meshMap.isContiguous ()) {
278  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
279  meshMap.getComm (), meshMap.getNode ());
280  }
281  else {
282  // "Hilbert's Hotel" trick: multiply each process' GIDs by
283  // blockSize, and fill in. That ensures correctness even if the
284  // mesh Map is overlapping.
285  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
286  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
287  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
288  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
289  const GO meshGid = lclMeshGblInds[g];
290  const GO pointGidStart = indexBase +
291  (meshGid - indexBase) * static_cast<GO> (blockSize);
292  const size_type offset = g * static_cast<size_type> (blockSize);
293  for (LO k = 0; k < blockSize; ++k) {
294  const GO pointGid = pointGidStart + static_cast<GO> (k);
295  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
296  }
297  }
298  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
299  meshMap.getComm (), meshMap.getNode ());
300  }
301 }
302 
303 
304 template<class Scalar, class LO, class GO, class Node>
305 void
307 replaceLocalValuesImpl (const LO localRowIndex,
308  const LO colIndex,
309  const Scalar vals[]) const
310 {
311  auto X_dst = getLocalBlock (localRowIndex, colIndex);
312  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
313  getBlockSize ());
314  Kokkos::deep_copy (X_dst, X_src);
315 }
316 
317 
318 template<class Scalar, class LO, class GO, class Node>
319 bool
321 replaceLocalValues (const LO localRowIndex,
322  const LO colIndex,
323  const Scalar vals[]) const
324 {
325  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
326  return false;
327  } else {
328  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
329  return true;
330  }
331 }
332 
333 template<class Scalar, class LO, class GO, class Node>
334 bool
336 replaceGlobalValues (const GO globalRowIndex,
337  const LO colIndex,
338  const Scalar vals[]) const
339 {
340  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
341  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
342  return false;
343  } else {
344  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
345  return true;
346  }
347 }
348 
349 template<class Scalar, class LO, class GO, class Node>
350 void
352 sumIntoLocalValuesImpl (const LO localRowIndex,
353  const LO colIndex,
354  const Scalar vals[]) const
355 {
356  auto X_dst = getLocalBlock (localRowIndex, colIndex);
357  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
358  getBlockSize ());
359  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
360 }
361 
362 template<class Scalar, class LO, class GO, class Node>
363 bool
365 sumIntoLocalValues (const LO localRowIndex,
366  const LO colIndex,
367  const Scalar vals[]) const
368 {
369  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
370  return false;
371  } else {
372  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
373  return true;
374  }
375 }
376 
377 template<class Scalar, class LO, class GO, class Node>
378 bool
380 sumIntoGlobalValues (const GO globalRowIndex,
381  const LO colIndex,
382  const Scalar vals[]) const
383 {
384  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
385  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
386  return false;
387  } else {
388  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
389  return true;
390  }
391 }
392 
393 template<class Scalar, class LO, class GO, class Node>
394 bool
396 getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals) const
397 {
398  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
399  return false;
400  } else {
401  auto X_ij = getLocalBlock (localRowIndex, colIndex);
402  vals = reinterpret_cast<Scalar*> (X_ij.data ());
403  return true;
404  }
405 }
406 
407 template<class Scalar, class LO, class GO, class Node>
408 bool
410 getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals) const
411 {
412  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
413  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
414  return false;
415  } else {
416  auto X_ij = getLocalBlock (localRowIndex, colIndex);
417  vals = reinterpret_cast<Scalar*> (X_ij.data ());
418  return true;
419  }
420 }
421 
422 template<class Scalar, class LO, class GO, class Node>
425 getLocalBlock (const LO localRowIndex,
426  const LO colIndex) const
427 {
428  // NOTE (mfh 07 Jul 2016) It should be correct to add the
429  // commented-out test below. However, I've conservatively commented
430  // it out, since users might not realize that they need to have
431  // things sync'd correctly.
432 
433 // #ifdef HAVE_TPETRA_DEBUG
434 // TEUCHOS_TEST_FOR_EXCEPTION
435 // (mv_.template need_sync<Kokkos::HostSpace> (), std::runtime_error,
436 // "Tpetra::Experimental::BlockMultiVector::getLocalBlock: This method "
437 // "accesses host data, but the object is not in sync on host." );
438 // #endif // HAVE_TPETRA_DEBUG
439 
440  if (! isValidLocalMeshIndex (localRowIndex)) {
441  return typename little_vec_type::HostMirror ();
442  } else {
443  const size_t blockSize = getBlockSize ();
444  const size_t offset = colIndex * this->getStrideY () +
445  localRowIndex * blockSize;
446  impl_scalar_type* blockRaw = this->getRawPtr () + offset;
447  return typename little_vec_type::HostMirror (blockRaw, blockSize);
448  }
449 }
450 
451 template<class Scalar, class LO, class GO, class Node>
452 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
455 {
456  using Teuchos::rcp;
457  using Teuchos::rcpFromRef;
458 
459  // The source object of an Import or Export must be a
460  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
461  // them in that order; one must succeed. Note that the target of
462  // the Import or Export calls checkSizes in DistObject's doTransfer.
463  typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
464  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
465  if (srcBlkVec == NULL) {
466  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
467  if (srcMultiVec == NULL) {
468  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
469  // currently does a shallow copy by default. This is why we
470  // return by RCP, rather than by value.
471  return rcp (new mv_type ());
472  } else { // src is a MultiVector
473  return rcp (srcMultiVec, false); // nonowning RCP
474  }
475  } else { // src is a BlockMultiVector
476  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
477  }
478 }
479 
480 template<class Scalar, class LO, class GO, class Node>
483 {
484  return ! getMultiVectorFromSrcDistObject (src).is_null ();
485 }
486 
487 template<class Scalar, class LO, class GO, class Node>
490  size_t numSameIDs,
491  const Teuchos::ArrayView<const LO>& permuteToLIDs,
492  const Teuchos::ArrayView<const LO>& permuteFromLIDs)
493 {
494  const char tfecfFuncName[] = "copyAndPermute: ";
495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
496  permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
497  "permuteToLIDs and permuteFromLIDs must have the same size."
498  << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
499  << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
500 
502  Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
504  srcAsBmvPtr.is_null (), std::invalid_argument,
505  "The source of an Import or Export to a BlockMultiVector "
506  "must also be a BlockMultiVector.");
507  const BMV& srcAsBmv = *srcAsBmvPtr;
508 
509  // FIXME (mfh 23 Apr 2014) This implementation is sequential and
510  // assumes UVM.
511 
512  const LO numVecs = getNumVectors ();
513  const LO numSame = static_cast<LO> (numSameIDs);
514  for (LO j = 0; j < numVecs; ++j) {
515  for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
516  deep_copy (getLocalBlock (lclRow, j), srcAsBmv.getLocalBlock (lclRow, j));
517  }
518  }
519 
520  // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on the
521  // same process, this merges their values by replacement of the last
522  // encountered GID, not by the specified merge rule (such as ADD).
523  const LO numPermuteLIDs = static_cast<LO> (permuteToLIDs.size ());
524  for (LO j = 0; j < numVecs; ++j) {
525  for (LO k = numSame; k < numPermuteLIDs; ++k) {
526  deep_copy (getLocalBlock (permuteToLIDs[k], j), srcAsBmv.getLocalBlock (permuteFromLIDs[k], j));
527  }
528  }
529 }
530 
531 template<class Scalar, class LO, class GO, class Node>
532 void BlockMultiVector<Scalar, LO, GO, Node>::
533 packAndPrepare (const Tpetra::SrcDistObject& src,
534  const Teuchos::ArrayView<const LO>& exportLIDs,
535  Teuchos::Array<impl_scalar_type>& exports,
536  const Teuchos::ArrayView<size_t>& /* numPacketsPerLID */,
537  size_t& constantNumPackets,
538  Tpetra::Distributor& /* distor */)
539 {
540  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
541  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
542  const char tfecfFuncName[] = "packAndPrepare: ";
543 
544  Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
545  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
546  srcAsBmvPtr.is_null (), std::invalid_argument,
547  "The source of an Import or Export to a BlockMultiVector "
548  "must also be a BlockMultiVector.");
549  const BMV& srcAsBmv = *srcAsBmvPtr;
550 
551  const LO numVecs = getNumVectors ();
552  const LO blockSize = getBlockSize ();
553 
554  // Number of things to pack per LID is the block size, times the
555  // number of columns. Input LIDs correspond to the mesh points, not
556  // the degrees of freedom (DOFs).
557  constantNumPackets =
558  static_cast<size_t> (blockSize) * static_cast<size_t> (numVecs);
559  const size_type numMeshLIDs = exportLIDs.size ();
560 
561  const size_type requiredExportsSize = numMeshLIDs *
562  static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
563  exports.resize (requiredExportsSize);
564 
565  try {
566  size_type curExportPos = 0;
567  for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
568  for (LO j = 0; j < numVecs; ++j, curExportPos += blockSize) {
569  const LO meshLid = exportLIDs[meshLidIndex];
570  impl_scalar_type* const curExportPtr = &exports[curExportPos];
571  typename little_vec_type::HostMirror X_dst (curExportPtr, blockSize);
572  auto X_src = srcAsBmv.getLocalBlock (meshLid, j);
573 
574  Kokkos::deep_copy (X_dst, X_src);
575  }
576  }
577  } catch (std::exception& e) {
578  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
579  true, std::logic_error, "Oh no! packAndPrepare on Process "
580  << meshMap_.getComm ()->getRank () << " raised the following exception: "
581  << e.what ());
582  }
583 }
584 
585 template<class Scalar, class LO, class GO, class Node>
586 void BlockMultiVector<Scalar, LO, GO, Node>::
587 unpackAndCombine (const Teuchos::ArrayView<const LO>& importLIDs,
588  const Teuchos::ArrayView<const impl_scalar_type>& imports,
589  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
590  size_t constantNumPackets,
591  Tpetra::Distributor& distor,
593 {
594  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
595  const char tfecfFuncName[] = "unpackAndCombine: ";
596 
597  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
598  CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX && CM != ZERO,
599  std::invalid_argument, "Invalid CombineMode: " << CM << ". Valid "
600  "CombineMode values are ADD, REPLACE, INSERT, ABSMAX, and ZERO.");
601 
602  if (CM == ZERO) {
603  return; // Combining does nothing, so we don't have to combine anything.
604  }
605 
606  // Number of things to pack per LID is the block size.
607  // Input LIDs correspond to the mesh points, not the DOFs.
608  const size_type numMeshLIDs = importLIDs.size ();
609  const LO blockSize = getBlockSize ();
610  const LO numVecs = getNumVectors ();
611 
612  const size_type requiredImportsSize = numMeshLIDs *
613  static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
614  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
615  imports.size () < requiredImportsSize, std::logic_error,
616  ": imports.size () = " << imports.size ()
617  << " < requiredImportsSize = " << requiredImportsSize << ".");
618 
619  size_type curImportPos = 0;
620  for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
621  for (LO j = 0; j < numVecs; ++j, curImportPos += blockSize) {
622  const LO meshLid = importLIDs[meshLidIndex];
623  const impl_scalar_type* const curImportPtr = &imports[curImportPos];
624 
625  typename const_little_vec_type::HostMirror::const_type X_src (curImportPtr, blockSize);
626  auto X_dst = getLocalBlock (meshLid, j);
627 
628  if (CM == INSERT || CM == REPLACE) {
629  deep_copy (X_dst, X_src);
630  } else if (CM == ADD) {
631  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
632  } else if (CM == ABSMAX) {
633  Impl::absMax (X_dst, X_src);
634  }
635  }
636  }
637 }
638 
639 template<class Scalar, class LO, class GO, class Node>
641 putScalar (const Scalar& val)
642 {
643  mv_.putScalar (val);
644 }
645 
646 template<class Scalar, class LO, class GO, class Node>
648 scale (const Scalar& val)
649 {
650  mv_.scale (val);
651 }
652 
653 template<class Scalar, class LO, class GO, class Node>
655 update (const Scalar& alpha,
657  const Scalar& beta)
658 {
659  mv_.update (alpha, X.mv_, beta);
660 }
661 
662 namespace Impl {
663 // Y := alpha * D * X
664 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
665 struct BlockWiseMultiply {
666  typedef typename ViewD::size_type Size;
667 
668 private:
669  typedef typename ViewD::device_type Device;
670  typedef typename ViewD::non_const_value_type ImplScalar;
671  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
672 
673  template <typename View>
674  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
675  typename View::device_type, Unmanaged>;
676  typedef UnmanagedView<ViewY> UnMViewY;
677  typedef UnmanagedView<ViewD> UnMViewD;
678  typedef UnmanagedView<ViewX> UnMViewX;
679 
680  const Size block_size_;
681  Scalar alpha_;
682  UnMViewY Y_;
683  UnMViewD D_;
684  UnMViewX X_;
685 
686 public:
687  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
688  const ViewY& Y, const ViewD& D, const ViewX& X)
689  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
690  {}
691 
692  KOKKOS_INLINE_FUNCTION
693  void operator() (const Size k) const {
694  const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
695  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
696  const auto num_vecs = X_.extent(1);
697  for (Size i = 0; i < num_vecs; ++i) {
698  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
699  auto X_curBlk = Kokkos::subview(X_, kslice, i);
700  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
701  // Y_curBlk := alpha * D_curBlk * X_curBlk.
702  // Recall that GEMV does an update (+=) of the last argument.
703  Tpetra::Experimental::FILL(Y_curBlk, zero);
704  Tpetra::Experimental::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
705  }
706  }
707 };
708 
709 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
710 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
711 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
712  const ViewY& Y, const ViewD& D, const ViewX& X) {
713  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
714 }
715 
716 template <typename ViewY,
717  typename Scalar,
718  typename ViewD,
719  typename ViewZ,
720  typename LO = typename ViewY::size_type>
721 class BlockJacobiUpdate {
722 private:
723  typedef typename ViewD::device_type Device;
724  typedef typename ViewD::non_const_value_type ImplScalar;
725  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
726 
727  template <typename ViewType>
728  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
729  typename ViewType::array_layout,
730  typename ViewType::device_type,
731  Unmanaged>;
732  typedef UnmanagedView<ViewY> UnMViewY;
733  typedef UnmanagedView<ViewD> UnMViewD;
734  typedef UnmanagedView<ViewZ> UnMViewZ;
735 
736  const LO blockSize_;
737  UnMViewY Y_;
738  const Scalar alpha_;
739  UnMViewD D_;
740  UnMViewZ Z_;
741  const Scalar beta_;
742 
743 public:
744  BlockJacobiUpdate (const ViewY& Y,
745  const Scalar& alpha,
746  const ViewD& D,
747  const ViewZ& Z,
748  const Scalar& beta) :
749  blockSize_ (D.extent (1)),
750  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
751  Y_ (Y),
752  alpha_ (alpha),
753  D_ (D),
754  Z_ (Z),
755  beta_ (beta)
756  {
757  static_assert (static_cast<int> (ViewY::rank) == 1,
758  "Y must have rank 1.");
759  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
760  static_assert (static_cast<int> (ViewZ::rank) == 1,
761  "Z must have rank 1.");
762  // static_assert (static_cast<int> (ViewZ::rank) ==
763  // static_cast<int> (ViewY::rank),
764  // "Z must have the same rank as Y.");
765  }
766 
767  KOKKOS_INLINE_FUNCTION void
768  operator() (const LO& k) const
769  {
770  using Kokkos::ALL;
771  using Kokkos::subview;
772  typedef Kokkos::pair<LO, LO> range_type;
773  typedef Kokkos::Details::ArithTraits<Scalar> KAT;
774 
775  // We only have to implement the alpha != 0 case.
776 
777  auto D_curBlk = subview (D_, k, ALL (), ALL ());
778  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
779 
780  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
781 
782  auto Z_curBlk = subview (Z_, kslice);
783  auto Y_curBlk = subview (Y_, kslice);
784  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
785  if (beta_ == KAT::zero ()) {
786  Tpetra::Experimental::FILL (Y_curBlk, KAT::zero ());
787  }
788  else if (beta_ != KAT::one ()) {
789  Tpetra::Experimental::SCAL (beta_, Y_curBlk);
790  }
791  Tpetra::Experimental::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
792  }
793 };
794 
795 template<class ViewY,
796  class Scalar,
797  class ViewD,
798  class ViewZ,
799  class LO = typename ViewD::size_type>
800 void
801 blockJacobiUpdate (const ViewY& Y,
802  const Scalar& alpha,
803  const ViewD& D,
804  const ViewZ& Z,
805  const Scalar& beta)
806 {
807  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
808  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
809  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
810  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
811  "Y and Z must have the same rank.");
812  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
813 
814  const auto lclNumMeshRows = D.extent (0);
815 
816 #ifdef HAVE_TPETRA_DEBUG
817  // D.extent(0) is the (local) number of mesh rows.
818  // D.extent(1) is the block size. Thus, their product should be
819  // the local number of point rows, that is, the number of rows in Y.
820  const auto blkSize = D.extent (1);
821  const auto lclNumPtRows = lclNumMeshRows * blkSize;
822  TEUCHOS_TEST_FOR_EXCEPTION
823  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
824  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
825  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
826  << " = " << lclNumPtRows << ".");
827  TEUCHOS_TEST_FOR_EXCEPTION
828  (Y.extent (0) != Z.extent (0), std::invalid_argument,
829  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
830  "Z.extent(0) = " << Z.extent (0) << ".");
831  TEUCHOS_TEST_FOR_EXCEPTION
832  (Y.extent (1) != Z.extent (1), std::invalid_argument,
833  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
834  "Z.extent(1) = " << Z.extent (1) << ".");
835 #endif // HAVE_TPETRA_DEBUG
836 
837  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
838  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
839  // lclNumMeshRows must fit in LO, else the Map would not be correct.
840  range_type range (0, static_cast<LO> (lclNumMeshRows));
841  Kokkos::parallel_for (range, functor);
842 }
843 
844 } // namespace Impl
845 
846 template<class Scalar, class LO, class GO, class Node>
848 blockWiseMultiply (const Scalar& alpha,
849  const Kokkos::View<const impl_scalar_type***,
850  device_type, Kokkos::MemoryUnmanaged>& D,
852 {
853  using Kokkos::ALL;
854  typedef typename device_type::execution_space execution_space;
855  typedef typename device_type::memory_space memory_space;
856  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
857 
858  if (alpha == STS::zero ()) {
859  this->putScalar (STS::zero ());
860  }
861  else { // alpha != 0
862  const LO blockSize = this->getBlockSize ();
863  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
864  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
865  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
866  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
867 
868  // Use an explicit RangePolicy with the desired execution space.
869  // Otherwise, if you just use a number, it runs on the default
870  // execution space. For example, if the default execution space
871  // is Cuda but the current execution space is Serial, using just a
872  // number would incorrectly run with Cuda.
873  Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
874  Kokkos::parallel_for (range, bwm);
875  }
876 }
877 
878 template<class Scalar, class LO, class GO, class Node>
880 blockJacobiUpdate (const Scalar& alpha,
881  const Kokkos::View<const impl_scalar_type***,
882  device_type, Kokkos::MemoryUnmanaged>& D,
885  const Scalar& beta)
886 {
887  using Kokkos::ALL;
888  using Kokkos::subview;
889  typedef typename device_type::memory_space memory_space;
890  typedef impl_scalar_type IST;
891 
892  const IST alphaImpl = static_cast<IST> (alpha);
893  const IST betaImpl = static_cast<IST> (beta);
894  const LO numVecs = mv_.getNumVectors ();
895 
896  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
897  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
898  auto Z_lcl = Z.mv_.template getLocalView<memory_space> ();
899 
900  if (alpha == STS::zero ()) { // Y := beta * Y
901  this->scale (beta);
902  }
903  else { // alpha != 0
904  Z.update (STS::one (), X, -STS::one ());
905  for (LO j = 0; j < numVecs; ++j) {
906  auto X_lcl_j = subview (X_lcl, ALL (), j);
907  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
908  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
909  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
910  }
911  }
912 }
913 
914 } // namespace Classes
915 } // namespace Experimental
916 } // namespace Tpetra
917 
918 //
919 // Explicit instantiation macro
920 //
921 // Must be expanded from within the Tpetra namespace!
922 //
923 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
924  namespace Experimental { \
925  namespace Classes { \
926  template class BlockMultiVector< S, LO, GO, NODE >; \
927  } \
928  }
929 
930 #endif // TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
Tpetra::Experimental::Classes::BlockMultiVector::scale
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:648
Tpetra::Experimental::Classes::BlockMultiVector::BlockMultiVector
BlockMultiVector()
Default constructor.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:250
Tpetra::Experimental::Classes::BlockMultiVector::sumIntoLocalValues
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a local index.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:365
Tpetra::Experimental::Classes::BlockMultiVector::getMultiVectorView
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:104
Tpetra::Experimental::Classes::BlockMultiVector::putScalar
void putScalar(const Scalar &val)
Fill all entries with the given value val.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:641
Tpetra::Experimental::FILL
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Definition: Tpetra_Experimental_BlockView.hpp:684
Tpetra::Experimental::Classes::BlockMultiVector::sumIntoGlobalValues
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a global index.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:380
Tpetra::Classes::MultiVector::setCopyOrView
void setCopyOrView(const Teuchos::DataAccess copyOrView)
Set whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
Definition: Tpetra_MultiVector_decl.hpp:2152
Tpetra::Experimental::Classes::BlockMultiVector::blockWiseMultiply
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:848
Tpetra::Experimental::AXPY
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
Definition: Tpetra_Experimental_BlockView.hpp:702
Tpetra::Experimental::Classes::BlockMultiVector::checkSizes
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:482
Tpetra::REPLACE
Replace existing values with new values.
Definition: Tpetra_CombineMode.hpp:97
Tpetra::Classes::Map::getGlobalNumElements
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Definition: Tpetra_Map_decl.hpp:569
Tpetra::Classes::Map::getNode
Teuchos::RCP< Node > getNode() const
Get this Map's Node object.
Definition: Tpetra_Map_def.hpp:1973
Tpetra::Classes::DistObject::device_type
Node::device_type device_type
The Kokkos Device type.
Definition: Tpetra_DistObject_decl.hpp:370
Tpetra::Experimental::Classes::BlockMultiVector::replaceLocalValues
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using local row and column indices.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:321
Tpetra::Classes::MultiVector::subView
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
Definition: Tpetra_MultiVector_def.hpp:3538
Tpetra::Experimental::Classes::BlockMultiVector::mv_type
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
Definition: Tpetra_Experimental_BlockMultiVector_decl.hpp:164
Tpetra::ZERO
Replace old values with zero.
Definition: Tpetra_CombineMode.hpp:99
Tpetra::Classes::DistObject::getMap
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Definition: Tpetra_DistObject_decl.hpp:510
Tpetra::Experimental::Classes::BlockMultiVector::getLocalBlock
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:425
Tpetra::Experimental::Classes::BlockMultiVector::impl_scalar_type
mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
Definition: Tpetra_Experimental_BlockMultiVector_decl.hpp:173
Tpetra::ADD
Sum new values into existing values.
Definition: Tpetra_CombineMode.hpp:95
Tpetra::Experimental::Classes::BlockMultiVector::getGlobalRowView
bool getGlobalRowView(const GO globalRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a global index.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:410
Tpetra::ABSMAX
Replace old value with maximum of magnitudes of old and new values.
Definition: Tpetra_CombineMode.hpp:98
Tpetra::Classes::DistObject
Base class for distributed Tpetra objects that support data redistribution.
Definition: Tpetra_DistObject_decl.hpp:349
Tpetra::Experimental::Classes::BlockMultiVector::getLocalRowView
bool getLocalRowView(const LO localRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a local index.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:396
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:188
Tpetra::Experimental::Classes::BlockMultiVector::makePointMap
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:262
Tpetra::Classes::MultiVector
One or more distributed dense vectors.
Definition: Tpetra_MultiVector_decl.hpp:389
Tpetra::Experimental::Classes::BlockMultiVector::replaceGlobalValues
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using a global index.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:336
Tpetra::Classes::Map::getNodeElementList
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Definition: Tpetra_Map_def.hpp:1515
Tpetra::Classes::MultiVector::getNumVectors
size_t getNumVectors() const
Number of columns in the multivector.
Definition: Tpetra_MultiVector_def.hpp:1739
Tpetra::Experimental::Classes::BlockMultiVector
MultiVector for multiple degrees of freedom per mesh point.
Definition: Tpetra_Experimental_BlockMultiVector_decl.hpp:147
Tpetra::Classes::Map
A parallel distribution of indices over processes.
Definition: Tpetra_Map_decl.hpp:247
Tpetra::Classes::MultiVector::impl_scalar_type
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Definition: Tpetra_MultiVector_decl.hpp:414
Tpetra::Classes::DistObject::execution_space
device_type::execution_space execution_space
The Kokkos execution space.
Definition: Tpetra_DistObject_decl.hpp:372
Tpetra::Experimental::SCAL
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Definition: Tpetra_Experimental_BlockView.hpp:672
Tpetra::Classes::Map::getIndexBase
GlobalOrdinal getIndexBase() const
The index base for this Map.
Definition: Tpetra_Map_decl.hpp:587
Tpetra::Classes::Map::getComm
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Definition: Tpetra_Map_def.hpp:1967
Tpetra::Classes::Map::getNodeNumElements
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
Definition: Tpetra_Map_decl.hpp:578
Tpetra::global_size_t
size_t global_size_t
Global size_t object.
Definition: Tpetra_ConfigDefs.hpp:109
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Experimental::Classes::BlockMultiVector::update
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:655
Tpetra::Experimental::GEMV
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
Definition: Tpetra_Experimental_BlockView.hpp:754
Tpetra::SrcDistObject
Abstract base class for objects that can be the source of an Import or Export operation.
Definition: Tpetra_SrcDistObject.hpp:89
Tpetra::Classes::MultiVector::getCopyOrView
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
Definition: Tpetra_MultiVector_decl.hpp:2169
Tpetra::Classes::Map::isContiguous
bool isContiguous() const
True if this Map is distributed contiguously, else false.
Definition: Tpetra_Map_def.hpp:1157
Tpetra::Experimental::Classes::BlockMultiVector::mv_
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Definition: Tpetra_Experimental_BlockMultiVector_decl.hpp:636
Tpetra::INSERT
Insert new values that don't currently exist.
Definition: Tpetra_CombineMode.hpp:96
Tpetra::Experimental::Classes::BlockMultiVector::blockJacobiUpdate
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update: Y := beta * Y + alpha * D * (X - Z), where D is a block diagonal matrix.
Definition: Tpetra_Experimental_BlockMultiVector_def.hpp:880
Tpetra::CombineMode
CombineMode
Rule for combining data in an Import or Export.
Definition: Tpetra_CombineMode.hpp:94