42 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_HPP
43 #define TPETRA_CRSMATRIXMULTIPLYOP_HPP
50 #include <Tpetra_CrsMatrix.hpp>
52 #include <Teuchos_TimeMonitor.hpp>
53 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
54 # include "Teuchos_VerboseObject.hpp"
123 template <
class Scalar,
124 class MatScalar = Scalar,
126 class GlobalOrdinal = ::Tpetra::Details::DefaultTypes::global_ordinal_type,
129 public Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>
149 #ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
150 importTimer_ = Teuchos::TimeMonitor::getNewCounter (
"CrsMatrixMultiplyOp::import");
151 exportTimer_ = Teuchos::TimeMonitor::getNewCounter (
"CrsMatrixMultiplyOp::export");
170 Teuchos::ETransp mode = Teuchos::NO_TRANS,
171 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one (),
172 Scalar beta = Teuchos::ScalarTraits<Scalar>::zero ())
const
174 TEUCHOS_TEST_FOR_EXCEPTION
175 (!
matrix_->isFillComplete (), std::runtime_error,
176 Teuchos::typeName (*
this) <<
"::apply(): underlying matrix is not fill-complete.");
177 TEUCHOS_TEST_FOR_EXCEPTION
179 Teuchos::typeName (*
this) <<
"::apply(X,Y): X and Y must have the same number of vectors.");
180 TEUCHOS_TEST_FOR_EXCEPTION
181 (Teuchos::ScalarTraits<Scalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
182 Teuchos::typeName (*
this) <<
"::apply() does not currently support transposed multiplications for complex scalar types.");
183 if (mode == Teuchos::NO_TRANS) {
234 const Scalar& dampingFactor,
236 const int numSweeps)
const
241 using Teuchos::rcpFromRef;
242 using Teuchos::rcp_const_cast;
249 TEUCHOS_TEST_FOR_EXCEPTION
250 (numSweeps < 0, std::invalid_argument,
251 "gaussSeidel: The number of sweeps must be nonnegative, "
252 "but you provided numSweeps = " << numSweeps <<
" < 0.");
257 if (direction == Forward) {
258 localDirection = Forward;
260 else if (direction == Backward) {
261 localDirection = Backward;
263 else if (direction == Symmetric) {
265 localDirection = Forward;
268 TEUCHOS_TEST_FOR_EXCEPTION
269 (
true, std::invalid_argument,
270 "gaussSeidel: The 'direction' enum does not have any of its valid "
271 "values: Forward, Backward, or Symmetric.");
274 if (numSweeps == 0) {
281 RCP<const import_type> importer =
matrix_->getGraph()->getImporter();
282 RCP<const export_type> exporter =
matrix_->getGraph()->getExporter();
283 TEUCHOS_TEST_FOR_EXCEPTION
284 (! exporter.is_null (), std::runtime_error,
285 "Tpetra's gaussSeidel implementation requires that the row, domain, "
286 "and range Maps be the same. This cannot be the case, because the "
287 "matrix has a nontrivial Export object.");
289 RCP<const map_type> domainMap =
matrix_->getDomainMap ();
290 RCP<const map_type> rangeMap =
matrix_->getRangeMap ();
291 RCP<const map_type> rowMap =
matrix_->getGraph ()->getRowMap ();
292 RCP<const map_type> colMap =
matrix_->getGraph ()->getColMap ();
294 #ifdef HAVE_TEUCHOS_DEBUG
299 TEUCHOS_TEST_FOR_EXCEPTION
300 (! X.
getMap ()->isSameAs (*domainMap), std::runtime_error,
301 "Tpetra::CrsMatrix::gaussSeidel requires that the input "
302 "multivector X be in the domain Map of the matrix.");
303 TEUCHOS_TEST_FOR_EXCEPTION
304 (! B.
getMap ()->isSameAs (*rangeMap), std::runtime_error,
305 "Tpetra::CrsMatrix::gaussSeidel requires that the input "
306 "B be in the range Map of the matrix.");
307 TEUCHOS_TEST_FOR_EXCEPTION
308 (! D.
getMap ()->isSameAs (*rowMap), std::runtime_error,
309 "Tpetra::CrsMatrix::gaussSeidel requires that the input "
310 "D be in the row Map of the matrix.");
311 TEUCHOS_TEST_FOR_EXCEPTION
312 (! rowMap->isSameAs (*rangeMap), std::runtime_error,
313 "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
314 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
315 TEUCHOS_TEST_FOR_EXCEPTION
316 (! domainMap->isSameAs (*rangeMap), std::runtime_error,
317 "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
318 "the range Map of the matrix be the same.");
324 #endif // HAVE_TEUCHOS_DEBUG
332 RCP<const OSMV> B_in;
334 B_in = rcpFromRef (B);
341 RCP<OSMV> B_in_nonconst = getRowMapMultiVector (B,
true);
343 B_in = rcp_const_cast<const OSMV> (B_in_nonconst);
347 "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
348 "requires that X and B both have constant stride. Since B does not "
349 "have constant stride, we had to make a copy. This is a limitation of "
350 "the current implementation and not your fault, but we still report it "
351 "as an efficiency warning for your information.");
360 RCP<OSMV> X_domainMap;
362 bool copiedInput =
false;
364 if (importer.is_null ()) {
366 X_domainMap = rcpFromRef (X);
367 X_colMap = X_domainMap;
374 X_colMap = getColumnMapMultiVector (X,
true);
375 X_domainMap = X_colMap;
380 "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
381 "requires that X and B both have constant stride. Since X does not "
382 "have constant stride, we had to make a copy. This is a limitation of "
383 "the current implementation and not your fault, but we still report it "
384 "as an efficiency warning for your information.");
389 X_domainMap = rcpFromRef (X);
393 X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
397 X_colMap->doImport (X, *importer,
INSERT);
405 X_colMap = getColumnMapMultiVector (X,
true);
406 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
407 X_colMap->doImport (X, *importer,
INSERT);
411 "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
412 "requires that X and B both have constant stride. Since X does not "
413 "have constant stride, we had to make a copy. This is a limitation of "
414 "the current implementation and not your fault, but we still report it "
415 "as an efficiency warning for your information.");
419 for (
int sweep = 0; sweep < numSweeps; ++sweep) {
420 if (! importer.is_null () && sweep > 0) {
422 X_colMap->doImport (*X_domainMap, *importer,
INSERT);
426 if (direction != Symmetric) {
427 matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
432 matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
436 if (! importer.is_null ()) {
437 X_colMap->doImport (*X_domainMap, *importer,
INSERT);
439 matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
478 const Scalar& dampingFactor,
480 const int numSweeps)
const
485 using Teuchos::rcpFromRef;
486 using Teuchos::rcp_const_cast;
493 TEUCHOS_TEST_FOR_EXCEPTION
494 (numSweeps < 0, std::invalid_argument,
495 "gaussSeidelCopy: The number of sweeps must be nonnegative, "
496 "but you provided numSweeps = " << numSweeps <<
" < 0.");
501 if (direction == Forward) {
502 localDirection = Forward;
504 else if (direction == Backward) {
505 localDirection = Backward;
507 else if (direction == Symmetric) {
509 localDirection = Forward;
512 TEUCHOS_TEST_FOR_EXCEPTION
513 (
true, std::invalid_argument,
514 "gaussSeidelCopy: The 'direction' enum does not have any of its "
515 "valid values: Forward, Backward, or Symmetric.");
518 if (numSweeps == 0) {
522 RCP<const import_type> importer =
matrix_->getGraph()->getImporter();
523 RCP<const export_type> exporter =
matrix_->getGraph()->getExporter();
524 TEUCHOS_TEST_FOR_EXCEPTION
525 (! exporter.is_null (),
527 "Tpetra's gaussSeidelCopy implementation requires that the row, domain, "
528 "and range Maps be the same. This cannot be the case, because the "
529 "matrix has a nontrivial Export object.");
531 RCP<const map_type> domainMap =
matrix_->getDomainMap ();
532 RCP<const map_type> rangeMap =
matrix_->getRangeMap ();
533 RCP<const map_type> rowMap =
matrix_->getGraph ()->getRowMap ();
534 RCP<const map_type> colMap =
matrix_->getGraph ()->getColMap ();
536 #ifdef HAVE_TEUCHOS_DEBUG
541 TEUCHOS_TEST_FOR_EXCEPTION
542 (! X.
getMap ()->isSameAs (*domainMap), std::runtime_error,
543 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
544 "multivector X be in the domain Map of the matrix.");
545 TEUCHOS_TEST_FOR_EXCEPTION
546 (! B.
getMap ()->isSameAs (*rangeMap), std::runtime_error,
547 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
548 "B be in the range Map of the matrix.");
549 TEUCHOS_TEST_FOR_EXCEPTION
550 (! D.
getMap ()->isSameAs (*rowMap), std::runtime_error,
551 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
552 "D be in the row Map of the matrix.");
553 TEUCHOS_TEST_FOR_EXCEPTION
554 (! rowMap->isSameAs (*rangeMap), std::runtime_error,
555 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
556 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
557 TEUCHOS_TEST_FOR_EXCEPTION
558 (! domainMap->isSameAs (*rangeMap), std::runtime_error,
559 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
560 "the range Map of the matrix be the same.");
566 #endif // HAVE_TEUCHOS_DEBUG
577 RCP<OSMV> X_domainMap;
578 bool copyBackOutput =
false;
579 if (importer.is_null ()) {
581 X_colMap = rcpFromRef (X);
582 X_domainMap = rcpFromRef (X);
587 X_colMap = getColumnMapMultiVector (X,
true);
591 X_domainMap = X_colMap;
593 copyBackOutput =
true;
596 "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
597 "kernel requires that X and B both have constant stride. Since X "
598 "does not have constant stride, we had to make a copy. This is a "
599 "limitation of the current implementation and not your fault, but we "
600 "still report it as an efficiency warning for your information.");
604 X_colMap = getColumnMapMultiVector (X);
605 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
615 X_colMap->doImport (X, *importer,
INSERT);
617 copyBackOutput =
true;
623 RCP<const OSMV> B_in;
625 B_in = rcpFromRef (B);
631 RCP<OSMV> B_in_nonconst = getRowMapMultiVector (B,
true);
633 B_in = rcp_const_cast<const OSMV> (B_in_nonconst);
637 "gaussSeidelCopy: The current implementation requires that B have "
638 "constant stride. Since B does not have constant stride, we had to "
639 "copy it into a separate constant-stride multivector. This is a "
640 "limitation of the current implementation and not your fault, but we "
641 "still report it as an efficiency warning for your information.");
644 for (
int sweep = 0; sweep < numSweeps; ++sweep) {
645 if (! importer.is_null () && sweep > 0) {
647 X_colMap->doImport (*X_domainMap, *importer,
INSERT);
651 if (direction != Symmetric) {
652 matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
657 matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
661 if (! importer.is_null ()) {
662 X_colMap->doImport (*X_domainMap, *importer,
INSERT);
664 matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
670 if (copyBackOutput) {
686 return matrix_->getDomainMap ();
691 return matrix_->getRangeMap ();
700 const Teuchos::RCP<const crs_matrix_type>
matrix_;
714 mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
importMV_;
728 mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
exportMV_;
730 #ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
731 Teuchos::RCP<Teuchos::Time> importTimer_, exportTimer_;
739 Teuchos::ETransp mode,
743 typedef Teuchos::ScalarTraits<Scalar> ST;
746 int myImageID = Teuchos::rank(*
matrix_->getComm());
747 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
748 Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
749 if (myImageID == 0) {
750 *out <<
"Entering CrsMatrixMultiplyOp::applyTranspose()" << std::endl
751 <<
"Column Map: " << std::endl;
753 *out <<
matrix_->getColMap() << std::endl;
754 if (myImageID == 0) {
755 *out <<
"Initial input: " << std::endl;
757 X_in.
describe(*out,Teuchos::VERB_EXTREME);
764 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer =
matrix_->getGraph()->getImporter();
765 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
matrix_->getGraph()->getExporter();
767 Teuchos::RCP<const MV> X;
771 Y_is_overwritten = (beta == ST::zero());
772 if (Y_is_replicated && myImageID > 0) {
779 X = Teuchos::rcp(
new MV(X_in));
780 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
781 if (myImageID == 0) *out <<
"X is not constant stride, duplicating X results in a strided copy" << std::endl;
782 X->describe(*out,Teuchos::VERB_EXTREME);
787 X = Teuchos::rcp(&X_in,
false);
791 if (importer !=
null) {
797 if (exporter !=
null) {
805 if (exporter !=
null) {
807 #ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
808 Teuchos::TimeMonitor lcltimer(*importTimer_);
814 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
815 if (myImageID == 0) {
816 *out <<
"Performed import of X using exporter..." << std::endl;
818 X->describe(*out,Teuchos::VERB_EXTREME);
825 if (importer !=
null) {
827 matrix_->template localMultiply<Scalar, Scalar>(*X, *
importMV_, mode, alpha, ST::zero());
828 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
829 if (myImageID == 0) *out <<
"Import vector after localMultiply()..." << std::endl;
830 importMV_->describe(*out,Teuchos::VERB_EXTREME);
832 if (Y_is_overwritten) Y_in.
putScalar(ST::zero());
833 else Y_in.
scale(beta);
836 #ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
837 Teuchos::TimeMonitor lcltimer(*importTimer_);
848 matrix_->template localMultiply<Scalar, Scalar>(*X, Y, mode, alpha, beta);
852 matrix_->template localMultiply<Scalar, Scalar>(*X, Y_in, mode, alpha, beta);
855 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
856 if (myImageID == 0) *out <<
"Y_in vector after local multiply/export..." << std::endl;
857 Y_in.
describe(*out,Teuchos::VERB_EXTREME);
860 if (Y_is_replicated) {
862 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
863 if (myImageID == 0) *out <<
"Output vector is local; result after reduce()..." << std::endl;
864 Y_in.
describe(*out,Teuchos::VERB_EXTREME);
879 using Teuchos::rcp_const_cast;
880 using Teuchos::rcpFromRef;
883 typedef Teuchos::ScalarTraits<Scalar> STS;
885 const int myImageID =
matrix_->getComm ()->getRank ();
887 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
888 RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
889 if (myImageID == 0) {
890 *out <<
"Entering CrsMatrixMultiplyOp::applyNonTranspose()" << std::endl
891 <<
"Column Map: " << std::endl;
893 *out <<
matrix_->getColMap() << std::endl;
894 if (myImageID == 0) {
895 *out <<
"Initial input: " << std::endl;
897 X_in.
describe (*out, Teuchos::VERB_EXTREME);
898 #endif // TPETRA_CRSMATRIX_MULTIPLY_DUMP
903 RCP<const import_type> importer =
matrix_->getGraph()->getImporter();
904 RCP<const export_type> exporter =
matrix_->getGraph()->getExporter();
910 const bool Y_is_overwritten = (beta == STS::zero());
921 if (Y_is_replicated && myImageID > 0) {
928 RCP<const MV> X_colMap;
929 if (importer.is_null ()) {
938 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
939 *X_colMapNonConst = X_in;
940 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
945 X_colMap = rcpFromRef (X_in);
953 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
957 #ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
958 Teuchos::TimeMonitor lcltimer (*importTimer_);
960 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
962 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
967 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
974 if (! exporter.is_null ()) {
975 matrix_->template localMultiply<Scalar, Scalar> (*X_colMap, *Y_rowMap,
982 if (Y_is_overwritten) {
992 #ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
993 Teuchos::TimeMonitor lcltimer (*exportTimer_);
1007 Y_rowMap = getRowMapMultiVector (Y_in,
true);
1011 if (beta != STS::zero ()) {
1014 matrix_->template localMultiply<Scalar, Scalar> (*X_colMap,
1021 matrix_->template localMultiply<Scalar, Scalar> (*X_colMap, Y_in,
1026 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
1027 if (myImageID == 0) {
1028 *out <<
"Result Y_in after localMultiply and Export:" << std::endl;
1030 Y_in.
describe (*out, Teuchos::VERB_EXTREME);
1031 #endif // TPETRA_CRSMATRIX_MULTIPLY_DUMP
1037 if (Y_is_replicated) {
1039 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
1040 if (myImageID == 0) {
1041 *out <<
"Result Y_in after reduce:" << std::endl;
1043 Y_in.
describe (*out, Teuchos::VERB_EXTREME);
1044 #endif // TPETRA_CRSMATRIX_MULTIPLY_DUMP
1071 const bool force =
false)
const
1073 using Teuchos::null;
1080 RCP<const import_type> importer =
matrix_->getGraph ()->getImporter ();
1081 RCP<const map_type> colMap =
matrix_->getColMap ();
1094 if (! importer.is_null () || force) {
1096 X_colMap = rcp (
new MV (colMap, numVecs));
1135 getRowMapMultiVector (
const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y_rangeMap,
1136 const bool force =
false)
const
1138 using Teuchos::null;
1141 typedef Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
1142 typedef Map<LocalOrdinal,GlobalOrdinal,Node>
map_type;
1144 const size_t numVecs = Y_rangeMap.getNumVectors ();
1145 RCP<const export_type> exporter =
matrix_->getGraph ()->getExporter ();
1146 RCP<const map_type> rowMap =
matrix_->getRowMap ();
1158 if (! exporter.is_null () || force) {
1160 Y_rowMap = rcp (
new MV (rowMap, numVecs));
1180 template <
class OpScalar,
1183 class GlobalOrdinal,
1186 CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node> >
1191 GlobalOrdinal, Node> op_type;
1192 return Teuchos::rcp (
new op_type (A));
1197 #endif // TPETRA_CRSMATRIXMULTIPLYOP_HPP