41 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
42 #define TPETRA_MATRIXMATRIX_DEF_HPP
43 #include "Tpetra_ConfigDefs.hpp"
45 #include "Teuchos_VerboseObject.hpp"
46 #include "Teuchos_Array.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
50 #include "Tpetra_RowMatrixTransposer.hpp"
52 #include "Tpetra_Details_radixSort.hpp"
53 #include "Tpetra_ConfigDefs.hpp"
54 #include "Tpetra_Map.hpp"
55 #include "Tpetra_Export.hpp"
59 #include <type_traits>
60 #include "Teuchos_FancyOStream.hpp"
62 #include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
65 #include "KokkosSparse_spgemm.hpp"
66 #include "KokkosSparse_spadd.hpp"
80 #include "TpetraExt_MatrixMatrix_OpenMP.hpp"
81 #include "TpetraExt_MatrixMatrix_Cuda.hpp"
86 namespace MatrixMatrix{
94 template <
class Scalar,
104 bool call_FillComplete_on_result,
105 const std::string& label,
106 const Teuchos::RCP<Teuchos::ParameterList>& params)
111 typedef LocalOrdinal LO;
112 typedef GlobalOrdinal GO;
120 #ifdef HAVE_TPETRA_MMM_TIMINGS
121 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
122 using Teuchos::TimeMonitor;
123 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Setup"))));
126 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
131 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
132 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
137 RCP<const crs_matrix_type> Aprime =
null;
141 RCP<const crs_matrix_type> Bprime =
null;
151 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
153 bool use_optimized_ATB =
false;
154 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
155 use_optimized_ATB =
true;
157 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later.
158 use_optimized_ATB =
false;
161 if (!use_optimized_ATB && transposeA) {
162 transposer_type transposer(rcpFromRef (A));
163 Aprime = transposer.createTranspose();
166 Aprime = rcpFromRef(A);
170 transposer_type transposer(rcpFromRef (B));
171 Bprime = transposer.createTranspose();
174 Bprime = rcpFromRef(B);
184 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
185 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
186 "must match for matrix-matrix product. op(A) is "
187 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
193 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
194 prefix <<
"ERROR, dimensions of result C must "
196 <<
" rows, should have at least " << Aouter << std::endl);
208 int numProcs = A.
getComm()->getSize();
212 crs_matrix_struct_type Aview;
213 crs_matrix_struct_type Bview;
215 RCP<const map_type> targetMap_A = Aprime->getRowMap();
216 RCP<const map_type> targetMap_B = Bprime->getRowMap();
218 #ifdef HAVE_TPETRA_MMM_TIMINGS
219 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All I&X"))));
225 if (!use_optimized_ATB) {
226 RCP<const import_type> dummyImporter;
227 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
233 targetMap_B = Aprime->getColMap();
236 if (!use_optimized_ATB)
237 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label, params);
239 #ifdef HAVE_TPETRA_MMM_TIMINGS
240 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Multiply"))));
244 if (use_optimized_ATB) {
245 MMdetails::mult_AT_B_newmatrix(A, B, C, label,params);
247 }
else if (call_FillComplete_on_result && newFlag) {
248 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label,params);
250 }
else if (call_FillComplete_on_result) {
251 MMdetails::mult_A_B_reuse(Aview, Bview, C, label,params);
257 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
259 MMdetails::mult_A_B(Aview, Bview, crsmat, label,params);
261 #ifdef HAVE_TPETRA_MMM_TIMINGS
262 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All FillComplete"))));
264 if (call_FillComplete_on_result) {
272 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
278 template <
class Scalar,
287 bool call_FillComplete_on_result,
288 const std::string& label,
289 const Teuchos::RCP<Teuchos::ParameterList>& params)
293 typedef LocalOrdinal LO;
294 typedef GlobalOrdinal GO;
301 #ifdef HAVE_TPETRA_MMM_TIMINGS
302 std::string prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
303 using Teuchos::TimeMonitor;
304 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm+std::string(
"Jacobi All Setup"))));
307 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
312 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
313 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
315 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
316 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
325 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
326 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
327 "must match for matrix-matrix product. op(A) is "
328 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
334 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
335 prefix <<
"ERROR, dimensions of result C must "
337 <<
" rows, should have at least "<< Aouter << std::endl);
349 int numProcs = A.
getComm()->getSize();
353 crs_matrix_struct_type Aview;
354 crs_matrix_struct_type Bview;
356 RCP<const map_type> targetMap_A = Aprime->getRowMap();
357 RCP<const map_type> targetMap_B = Bprime->getRowMap();
359 #ifdef HAVE_TPETRA_MMM_TIMINGS
360 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All I&X"))));
365 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
366 if(!params.is_null()) importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
369 RCP<const import_type> dummyImporter;
370 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
false, label,importParams1);
375 targetMap_B = Aprime->getColMap();
380 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
381 if(!params.is_null()) importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
382 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label,importParams2);
384 #ifdef HAVE_TPETRA_MMM_TIMINGS
385 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All Multiply"))));
389 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
392 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
394 if (call_FillComplete_on_result && newFlag) {
395 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label, params);
397 }
else if (call_FillComplete_on_result) {
398 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label, params);
401 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
424 template <
class Scalar,
435 using Teuchos::Array;
439 typedef LocalOrdinal LO;
440 typedef GlobalOrdinal GO;
445 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
447 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
448 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
449 "(Result matrix B is not required to be isFillComplete()).");
450 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
451 prefix <<
"ERROR, input matrix B must not be fill complete!");
452 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
453 prefix <<
"ERROR, input matrix B must not have static graph!");
455 prefix <<
"ERROR, input matrix B must not be locally indexed!");
457 prefix <<
"ERROR, input matrix B must have a dynamic profile!");
460 RCP<const crs_matrix_type> Aprime =
null;
462 transposer_type transposer(rcpFromRef (A));
463 Aprime = transposer.createTranspose();
465 Aprime = rcpFromRef(A);
473 if (scalarB != Teuchos::ScalarTraits<SC>::one())
478 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
479 for (LO i = 0; (size_t)i < numMyRows; ++i) {
480 row = B.
getRowMap()->getGlobalElement(i);
481 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
483 if (scalarA != Teuchos::ScalarTraits<SC>::one())
484 for (
size_t j = 0; j < a_numEntries; ++j)
485 a_vals[j] *= scalarA;
495 namespace ColMapFunctors
497 template<
typename ByteView,
typename GView>
500 UnionEntries(ByteView entryUnion_,
const GView gids_) : entryUnion(entryUnion_), gids(gids_) {}
501 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const
503 entryUnion(gids(i)) = 1;
509 template<
typename LView,
typename GView>
510 struct ConvertGlobalToLocal
512 ConvertGlobalToLocal(
const LView gtol_,
const GView gids_, LView lids_) : gtol(gtol_), gids(gids_), lids(lids_) {}
513 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const
515 lids(i) = gtol(gids(i));
525 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
526 Teuchos::RCP<Map<LocalOrdinal, GlobalOrdinal, Node> > AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
527 makeColMapAndConvertGids(GlobalOrdinal ncols,
528 const typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::global_col_inds_array& gids,
529 typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::col_inds_array& lids,
530 const Teuchos::RCP<
const Teuchos::Comm<int>>& comm)
532 using namespace ColMapFunctors;
535 typedef Kokkos::View<char*, device_type> ByteView;
536 typedef global_col_inds_array GView;
537 typedef col_inds_array LView;
539 auto nentries = gids.extent(0);
541 ByteView entryUnion(
"entry union", ncols);
542 UnionEntries<ByteView, GView> ue(entryUnion, gids);
543 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_unionEntries", range_type(0, nentries), ue);
545 LView gtol(
"global col -> local col", ncols + 1);
546 ::Tpetra::Details::computeOffsetsFromCounts<decltype(gtol), decltype(entryUnion)>(gtol, entryUnion);
548 ConvertGlobalToLocal<LView, GView> cgtl(gtol, gids, lids);
549 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertGlobalToLocal", range_type(0, gids.extent(0)), cgtl);
551 execution_space::fence();
552 GView colmap(
"column map", gtol(ncols));
553 size_t localIter = 0;
554 execution_space::fence();
555 for(
size_t i = 0; i < entryUnion.extent(0); i++)
557 if(entryUnion(i) != 0)
559 colmap(localIter++) = i;
562 execution_space::fence();
564 return rcp(
new map_type(Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), colmap, 0, comm));
567 template <
class Scalar,
571 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
573 const bool transposeA,
576 const bool transposeB,
580 const Teuchos::RCP<Teuchos::ParameterList>& params)
586 Teuchos::RCP<crs_matrix_type> C = rcp(
new crs_matrix_type(CrowMap, 0));
588 add(alpha,transposeA,A,beta,transposeB,B,*C,domainMap,rangeMap,params);
592 template <
class Scalar,
598 const bool transposeA,
601 const bool transposeB,
606 const Teuchos::RCP<Teuchos::ParameterList>& params)
610 using Teuchos::rcpFromRef;
611 using Teuchos::rcp_implicit_cast;
612 using Teuchos::rcp_dynamic_cast;
613 using Teuchos::TimeMonitor;
615 typedef LocalOrdinal LO;
616 typedef GlobalOrdinal GO;
623 typedef AddDetails::AddKernels<SC,LO,GO,NO> AddKern;
624 const char* prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
625 constexpr
bool debug =
false;
627 #ifdef HAVE_TPETRA_MMM_TIMINGS
628 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"transpose"))));
632 std::ostringstream os;
633 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
634 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
635 std::cerr << os.str ();
639 prefix_mmm <<
"A and B must both be fill complete.");
640 #ifdef HAVE_TPETRA_DEBUG
643 const bool domainMapsSame =
647 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
648 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
650 const bool rangeMapsSame =
654 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
655 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
657 #endif // HAVE_TPETRA_DEBUG
660 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
662 transposer_type transposer(Aprime);
663 Aprime = transposer.createTranspose ();
665 #ifdef HAVE_TPETRA_DEBUG
666 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
667 prefix_mmm <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
668 #endif // HAVE_TPETRA_DEBUG
670 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
673 std::ostringstream os;
674 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
675 <<
"Form explicit xpose of B" << std::endl;
676 std::cerr << os.str ();
678 transposer_type transposer(Bprime);
679 Bprime = transposer.createTranspose ();
681 #ifdef HAVE_TPETRA_DEBUG
682 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
683 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
684 TEUCHOS_TEST_FOR_EXCEPTION(
685 !Aprime->isFillComplete () || !Bprime->isFillComplete (), std::invalid_argument,
686 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
687 "Please report this bug to the Tpetra developers.");
688 #endif // HAVE_TPETRA_DEBUG
689 RCP<const map_type> CDomainMap = domainMap;
690 RCP<const map_type> CRangeMap = rangeMap;
691 if(CDomainMap.is_null())
693 CDomainMap = Bprime->getDomainMap();
695 if(CRangeMap.is_null())
697 CRangeMap = Bprime->getRangeMap();
699 assert(!(CDomainMap.is_null()));
700 assert(!(CRangeMap.is_null()));
701 typedef typename AddKern::values_array values_array;
702 typedef typename AddKern::row_ptrs_array row_ptrs_array;
703 typedef typename AddKern::col_inds_array col_inds_array;
704 bool AGraphSorted = Aprime->getCrsGraph()->isSorted();
705 bool BGraphSorted = Bprime->getCrsGraph()->isSorted();
707 row_ptrs_array rowptrs;
708 col_inds_array colinds;
709 auto acolmap = Aprime->getColMap()->getMyGlobalIndices();
710 auto bcolmap = Bprime->getColMap()->getMyGlobalIndices();
711 #ifdef HAVE_TPETRA_MMM_TIMINGS
712 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"rowmap check/import"))));
714 if(!(Aprime->getRowMap()->isSameAs(*(Bprime->getRowMap()))))
717 auto import = rcp(
new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
718 Aprime = importAndFillCompleteCrsMatrix<crs_matrix_type>(Aprime, *
import, Bprime->getDomainMap(), Bprime->getRangeMap());
720 bool matchingColMaps = Aprime->getColMap()->isSameAs(*(Bprime->getColMap()));
721 bool sorted = AGraphSorted && BGraphSorted;
722 RCP<const map_type> CrowMap;
723 RCP<const map_type> CcolMap;
724 RCP<const import_type> Cimport = Teuchos::null;
725 RCP<export_type> Cexport = Teuchos::null;
727 if(!matchingColMaps && !(CDomainMap->isContiguous()))
730 #ifdef HAVE_TPETRA_MMM_TIMINGS
731 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"fallback to CrsMatrix::add"))));
734 std::ostringstream os;
735 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
736 <<
"Call Bprime->add(...)" << std::endl;
737 std::cerr << os.str ();
739 Teuchos::RCP<crs_matrix_type> C_ = Teuchos::rcp_static_cast<crs_matrix_type>(Bprime->add(alpha, *Aprime, beta, CDomainMap, CRangeMap, params));
741 C.
setAllValues(C_->getLocalMatrix().graph.row_map,C_->getLocalMatrix().graph.entries,C_->getLocalMatrix().values);
742 C.
expertStaticFillComplete(CDomainMap, CRangeMap, C_->getGraph()->getImporter(), C_->getGraph()->getExporter(), params);
745 else if(!matchingColMaps)
747 #ifdef HAVE_TPETRA_MMM_TIMINGS
748 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mismatched col map full kernel"))));
751 auto Acolmap = Aprime->getColMap()->getMyGlobalIndices();
752 auto Bcolmap = Bprime->getColMap()->getMyGlobalIndices();
753 typename AddKern::global_col_inds_array globalColinds(
"", 0);
755 std::ostringstream os;
756 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
757 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
758 std::cerr << os.str ();
760 AddKern::convertToGlobalAndAdd(
761 Aprime->getLocalMatrix(), alpha, Bprime->getLocalMatrix(), beta, Acolmap, Bcolmap,
762 CRangeMap->getMinGlobalIndex(), Aprime->getGlobalNumCols(), vals, rowptrs, globalColinds);
763 colinds = col_inds_array(
"C colinds", globalColinds.extent(0));
765 std::ostringstream os;
766 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
767 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
768 std::cerr << os.str ();
770 #ifdef HAVE_TPETRA_MMM_TIMINGS
771 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"building optimized column map"))));
773 CrowMap = Bprime->getRowMap();
776 CcolMap = AddKern::makeColMapAndConvertGids(Aprime->getGlobalNumCols(), globalColinds, colinds, comm);
781 auto localA = Aprime->getLocalMatrix();
782 auto localB = Bprime->getLocalMatrix();
783 auto Avals = localA.values;
784 auto Bvals = localB.values;
785 auto Arowptrs = localA.graph.row_map;
786 auto Browptrs = localB.graph.row_map;
787 auto Acolinds = localA.graph.entries;
788 auto Bcolinds = localB.graph.entries;
792 #ifdef HAVE_TPETRA_MMM_TIMINGS
793 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"sorted entries full kernel"))));
796 std::ostringstream os;
797 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
798 <<
"Call AddKern::addSorted(...)" << std::endl;
799 std::cerr << os.str ();
801 AddKern::addSorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, vals, rowptrs, colinds);
806 #ifdef HAVE_TPETRA_MMM_TIMINGS
807 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mm add unsorted entries full kernel"))));
810 std::ostringstream os;
811 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
812 <<
"Call AddKern::addUnsorted(...)" << std::endl;
813 std::cerr << os.str ();
815 AddKern::addUnsorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
817 CrowMap = Bprime->getRowMap();
818 CcolMap = Bprime->getColMap();
821 #ifdef HAVE_TPETRA_MMM_TIMINGS
822 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs constructor"))));
827 #ifdef HAVE_TPETRA_MMM_TIMINGS
828 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs expertStaticFillComplete"))));
830 if(!CDomainMap->isSameAs(*CcolMap))
833 std::ostringstream os;
834 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
835 <<
"Create Cimport" << std::endl;
836 std::cerr << os.str ();
838 Cimport = rcp(
new import_type(CDomainMap, CcolMap));
840 if(!CrowMap->isSameAs(*CRangeMap))
843 std::ostringstream os;
844 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
845 <<
"Create Cexport" << std::endl;
846 std::cerr << os.str ();
848 Cexport = rcp(
new export_type(CrowMap, CRangeMap));
852 std::ostringstream os;
853 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": "
854 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
855 std::cerr << os.str ();
861 template <
class Scalar,
874 using Teuchos::Array;
875 using Teuchos::ArrayRCP;
876 using Teuchos::ArrayView;
879 using Teuchos::rcp_dynamic_cast;
880 using Teuchos::rcpFromRef;
881 using Teuchos::tuple;
884 typedef Teuchos::ScalarTraits<Scalar> STS;
892 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
894 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
895 prefix <<
"The case C == null does not actually work. Fixing this will require an interface change.");
897 TEUCHOS_TEST_FOR_EXCEPTION(
899 prefix <<
"Both input matrices must be fill complete before calling this function.");
901 #ifdef HAVE_TPETRA_DEBUG
903 const bool domainMapsSame =
907 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
908 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
910 const bool rangeMapsSame =
914 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
915 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
917 #endif // HAVE_TPETRA_DEBUG
920 RCP<const crs_matrix_type> Aprime;
922 transposer_type theTransposer (rcpFromRef (A));
923 Aprime = theTransposer.createTranspose ();
925 Aprime = rcpFromRef (A);
928 #ifdef HAVE_TPETRA_DEBUG
929 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
930 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
931 #endif // HAVE_TPETRA_DEBUG
934 RCP<const crs_matrix_type> Bprime;
936 transposer_type theTransposer (rcpFromRef (B));
937 Bprime = theTransposer.createTranspose ();
939 Bprime = rcpFromRef (B);
942 #ifdef HAVE_TPETRA_DEBUG
943 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
944 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
945 #endif // HAVE_TPETRA_DEBUG
948 if (! C.is_null ()) {
949 C->setAllToScalar (STS::zero ());
957 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
958 RCP<const map_type> rowMap = Aprime->getRowMap ();
960 RCP<const crs_graph_type> A_graph =
961 rcp_dynamic_cast<const crs_graph_type> (Aprime->getGraph (),
true);
962 #ifdef HAVE_TPETRA_DEBUG
963 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
964 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. "
965 "Please report this bug to the Tpetra developers.");
966 #endif // HAVE_TPETRA_DEBUG
967 RCP<const crs_graph_type> B_graph =
968 rcp_dynamic_cast<const crs_graph_type> (Bprime->getGraph (),
true);
969 #ifdef HAVE_TPETRA_DEBUG
970 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
971 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. "
972 "Please report this bug to the Tpetra developers.");
973 #endif // HAVE_TPETRA_DEBUG
974 RCP<const map_type> A_colMap = A_graph->getColMap ();
975 #ifdef HAVE_TPETRA_DEBUG
976 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
977 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. "
978 "Please report this bug to the Tpetra developers.");
979 #endif // HAVE_TPETRA_DEBUG
980 RCP<const map_type> B_colMap = B_graph->getColMap ();
981 #ifdef HAVE_TPETRA_DEBUG
982 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
983 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. "
984 "Please report this bug to the Tpetra developers.");
985 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
987 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. "
988 "Please report this bug to the Tpetra developers.");
989 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
991 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. "
992 "Please report this bug to the Tpetra developers.");
993 #endif // HAVE_TPETRA_DEBUG
996 RCP<const import_type> sumImport =
997 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
998 RCP<const map_type> C_colMap = sumImport->getTargetMap ();
1006 ArrayView<const LocalOrdinal> A_local_ind;
1007 Array<GlobalOrdinal> A_global_ind;
1008 ArrayView<const LocalOrdinal> B_local_ind;
1009 Array<GlobalOrdinal> B_global_ind;
1011 const size_t localNumRows = rowMap->getNodeNumElements ();
1012 ArrayRCP<size_t> numEntriesPerRow (localNumRows);
1015 size_t maxNumEntriesPerRow = 0;
1016 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1018 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
1019 const size_type A_numEnt = A_local_ind.size ();
1020 if (A_numEnt > A_global_ind.size ()) {
1021 A_global_ind.resize (A_numEnt);
1024 for (size_type k = 0; k < A_numEnt; ++k) {
1025 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1029 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
1030 const size_type B_numEnt = B_local_ind.size ();
1031 if (B_numEnt > B_global_ind.size ()) {
1032 B_global_ind.resize (B_numEnt);
1035 for (size_type k = 0; k < B_numEnt; ++k) {
1036 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1040 const size_t curNumEntriesPerRow =
1041 keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
1042 B_global_ind.begin (), B_global_ind.end ());
1043 numEntriesPerRow[localRow] = curNumEntriesPerRow;
1044 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
1051 C = rcp (
new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow,
StaticProfile));
1056 ArrayView<const Scalar> A_val;
1057 ArrayView<const Scalar> B_val;
1059 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
1060 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
1061 Array<Scalar> AplusB_val (maxNumEntriesPerRow);
1063 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1065 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
1067 for (size_type k = 0; k < A_local_ind.size (); ++k) {
1068 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1072 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
1074 for (size_type k = 0; k < B_local_ind.size (); ++k) {
1075 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1078 const size_t curNumEntries = numEntriesPerRow[localRow];
1079 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
1080 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
1081 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
1085 A_val.begin (), A_val.end (),
1086 B_global_ind.begin (), B_global_ind.end (),
1087 B_val.begin (), B_val.end (),
1088 C_global_ind.begin (), C_val.begin (),
1089 std::plus<Scalar> ());
1091 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
1092 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
1095 C->replaceLocalValues (localRow, C_local_ind, C_val);
1100 RCP<const map_type> domainMap = A_graph->getDomainMap ();
1101 RCP<const map_type> rangeMap = A_graph->getRangeMap ();
1102 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
1110 C = rcp (
new crs_matrix_type (Aprime->getRowMap (),
null));
1117 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), 0));
1121 #ifdef HAVE_TPETRA_DEBUG
1122 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
1123 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1124 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
1125 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1126 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
1127 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1128 #endif // HAVE_TPETRA_DEBUG
1130 Array<RCP<const crs_matrix_type> > Mat =
1131 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
1132 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
1135 for (
int k = 0; k < 2; ++k) {
1136 Array<GlobalOrdinal> Indices;
1137 Array<Scalar> Values;
1145 #ifdef HAVE_TPETRA_DEBUG
1146 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
1147 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1148 #endif // HAVE_TPETRA_DEBUG
1149 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
1150 #ifdef HAVE_TPETRA_DEBUG
1151 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
1152 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1153 #endif // HAVE_TPETRA_DEBUG
1155 const size_t localNumRows = Mat[k]->getNodeNumRows ();
1156 for (
size_t i = 0; i < localNumRows; ++i) {
1157 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
1158 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
1159 if (numEntries > 0) {
1160 Indices.resize (numEntries);
1161 Values.resize (numEntries);
1162 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
1164 if (scalar[k] != STS::one ()) {
1165 for (
size_t j = 0; j < numEntries; ++j) {
1166 Values[j] *= scalar[k];
1170 if (C->isFillComplete ()) {
1171 C->sumIntoGlobalValues (globalRow, Indices, Values);
1173 C->insertGlobalValues (globalRow, Indices, Values);
1180 template<
typename SC,
typename LO,
typename GO,
typename NO>
1181 void AddDetails::AddKernels<SC, LO, GO, NO>::
1183 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1184 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1185 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1186 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1187 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1188 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1189 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1190 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1191 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1192 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1193 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1195 using Teuchos::TimeMonitor;
1196 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1197 auto nrows = Arowptrs.extent(0) - 1;
1198 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1199 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, impl_scalar_type,
1200 execution_space, memory_space, memory_space> KKH;
1202 handle.create_spadd_handle(
true);
1203 auto addHandle = handle.get_spadd_handle();
1204 #ifdef HAVE_TPETRA_MMM_TIMINGS
1205 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1207 KokkosSparse::Experimental::spadd_symbolic
1209 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1210 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1211 row_ptrs_array, col_inds_array>
1212 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1213 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1214 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1215 #ifdef HAVE_TPETRA_MMM_TIMINGS
1216 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted numeric")));
1218 KokkosSparse::Experimental::spadd_numeric(&handle,
1219 Arowptrs, Acolinds, Avals, scalarA,
1220 Browptrs, Bcolinds, Bvals, scalarB,
1221 Crowptrs, Ccolinds, Cvals);
1224 template<
typename SC,
typename LO,
typename GO,
typename NO>
1225 void AddDetails::AddKernels<SC, LO, GO, NO>::
1227 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1228 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1229 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1230 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1231 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1232 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1233 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1234 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1236 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1237 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1238 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1240 using Teuchos::TimeMonitor;
1241 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1242 auto nrows = Arowptrs.extent(0) - 1;
1243 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1244 typedef AddDetails::AddKernels<SC, LO, GO, NO> AddKern;
1245 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, AddKern::impl_scalar_type,
1246 AddKern::execution_space, AddKern::memory_space, AddKern::memory_space> KKH;
1248 handle.create_spadd_handle(
false);
1249 auto addHandle = handle.get_spadd_handle();
1250 #ifdef HAVE_TPETRA_MMM_TIMINGS
1251 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1253 KokkosSparse::Experimental::spadd_symbolic
1255 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1256 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1257 row_ptrs_array, col_inds_array>
1258 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1259 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1260 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1261 #ifdef HAVE_TPETRA_MMM_TIMINGS
1262 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted kernel: sorted numeric")));
1264 KokkosSparse::Experimental::spadd_numeric(&handle,
1265 Arowptrs, Acolinds, Avals, scalarA,
1266 Browptrs, Bcolinds, Bvals, scalarB,
1267 Crowptrs, Ccolinds, Cvals);
1270 template<
typename GO,
1271 typename LocalIndicesType,
1272 typename GlobalIndicesType,
1273 typename ColMapType>
1274 struct ConvertColIndsFunctor
1276 ConvertColIndsFunctor (
const GO minGlobal_,
1277 const LocalIndicesType& colindsOrig_,
1278 const GlobalIndicesType& colindsConverted_,
1279 const ColMapType& colmap_) :
1280 minGlobal (minGlobal_),
1281 colindsOrig (colindsOrig_),
1282 colindsConverted (colindsConverted_),
1285 KOKKOS_INLINE_FUNCTION
void
1286 operator() (
const size_t& i)
const
1288 colindsConverted[i] = colmap[colindsOrig[i]];
1291 LocalIndicesType colindsOrig;
1292 GlobalIndicesType colindsConverted;
1296 template<
typename SC,
typename LO,
typename GO,
typename NO>
1297 void AddDetails::AddKernels<SC, LO, GO, NO>::
1298 convertToGlobalAndAdd(
1299 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
1300 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1301 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
1302 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1303 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
1304 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
1307 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1308 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1309 typename AddDetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds)
1311 using Teuchos::TimeMonitor;
1313 const values_array& Avals = A.values;
1314 const values_array& Bvals = B.values;
1315 const col_inds_array& Acolinds = A.graph.entries;
1316 const col_inds_array& Bcolinds = B.graph.entries;
1317 auto Arowptrs = A.graph.row_map;
1318 auto Browptrs = B.graph.row_map;
1319 global_col_inds_array AcolindsConverted(
"A colinds (converted)", Acolinds.extent(0));
1320 global_col_inds_array BcolindsConverted(
"B colinds (converted)", Bcolinds.extent(0));
1321 #ifdef HAVE_TPETRA_MMM_TIMINGS
1322 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: " + std::string(
"column map conversion"))));
1324 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(minGlobalCol, Acolinds, AcolindsConverted, AcolMap);
1325 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
1326 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(minGlobalCol, Bcolinds, BcolindsConverted, BcolMap);
1327 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
1328 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, GO, impl_scalar_type,
1329 execution_space, memory_space, memory_space> KKH;
1331 handle.create_spadd_handle(
false);
1332 auto addHandle = handle.get_spadd_handle();
1333 #ifdef HAVE_TPETRA_MMM_TIMINGS
1334 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted symbolic")));
1336 auto nrows = Arowptrs.extent(0) - 1;
1337 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1338 KokkosSparse::Experimental::spadd_symbolic
1339 <KKH,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type, row_ptrs_array, global_col_inds_array>
1340 (&handle, Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
1341 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1342 Ccolinds = global_col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1343 #ifdef HAVE_TPETRA_MMM_TIMINGS
1344 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted numeric")));
1346 KokkosSparse::Experimental::spadd_numeric(&handle,
1347 Arowptrs, AcolindsConverted, Avals, scalarA,
1348 Browptrs, BcolindsConverted, Bvals, scalarB,
1349 Crowptrs, Ccolinds, Cvals);
1354 namespace MMdetails{
1358 template <
class TransferType>
1359 void printMultiplicationStatistics(Teuchos::RCP<TransferType >
Transfer,
const std::string &label) {
1363 const Distributor & Distor =
Transfer->getDistributor();
1364 Teuchos::RCP<const Teuchos::Comm<int> > Comm =
Transfer->getSourceMap()->getComm();
1366 size_t rows_send =
Transfer->getNumExportIDs();
1367 size_t rows_recv =
Transfer->getNumRemoteIDs();
1369 size_t round1_send =
Transfer->getNumExportIDs() *
sizeof(size_t);
1370 size_t round1_recv =
Transfer->getNumRemoteIDs() *
sizeof(size_t);
1371 size_t num_send_neighbors = Distor.getNumSends();
1372 size_t num_recv_neighbors = Distor.getNumReceives();
1373 size_t round2_send, round2_recv;
1374 Distor.getLastDoStatistics(round2_send,round2_recv);
1376 int myPID = Comm->getRank();
1377 int NumProcs = Comm->getSize();
1384 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
1385 size_t gstats_min[8], gstats_max[8];
1387 double lstats_avg[8], gstats_avg[8];
1388 for(
int i=0; i<8; i++)
1389 lstats_avg[i] = ((
double)lstats[i])/NumProcs;
1391 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
1392 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
1393 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
1396 printf(
"%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1397 (int)gstats_min[0],gstats_avg[0],(
int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(
int)gstats_max[2],
1398 (int)gstats_min[4],gstats_avg[4],(
int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(
int)gstats_max[6]);
1399 printf(
"%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1400 (int)gstats_min[1],gstats_avg[1],(
int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(
int)gstats_max[3],
1401 (int)gstats_min[5],gstats_avg[5],(
int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(
int)gstats_max[7]);
1408 template<
class Scalar,
1410 class GlobalOrdinal,
1412 void mult_AT_B_newmatrix(
1413 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1414 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1415 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1416 const std::string & label,
1417 const Teuchos::RCP<Teuchos::ParameterList>& params)
1423 typedef LocalOrdinal LO;
1424 typedef GlobalOrdinal GO;
1426 typedef CrsMatrixStruct<SC,LO,GO,NO> crs_matrix_struct_type;
1427 typedef RowMatrixTransposer<SC,LO,GO,NO> transposer_type;
1429 #ifdef HAVE_TPETRA_MMM_TIMINGS
1430 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1431 using Teuchos::TimeMonitor;
1432 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T Transpose"))));
1438 transposer_type transposer (rcpFromRef (A),label+std::string(
"XP: "));
1439 RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
1440 if(!params.is_null()) transposeParams->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1441 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Atrans = transposer.createTransposeLocal(transposeParams);
1446 #ifdef HAVE_TPETRA_MMM_TIMINGS
1447 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T I&X"))));
1451 crs_matrix_struct_type Aview;
1452 crs_matrix_struct_type Bview;
1453 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
1456 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
1457 if(!params.is_null()) importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1458 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,
true, label,importParams1);
1460 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
1461 if(!params.is_null()) importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1463 if(B.getRowMap()->isSameAs(*Atrans->getColMap())){
1464 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1467 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label,importParams2);
1470 #ifdef HAVE_TPETRA_MMM_TIMINGS
1471 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
1474 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp;
1477 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
1478 if (needs_final_export)
1481 Ctemp = rcp(&C,
false);
1484 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1489 #ifdef HAVE_TPETRA_MMM_TIMINGS
1490 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1493 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,
false);
1494 if (needs_final_export) {
1495 Teuchos::ParameterList labelList;
1496 labelList.set(
"Timer Label", label);
1497 if(!params.is_null()) labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1499 Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
1500 B.getDomainMap(),A.getDomainMap(),rcp(&labelList,
false));
1502 #ifdef HAVE_TPETRA_MMM_STATISTICS
1503 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1509 template<
class Scalar,
1511 class GlobalOrdinal,
1514 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1515 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1516 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1517 const std::string& label,
1518 const Teuchos::RCP<Teuchos::ParameterList>& params)
1520 using Teuchos::Array;
1521 using Teuchos::ArrayRCP;
1522 using Teuchos::ArrayView;
1523 using Teuchos::OrdinalTraits;
1524 using Teuchos::null;
1526 typedef Teuchos::ScalarTraits<Scalar> STS;
1528 LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
1529 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1531 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1532 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1534 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getNodeElementList();
1535 ArrayView<const GlobalOrdinal> bcols_import =
null;
1536 if (Bview.importColMap !=
null) {
1537 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1538 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1540 bcols_import = Bview.importColMap->getNodeElementList();
1543 size_t C_numCols = C_lastCol - C_firstCol +
1544 OrdinalTraits<LocalOrdinal>::one();
1545 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1546 OrdinalTraits<LocalOrdinal>::one();
1548 if (C_numCols_import > C_numCols)
1549 C_numCols = C_numCols_import;
1551 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1552 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1553 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1555 Array<Scalar> C_row_i = dwork;
1556 Array<GlobalOrdinal> C_cols = iwork;
1557 Array<size_t> c_index = iwork2;
1558 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1559 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1561 size_t C_row_i_length, j, k, last_index;
1564 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1565 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1566 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1567 if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1569 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1570 Aview.colMap->getMaxLocalIndex(); i++)
1575 for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1576 Aview.colMap->getMaxLocalIndex(); i++) {
1577 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1578 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1579 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1580 else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1590 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1591 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1592 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1593 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1594 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1595 ArrayView<const Scalar> Avals, Bvals, Ivals;
1596 Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1597 Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1598 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1599 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1600 if(!Bview.importMatrix.is_null()) {
1601 Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1602 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1605 bool C_filled = C.isFillComplete();
1607 for (
size_t i = 0; i < C_numCols; i++)
1608 c_index[i] = OrdinalTraits<size_t>::invalid();
1611 size_t Arows = Aview.rowMap->getNodeNumElements();
1612 for(
size_t i=0; i<Arows; ++i) {
1616 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1622 C_row_i_length = OrdinalTraits<size_t>::zero();
1624 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1625 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1626 const Scalar Aval = Avals[k];
1627 if (Aval == STS::zero())
1630 if (Ak == LO_INVALID)
1633 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1634 LocalOrdinal col = Bcolind[j];
1637 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1640 C_row_i[C_row_i_length] = Aval*Bvals[j];
1641 C_cols[C_row_i_length] = col;
1642 c_index[col] = C_row_i_length;
1646 C_row_i[c_index[col]] += Aval*Bvals[j];
1651 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1652 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1653 C_cols[ii] = bcols[C_cols[ii]];
1654 combined_index[ii] = C_cols[ii];
1655 combined_values[ii] = C_row_i[ii];
1657 last_index = C_row_i_length;
1663 C_row_i_length = OrdinalTraits<size_t>::zero();
1665 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1666 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1667 const Scalar Aval = Avals[k];
1668 if (Aval == STS::zero())
1671 if (Ak!=LO_INVALID)
continue;
1673 Ak = Acol2Irow[Acolind[k]];
1674 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1675 LocalOrdinal col = Icolind[j];
1678 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1681 C_row_i[C_row_i_length] = Aval*Ivals[j];
1682 C_cols[C_row_i_length] = col;
1683 c_index[col] = C_row_i_length;
1688 C_row_i[c_index[col]] += Aval*Ivals[j];
1693 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1694 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1695 C_cols[ii] = bcols_import[C_cols[ii]];
1696 combined_index[last_index] = C_cols[ii];
1697 combined_values[last_index] = C_row_i[ii];
1704 C.sumIntoGlobalValues(
1706 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1707 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1709 C.insertGlobalValues(
1711 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1712 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1718 template<
class Scalar,
1720 class GlobalOrdinal,
1722 void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1723 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1724 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1726 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1727 Mview.maxNumRowEntries = Mview.indices[0].size();
1729 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1730 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1731 Mview.maxNumRowEntries = Mview.indices[i].size();
1736 template<
class CrsMatrixType>
1737 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1739 size_t Aest = 100, Best=100;
1740 if (A.getNodeNumEntries() > 0)
1741 Aest = (A.getNodeNumRows() > 0)? A.getNodeNumEntries()/A.getNodeNumRows() : 100;
1742 if (B.getNodeNumEntries() > 0)
1743 Best = (B.getNodeNumRows() > 0) ? B.getNodeNumEntries()/B.getNodeNumRows() : 100;
1745 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1746 nnzperrow *= nnzperrow;
1748 return (
size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1758 template<
class Scalar,
1760 class GlobalOrdinal,
1762 void mult_A_B_newmatrix(
1763 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1764 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1765 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1766 const std::string& label,
1767 const Teuchos::RCP<Teuchos::ParameterList>& params)
1769 using Teuchos::Array;
1770 using Teuchos::ArrayRCP;
1771 using Teuchos::ArrayView;
1776 typedef LocalOrdinal LO;
1777 typedef GlobalOrdinal GO;
1779 typedef Import<LO,GO,NO> import_type;
1780 typedef Map<LO,GO,NO> map_type;
1783 typedef typename map_type::local_map_type local_map_type;
1785 typedef typename KCRS::StaticCrsGraphType graph_t;
1786 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1787 typedef typename NO::execution_space execution_space;
1788 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1789 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1791 #ifdef HAVE_TPETRA_MMM_TIMINGS
1792 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1793 using Teuchos::TimeMonitor;
1794 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1796 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1799 RCP<const import_type> Cimport;
1800 RCP<const map_type> Ccolmap;
1801 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1802 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
1803 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1804 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1805 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1806 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1807 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1808 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1815 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
1817 if (Bview.importMatrix.is_null()) {
1820 Ccolmap = Bview.colMap;
1821 const LO colMapSize = static_cast<LO>(Bview.colMap->getNodeNumElements());
1823 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1824 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1825 KOKKOS_LAMBDA(
const LO i) {
1838 if (!Bimport.is_null() && !Iimport.is_null()) {
1839 Cimport = Bimport->setUnion(*Iimport);
1841 else if (!Bimport.is_null() && Iimport.is_null()) {
1842 Cimport = Bimport->setUnion();
1844 else if (Bimport.is_null() && !Iimport.is_null()) {
1845 Cimport = Iimport->setUnion();
1848 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1850 Ccolmap = Cimport->getTargetMap();
1855 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1856 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1863 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
1864 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1865 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
1866 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1868 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
1869 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1878 C.replaceColMap(Ccolmap);
1896 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
1897 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
1899 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
1900 GO aidx = Acolmap_local.getGlobalElement(i);
1901 LO B_LID = Browmap_local.getLocalElement(aidx);
1902 if (B_LID != LO_INVALID) {
1903 targetMapToOrigRow(i) = B_LID;
1904 targetMapToImportRow(i) = LO_INVALID;
1906 LO I_LID = Irowmap_local.getLocalElement(aidx);
1907 targetMapToOrigRow(i) = LO_INVALID;
1908 targetMapToImportRow(i) = I_LID;
1913 #ifdef HAVE_TPETRA_MMM_TIMINGS
1919 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
1926 template<
class Scalar,
1928 class GlobalOrdinal,
1930 class LocalOrdinalViewType>
1931 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1932 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1933 const LocalOrdinalViewType & targetMapToOrigRow,
1934 const LocalOrdinalViewType & targetMapToImportRow,
1935 const LocalOrdinalViewType & Bcol2Ccol,
1936 const LocalOrdinalViewType & Icol2Ccol,
1937 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1938 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
1939 const std::string& label,
1940 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1941 #ifdef HAVE_TPETRA_MMM_TIMINGS
1942 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1943 using Teuchos::TimeMonitor;
1944 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
1945 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
1948 using Teuchos::Array;
1949 using Teuchos::ArrayRCP;
1950 using Teuchos::ArrayView;
1956 typedef typename KCRS::StaticCrsGraphType graph_t;
1957 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
1958 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1959 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1960 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1963 typedef LocalOrdinal LO;
1964 typedef GlobalOrdinal GO;
1966 typedef Map<LO,GO,NO> map_type;
1967 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1968 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1969 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1972 RCP<const map_type> Ccolmap = C.getColMap();
1973 size_t m = Aview.origMatrix->getNodeNumRows();
1974 size_t n = Ccolmap->getNodeNumElements();
1975 size_t b_max_nnz_per_row = Bview.origMatrix->getNodeMaxNumRowEntries();
1978 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
1979 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
1981 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
1982 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
1983 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
1985 c_lno_view_t Irowptr;
1986 lno_nnz_view_t Icolind;
1987 scalar_view_t Ivals;
1988 if(!Bview.importMatrix.is_null()) {
1989 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
1990 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
1991 Ivals = Bview.importMatrix->getLocalMatrix().values;
1992 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getNodeMaxNumRowEntries());
1995 #ifdef HAVE_TPETRA_MMM_TIMINGS
1996 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2007 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2008 lno_view_t Crowptr(
"Crowptr",m+1);
2009 lno_nnz_view_t Ccolind(
"Ccolind",CSR_alloc);
2010 scalar_view_t Cvals(
"Cvals",CSR_alloc);
2020 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2021 std::vector<size_t> c_status(n, ST_INVALID);
2031 size_t CSR_ip = 0, OLD_ip = 0;
2032 for (
size_t i = 0; i < m; i++) {
2035 Crowptr[i] = CSR_ip;
2038 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2039 LO Aik = Acolind[k];
2040 const SC Aval = Avals[k];
2041 if (Aval == SC_ZERO)
2044 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2051 size_t Bk = static_cast<size_t> (targetMapToOrigRow[Aik]);
2054 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2055 LO Bkj = Bcolind[j];
2056 LO Cij = Bcol2Ccol[Bkj];
2058 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2060 c_status[Cij] = CSR_ip;
2061 Ccolind[CSR_ip] = Cij;
2062 Cvals[CSR_ip] = Aval*Bvals[j];
2066 Cvals[c_status[Cij]] += Aval*Bvals[j];
2077 size_t Ik = static_cast<size_t> (targetMapToImportRow[Aik]);
2078 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2079 LO Ikj = Icolind[j];
2080 LO Cij = Icol2Ccol[Ikj];
2082 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
2084 c_status[Cij] = CSR_ip;
2085 Ccolind[CSR_ip] = Cij;
2086 Cvals[CSR_ip] = Aval*Ivals[j];
2089 Cvals[c_status[Cij]] += Aval*Ivals[j];
2096 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1])*b_max_nnz_per_row) > CSR_alloc) {
2098 Kokkos::resize(Ccolind,CSR_alloc);
2099 Kokkos::resize(Cvals,CSR_alloc);
2104 Crowptr[m] = CSR_ip;
2107 Kokkos::resize(Ccolind,CSR_ip);
2108 Kokkos::resize(Cvals,CSR_ip);
2110 #ifdef HAVE_TPETRA_MMM_TIMINGS
2111 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort"))));
2112 MM2 = Teuchos::null;
2116 if (params.is_null() || params->get(
"sort entries",
true))
2117 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2118 C.setAllValues(Crowptr,Ccolind, Cvals);
2121 #ifdef HAVE_TPETRA_MMM_TIMINGS
2122 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC"))));
2133 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2134 labelList->set(
"Timer Label",label);
2135 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2136 RCP<const Export<LO,GO,NO> > dummyExport;
2137 C.expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2146 template<
class Scalar,
2148 class GlobalOrdinal,
2150 void mult_A_B_reuse(
2151 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2152 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2153 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2154 const std::string& label,
2155 const Teuchos::RCP<Teuchos::ParameterList>& params)
2157 using Teuchos::Array;
2158 using Teuchos::ArrayRCP;
2159 using Teuchos::ArrayView;
2164 typedef LocalOrdinal LO;
2165 typedef GlobalOrdinal GO;
2167 typedef Import<LO,GO,NO> import_type;
2168 typedef Map<LO,GO,NO> map_type;
2171 typedef typename map_type::local_map_type local_map_type;
2173 typedef typename KCRS::StaticCrsGraphType graph_t;
2174 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2175 typedef typename NO::execution_space execution_space;
2176 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2177 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2179 #ifdef HAVE_TPETRA_MMM_TIMINGS
2180 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2181 using Teuchos::TimeMonitor;
2182 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
2184 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2187 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2188 RCP<const map_type> Ccolmap = C.getColMap();
2189 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2190 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2191 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2192 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2193 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2194 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2197 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2201 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2202 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2205 if (!Bview.importMatrix.is_null()) {
2206 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2207 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2209 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2210 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2211 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2217 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2218 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2219 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2220 GO aidx = Acolmap_local.getGlobalElement(i);
2221 LO B_LID = Browmap_local.getLocalElement(aidx);
2222 if (B_LID != LO_INVALID) {
2223 targetMapToOrigRow(i) = B_LID;
2224 targetMapToImportRow(i) = LO_INVALID;
2226 LO I_LID = Irowmap_local.getLocalElement(aidx);
2227 targetMapToOrigRow(i) = LO_INVALID;
2228 targetMapToImportRow(i) = I_LID;
2233 #ifdef HAVE_TPETRA_MMM_TIMINGS
2239 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2243 template<
class Scalar,
2245 class GlobalOrdinal,
2247 class LocalOrdinalViewType>
2248 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2249 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2250 const LocalOrdinalViewType & targetMapToOrigRow,
2251 const LocalOrdinalViewType & targetMapToImportRow,
2252 const LocalOrdinalViewType & Bcol2Ccol,
2253 const LocalOrdinalViewType & Icol2Ccol,
2254 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2255 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2256 const std::string& label,
2257 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2258 #ifdef HAVE_TPETRA_MMM_TIMINGS
2259 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2260 using Teuchos::TimeMonitor;
2261 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
2262 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2270 typedef typename KCRS::StaticCrsGraphType graph_t;
2271 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2272 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2273 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2276 typedef LocalOrdinal LO;
2277 typedef GlobalOrdinal GO;
2279 typedef Map<LO,GO,NO> map_type;
2280 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2281 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2282 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2285 RCP<const map_type> Ccolmap = C.getColMap();
2286 size_t m = Aview.origMatrix->getNodeNumRows();
2287 size_t n = Ccolmap->getNodeNumElements();
2290 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2291 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2292 const KCRS & Cmat = C.getLocalMatrix();
2294 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2295 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2296 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2297 scalar_view_t Cvals = Cmat.values;
2299 c_lno_view_t Irowptr;
2300 lno_nnz_view_t Icolind;
2301 scalar_view_t Ivals;
2302 if(!Bview.importMatrix.is_null()) {
2303 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2304 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2305 Ivals = Bview.importMatrix->getLocalMatrix().values;
2308 #ifdef HAVE_TPETRA_MMM_TIMINGS
2309 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2321 std::vector<size_t> c_status(n, ST_INVALID);
2324 size_t CSR_ip = 0, OLD_ip = 0;
2325 for (
size_t i = 0; i < m; i++) {
2328 OLD_ip = Crowptr[i];
2329 CSR_ip = Crowptr[i+1];
2330 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2331 c_status[Ccolind[k]] = k;
2337 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2338 LO Aik = Acolind[k];
2339 const SC Aval = Avals[k];
2340 if (Aval == SC_ZERO)
2343 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2345 size_t Bk = static_cast<size_t> (targetMapToOrigRow[Aik]);
2347 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2348 LO Bkj = Bcolind[j];
2349 LO Cij = Bcol2Ccol[Bkj];
2351 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2352 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2353 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2355 Cvals[c_status[Cij]] += Aval * Bvals[j];
2360 size_t Ik = static_cast<size_t> (targetMapToImportRow[Aik]);
2361 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2362 LO Ikj = Icolind[j];
2363 LO Cij = Icol2Ccol[Ikj];
2365 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2366 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2367 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2369 Cvals[c_status[Cij]] += Aval * Ivals[j];
2375 #ifdef HAVE_TPETRA_MMM_TIMINGS
2377 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse ESFC"))));
2380 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2386 template<
class Scalar,
2388 class GlobalOrdinal,
2390 void jacobi_A_B_newmatrix(
2392 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2393 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2394 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2395 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2396 const std::string& label,
2397 const Teuchos::RCP<Teuchos::ParameterList>& params)
2399 using Teuchos::Array;
2400 using Teuchos::ArrayRCP;
2401 using Teuchos::ArrayView;
2405 typedef LocalOrdinal LO;
2406 typedef GlobalOrdinal GO;
2409 typedef Import<LO,GO,NO> import_type;
2410 typedef Map<LO,GO,NO> map_type;
2411 typedef typename map_type::local_map_type local_map_type;
2415 typedef typename KCRS::StaticCrsGraphType graph_t;
2416 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2417 typedef typename NO::execution_space execution_space;
2418 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2419 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2422 #ifdef HAVE_TPETRA_MMM_TIMINGS
2423 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2424 using Teuchos::TimeMonitor;
2425 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
2427 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2430 RCP<const import_type> Cimport;
2431 RCP<const map_type> Ccolmap;
2432 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2433 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ?
2434 Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2435 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2436 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2437 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2438 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2439 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2446 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2448 if (Bview.importMatrix.is_null()) {
2451 Ccolmap = Bview.colMap;
2455 Kokkos::RangePolicy<execution_space, LO> range (0, static_cast<LO> (Bview.colMap->getNodeNumElements ()));
2456 Kokkos::parallel_for (range, KOKKOS_LAMBDA (
const size_t i) {
2457 Bcol2Ccol(i) = static_cast<LO> (i);
2468 if (!Bimport.is_null() && !Iimport.is_null()){
2469 Cimport = Bimport->setUnion(*Iimport);
2470 Ccolmap = Cimport->getTargetMap();
2472 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2473 Cimport = Bimport->setUnion();
2475 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2476 Cimport = Iimport->setUnion();
2479 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2481 Ccolmap = Cimport->getTargetMap();
2483 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2484 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2491 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2492 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2493 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2494 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2496 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2497 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2506 C.replaceColMap(Ccolmap);
2524 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2525 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2526 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2527 GO aidx = Acolmap_local.getGlobalElement(i);
2528 LO B_LID = Browmap_local.getLocalElement(aidx);
2529 if (B_LID != LO_INVALID) {
2530 targetMapToOrigRow(i) = B_LID;
2531 targetMapToImportRow(i) = LO_INVALID;
2533 LO I_LID = Irowmap_local.getLocalElement(aidx);
2534 targetMapToOrigRow(i) = LO_INVALID;
2535 targetMapToImportRow(i) = I_LID;
2542 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2551 template<
class Scalar,
2553 class GlobalOrdinal,
2555 class LocalOrdinalViewType>
2556 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2557 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & Dinv,
2558 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2559 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2560 const LocalOrdinalViewType & targetMapToOrigRow,
2561 const LocalOrdinalViewType & targetMapToImportRow,
2562 const LocalOrdinalViewType & Bcol2Ccol,
2563 const LocalOrdinalViewType & Icol2Ccol,
2564 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2565 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2566 const std::string& label,
2567 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2569 #ifdef HAVE_TPETRA_MMM_TIMINGS
2570 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2571 using Teuchos::TimeMonitor;
2572 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Nemwmatrix SerialCore"))));
2576 using Teuchos::Array;
2577 using Teuchos::ArrayRCP;
2578 using Teuchos::ArrayView;
2584 typedef typename KCRS::StaticCrsGraphType graph_t;
2585 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2586 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2587 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2588 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2591 typedef typename scalar_view_t::memory_space scalar_memory_space;
2594 typedef LocalOrdinal LO;
2595 typedef GlobalOrdinal GO;
2598 typedef Map<LO,GO,NO> map_type;
2599 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2600 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2603 RCP<const map_type> Ccolmap = C.getColMap();
2604 size_t m = Aview.origMatrix->getNodeNumRows();
2605 size_t n = Ccolmap->getNodeNumElements();
2606 size_t b_max_nnz_per_row = Bview.origMatrix->getNodeMaxNumRowEntries();
2609 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2610 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2612 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2613 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2614 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2616 c_lno_view_t Irowptr;
2617 lno_nnz_view_t Icolind;
2618 scalar_view_t Ivals;
2619 if(!Bview.importMatrix.is_null()) {
2620 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2621 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2622 Ivals = Bview.importMatrix->getLocalMatrix().values;
2623 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.importMatrix->getNodeMaxNumRowEntries());
2627 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
2635 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2636 Array<size_t> c_status(n, ST_INVALID);
2645 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2646 lno_view_t Crowptr(
"Crowptr",m+1);
2647 lno_nnz_view_t Ccolind(
"Ccolind",CSR_alloc);
2648 scalar_view_t Cvals(
"Cvals",CSR_alloc);
2649 size_t CSR_ip = 0, OLD_ip = 0;
2651 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2665 for (
size_t i = 0; i < m; i++) {
2668 Crowptr[i] = CSR_ip;
2669 SC minusOmegaDval = -omega*Dvals(i,0);
2672 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2673 Scalar Bval = Bvals[j];
2674 if (Bval == SC_ZERO)
2676 LO Bij = Bcolind[j];
2677 LO Cij = Bcol2Ccol[Bij];
2680 c_status[Cij] = CSR_ip;
2681 Ccolind[CSR_ip] = Cij;
2682 Cvals[CSR_ip] = Bvals[j];
2687 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2688 LO Aik = Acolind[k];
2689 const SC Aval = Avals[k];
2690 if (Aval == SC_ZERO)
2693 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2695 size_t Bk = static_cast<size_t> (targetMapToOrigRow[Aik]);
2697 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2698 LO Bkj = Bcolind[j];
2699 LO Cij = Bcol2Ccol[Bkj];
2701 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2703 c_status[Cij] = CSR_ip;
2704 Ccolind[CSR_ip] = Cij;
2705 Cvals[CSR_ip] = minusOmegaDval* Aval * Bvals[j];
2709 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Bvals[j];
2715 size_t Ik = static_cast<size_t> (targetMapToImportRow[Aik]);
2716 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2717 LO Ikj = Icolind[j];
2718 LO Cij = Icol2Ccol[Ikj];
2720 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2722 c_status[Cij] = CSR_ip;
2723 Ccolind[CSR_ip] = Cij;
2724 Cvals[CSR_ip] = minusOmegaDval* Aval * Ivals[j];
2727 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Ivals[j];
2734 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1]+1)*b_max_nnz_per_row) > CSR_alloc) {
2736 Kokkos::resize(Ccolind,CSR_alloc);
2737 Kokkos::resize(Cvals,CSR_alloc);
2741 Crowptr[m] = CSR_ip;
2744 Kokkos::resize(Ccolind,CSR_ip);
2745 Kokkos::resize(Cvals,CSR_ip);
2749 #ifdef HAVE_TPETRA_MMM_TIMINGS
2750 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort"))));
2757 C.replaceColMap(Ccolmap);
2764 if (params.is_null() || params->get(
"sort entries",
true))
2765 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2766 C.setAllValues(Crowptr,Ccolind, Cvals);
2768 #ifdef HAVE_TPETRA_MMM_TIMINGS
2769 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC"))));
2780 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2781 labelList->set(
"Timer Label",label);
2782 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2783 RCP<const Export<LO,GO,NO> > dummyExport;
2784 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2790 template<
class Scalar,
2792 class GlobalOrdinal,
2794 void jacobi_A_B_reuse(
2796 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2797 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2798 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2799 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2800 const std::string& label,
2801 const Teuchos::RCP<Teuchos::ParameterList>& params)
2803 using Teuchos::Array;
2804 using Teuchos::ArrayRCP;
2805 using Teuchos::ArrayView;
2809 typedef LocalOrdinal LO;
2810 typedef GlobalOrdinal GO;
2813 typedef Import<LO,GO,NO> import_type;
2814 typedef Map<LO,GO,NO> map_type;
2817 typedef typename map_type::local_map_type local_map_type;
2819 typedef typename KCRS::StaticCrsGraphType graph_t;
2820 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2821 typedef typename NO::execution_space execution_space;
2822 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2823 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2825 #ifdef HAVE_TPETRA_MMM_TIMINGS
2826 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2827 using Teuchos::TimeMonitor;
2828 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2830 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2833 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2834 RCP<const map_type> Ccolmap = C.getColMap();
2835 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2836 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2837 local_map_type Irowmap_local;
if(!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2838 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2839 local_map_type Icolmap_local;
if(!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2840 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2843 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.colMap->getNodeNumElements()), Icol2Ccol;
2847 Kokkos::parallel_for(range_type(0,Bview.origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2848 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2851 if (!Bview.importMatrix.is_null()) {
2852 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2853 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2855 Kokkos::resize(Icol2Ccol,Bview.importMatrix->getColMap()->getNodeNumElements());
2856 Kokkos::parallel_for(range_type(0,Bview.importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2857 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2863 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.colMap->getNodeNumElements());
2864 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.colMap->getNodeNumElements());
2865 Kokkos::parallel_for(range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2866 GO aidx = Acolmap_local.getGlobalElement(i);
2867 LO B_LID = Browmap_local.getLocalElement(aidx);
2868 if (B_LID != LO_INVALID) {
2869 targetMapToOrigRow(i) = B_LID;
2870 targetMapToImportRow(i) = LO_INVALID;
2872 LO I_LID = Irowmap_local.getLocalElement(aidx);
2873 targetMapToOrigRow(i) = LO_INVALID;
2874 targetMapToImportRow(i) = I_LID;
2879 #ifdef HAVE_TPETRA_MMM_TIMINGS
2885 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2891 template<
class Scalar,
2893 class GlobalOrdinal,
2895 class LocalOrdinalViewType>
2896 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
2897 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
2898 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2899 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2900 const LocalOrdinalViewType & targetMapToOrigRow,
2901 const LocalOrdinalViewType & targetMapToImportRow,
2902 const LocalOrdinalViewType & Bcol2Ccol,
2903 const LocalOrdinalViewType & Icol2Ccol,
2904 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2905 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
2906 const std::string& label,
2907 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2908 #ifdef HAVE_TPETRA_MMM_TIMINGS
2909 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2910 using Teuchos::TimeMonitor;
2911 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
2912 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2919 typedef typename KCRS::StaticCrsGraphType graph_t;
2920 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2921 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2922 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2923 typedef typename scalar_view_t::memory_space scalar_memory_space;
2926 typedef LocalOrdinal LO;
2927 typedef GlobalOrdinal GO;
2929 typedef Map<LO,GO,NO> map_type;
2930 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2931 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2932 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2935 RCP<const map_type> Ccolmap = C.getColMap();
2936 size_t m = Aview.origMatrix->getNodeNumRows();
2937 size_t n = Ccolmap->getNodeNumElements();
2940 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
2941 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
2942 const KCRS & Cmat = C.getLocalMatrix();
2944 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2945 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2946 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2947 scalar_view_t Cvals = Cmat.values;
2949 c_lno_view_t Irowptr;
2950 lno_nnz_view_t Icolind;
2951 scalar_view_t Ivals;
2952 if(!Bview.importMatrix.is_null()) {
2953 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
2954 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
2955 Ivals = Bview.importMatrix->getLocalMatrix().values;
2959 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
2961 #ifdef HAVE_TPETRA_MMM_TIMINGS
2962 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore - Compare"))));
2970 std::vector<size_t> c_status(n, ST_INVALID);
2973 size_t CSR_ip = 0, OLD_ip = 0;
2974 for (
size_t i = 0; i < m; i++) {
2978 OLD_ip = Crowptr[i];
2979 CSR_ip = Crowptr[i+1];
2980 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2981 c_status[Ccolind[k]] = k;
2987 SC minusOmegaDval = -omega*Dvals(i,0);
2990 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2991 Scalar Bval = Bvals[j];
2992 if (Bval == SC_ZERO)
2994 LO Bij = Bcolind[j];
2995 LO Cij = Bcol2Ccol[Bij];
2997 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2998 std::runtime_error,
"Trying to insert a new entry into a static graph");
3000 Cvals[c_status[Cij]] = Bvals[j];
3004 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
3005 LO Aik = Acolind[k];
3006 const SC Aval = Avals[k];
3007 if (Aval == SC_ZERO)
3010 if (targetMapToOrigRow[Aik] != LO_INVALID) {
3012 size_t Bk = static_cast<size_t> (targetMapToOrigRow[Aik]);
3014 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
3015 LO Bkj = Bcolind[j];
3016 LO Cij = Bcol2Ccol[Bkj];
3018 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3019 std::runtime_error,
"Trying to insert a new entry into a static graph");
3021 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
3026 size_t Ik = static_cast<size_t> (targetMapToImportRow[Aik]);
3027 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
3028 LO Ikj = Icolind[j];
3029 LO Cij = Icol2Ccol[Ikj];
3031 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3032 std::runtime_error,
"Trying to insert a new entry into a static graph");
3034 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3040 #ifdef HAVE_TPETRA_MMM_TIMINGS
3042 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
3045 C.fillComplete(C.getDomainMap(), C.getRangeMap());
3051 template<
class Scalar,
3053 class GlobalOrdinal,
3055 void import_and_extract_views(
3056 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
3057 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
3058 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3059 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
3060 bool userAssertsThereAreNoRemotes,
3061 const std::string& label,
3062 const Teuchos::RCP<Teuchos::ParameterList>& params)
3064 using Teuchos::Array;
3065 using Teuchos::ArrayView;
3068 using Teuchos::null;
3071 typedef LocalOrdinal LO;
3072 typedef GlobalOrdinal GO;
3075 typedef Map<LO,GO,NO> map_type;
3076 typedef Import<LO,GO,NO> import_type;
3077 typedef CrsMatrix<SC,LO,GO,NO> crs_matrix_type;
3079 #ifdef HAVE_TPETRA_MMM_TIMINGS
3080 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3081 using Teuchos::TimeMonitor;
3082 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
3090 Aview.deleteContents();
3092 Aview.origMatrix = rcp(&A,
false);
3093 Aview.origRowMap = A.getRowMap();
3094 Aview.rowMap = targetMap;
3095 Aview.colMap = A.getColMap();
3096 Aview.domainMap = A.getDomainMap();
3097 Aview.importColMap =
null;
3100 if (userAssertsThereAreNoRemotes)
3103 RCP<const import_type> importer;
3104 if (params !=
null && params->isParameter(
"importer")) {
3105 importer = params->get<RCP<const import_type> >(
"importer");
3108 #ifdef HAVE_TPETRA_MMM_TIMINGS
3109 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
3114 RCP<const map_type> rowMap = A.getRowMap(), remoteRowMap;
3115 size_t numRemote = 0;
3117 if (!prototypeImporter.is_null() &&
3118 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3119 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3121 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3122 numRemote = prototypeImporter->getNumRemoteIDs();
3124 Array<GO> remoteRows(numRemote);
3125 for (
size_t i = 0; i < numRemote; i++)
3126 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3128 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3129 rowMap->getIndexBase(), rowMap->getComm()));
3132 }
else if (prototypeImporter.is_null()) {
3134 ArrayView<const GO> rows = targetMap->getNodeElementList();
3135 size_t numRows = targetMap->getNodeNumElements();
3137 Array<GO> remoteRows(numRows);
3138 for(
size_t i = 0; i < numRows; ++i) {
3139 const LO mlid = rowMap->getLocalElement(rows[i]);
3141 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3142 remoteRows[numRemote++] = rows[i];
3144 remoteRows.resize(numRemote);
3145 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3146 rowMap->getIndexBase(), rowMap->getComm()));
3154 const int numProcs = rowMap->getComm()->getSize();
3156 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3157 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3166 #ifdef HAVE_TPETRA_MMM_TIMINGS
3167 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
3171 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3173 if (globalMaxNumRemote > 0) {
3174 #ifdef HAVE_TPETRA_MMM_TIMINGS
3175 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
3179 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3181 importer = rcp(
new import_type(rowMap, remoteRowMap));
3183 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3187 params->set(
"importer", importer);
3190 if (importer !=
null) {
3191 #ifdef HAVE_TPETRA_MMM_TIMINGS
3192 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
3196 Teuchos::ParameterList labelList;
3197 labelList.set(
"Timer Label", label);
3199 if(!params.is_null())
3200 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3201 Aview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
3202 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3208 sprintf(str,
"import_matrix.%d.dat",count);
3213 #ifdef HAVE_TPETRA_MMM_STATISTICS
3214 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3218 #ifdef HAVE_TPETRA_MMM_TIMINGS
3219 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
3223 Aview.importColMap = Aview.importMatrix->getColMap();
3233 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3235 merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3236 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3237 const LocalOrdinalViewType & Acol2Brow,
3238 const LocalOrdinalViewType & Acol2Irow,
3239 const LocalOrdinalViewType & Bcol2Ccol,
3240 const LocalOrdinalViewType & Icol2Ccol,
3241 const size_t mergedNodeNumCols) {
3245 typedef typename KCRS::StaticCrsGraphType graph_t;
3246 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3247 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3248 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3250 const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
3251 const KCRS & Bk = Bview.origMatrix->getLocalMatrix();
3254 if(!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3258 RCP<const KCRS> Ik_;
3259 if(!Bview.importMatrix.is_null()) Ik_ = Teuchos::rcpFromRef<const KCRS>(Bview.importMatrix->getLocalMatrix());
3260 const KCRS * Ik = Bview.importMatrix.is_null() ? 0 : &*Ik_;
3262 if(Ik!=0) Iks = *Ik;
3263 size_t merge_numrows = Ak.numCols();
3264 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3266 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3269 typedef typename Node::execution_space execution_space;
3270 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3271 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3272 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3273 if(
final) Mrowptr(i) = update;
3276 if(Acol2Brow(i)!=LO_INVALID)
3277 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3279 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3282 if(
final && i+1==merge_numrows)
3283 Mrowptr(i+1)=update;
3287 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3288 lno_nnz_view_t Mcolind(
"Mcolind",merge_nnz);
3289 scalar_view_t Mvalues(
"Mvals",merge_nnz);
3292 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3293 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3294 if(Acol2Brow(i)!=LO_INVALID) {
3295 size_t row = Acol2Brow(i);
3296 size_t start = Bk.graph.row_map(row);
3297 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3298 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
3299 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3303 size_t row = Acol2Irow(i);
3304 size_t start = Iks.graph.row_map(row);
3305 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3306 Mvalues(j) = Iks.values(j-Mrowptr(i)+start);
3307 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3312 KCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind);
3335 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
3337 void MatrixMatrix::Multiply( \
3338 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3340 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3342 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3343 bool call_FillComplete_on_result, \
3344 const std::string & label, \
3345 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3348 void MatrixMatrix::Jacobi( \
3350 const Vector< SCALAR, LO, GO, NODE > & Dinv, \
3351 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3352 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3353 CrsMatrix< SCALAR , LO , GO , NODE >& C, \
3354 bool call_FillComplete_on_result, \
3355 const std::string & label, \
3356 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3359 void MatrixMatrix::Add( \
3360 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
3363 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
3366 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \
3369 void MatrixMatrix::Add( \
3370 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3373 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3377 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
3378 MatrixMatrix::add<SCALAR, LO, GO, NODE> \
3379 (const SCALAR & alpha, \
3380 const bool transposeA, \
3381 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3382 const SCALAR & beta, \
3383 const bool transposeB, \
3384 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3385 const Teuchos::RCP<const Map<LO, GO, NODE> >& domainMap, \
3386 const Teuchos::RCP<const Map<LO, GO, NODE> >& rangeMap, \
3387 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3389 template struct MatrixMatrix::AddDetails::AddKernels<SCALAR, LO, GO, NODE>;
3393 #endif // TPETRA_MATRIXMATRIX_DEF_HPP