42 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
45 #ifdef HAVE_TPETRA_INST_OPENMP
51 template<
class Scalar,
53 class GlobalOrdinal,
class LocalOrdinalViewType>
54 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57 const LocalOrdinalViewType & Acol2Brow,
58 const LocalOrdinalViewType & Acol2Irow,
59 const LocalOrdinalViewType & Bcol2Ccol,
60 const LocalOrdinalViewType & Icol2Ccol,
61 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63 const std::string& label = std::string(),
64 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType & Acol2Brow,
69 const LocalOrdinalViewType & Acol2Irow,
70 const LocalOrdinalViewType & Bcol2Ccol,
71 const LocalOrdinalViewType & Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
80 template<
class Scalar,
82 class GlobalOrdinal,
class LocalOrdinalViewType>
83 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
84 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
85 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
86 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
87 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
88 const LocalOrdinalViewType & Acol2Brow,
89 const LocalOrdinalViewType & Acol2Irow,
90 const LocalOrdinalViewType & Bcol2Ccol,
91 const LocalOrdinalViewType & Icol2Ccol,
92 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
93 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
94 const std::string& label = std::string(),
95 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
97 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
98 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
99 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
100 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
101 const LocalOrdinalViewType & Acol2Brow,
102 const LocalOrdinalViewType & Acol2Irow,
103 const LocalOrdinalViewType & Bcol2Ccol,
104 const LocalOrdinalViewType & Icol2Ccol,
105 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
106 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
107 const std::string& label = std::string(),
108 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
114 template<
class Scalar,
117 class LocalOrdinalViewType>
118 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
119 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
120 const LocalOrdinalViewType & Acol2Brow,
121 const LocalOrdinalViewType & Acol2Irow,
122 const LocalOrdinalViewType & Bcol2Ccol,
123 const LocalOrdinalViewType & Icol2Ccol,
124 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
125 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
126 const std::string& label,
127 const Teuchos::RCP<Teuchos::ParameterList>& params) {
129 #ifdef HAVE_TPETRA_MMM_TIMINGS
130 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
131 using Teuchos::TimeMonitor;
132 Teuchos::RCP<TimeMonitor> MM;
136 std::string nodename(
"OpenMP");
141 typedef typename KCRS::device_type device_t;
142 typedef typename KCRS::StaticCrsGraphType graph_t;
143 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
144 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
145 typedef typename KCRS::values_type::non_const_type scalar_view_t;
148 int team_work_size = 16;
149 std::string myalg(
"SPGEMM_KK_MEMORY");
152 if(!params.is_null()) {
153 if(params->isParameter(
"openmp: algorithm"))
154 myalg = params->get(
"openmp: algorithm",myalg);
155 if(params->isParameter(
"openmp: team work size"))
156 team_work_size = params->get(
"openmp: team work size",team_work_size);
161 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
165 #ifdef HAVE_TPETRA_MMM_TIMINGS
166 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPWrapper"))));
169 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
170 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
171 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
174 const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
178 std::string alg = nodename+std::string(
" algorithm");
180 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
181 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
184 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
186 #ifdef HAVE_TPETRA_MMM_TIMINGS
187 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPCore"))));
191 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
194 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
195 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
198 lno_view_t row_mapC (
"non_const_lnow_row", AnumRows + 1);
199 lno_nnz_view_t entriesC;
200 scalar_view_t valuesC;
202 kh.create_spgemm_handle(alg_enum);
203 kh.set_team_work_size(team_work_size);
205 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
207 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
209 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
210 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
213 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
214 kh.destroy_spgemm_handle();
216 #ifdef HAVE_TPETRA_MMM_TIMINGS
217 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPSort"))));
220 if (params.is_null() || params->get(
"sort entries",
true))
221 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
222 C.setAllValues(row_mapC,entriesC,valuesC);
226 #ifdef HAVE_TPETRA_MMM_TIMINGS
227 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPESFC"))));
231 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
232 labelList->set(
"Timer Label",label);
233 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
234 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
235 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
239 Teuchos::ArrayRCP< const size_t > Crowptr;
240 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
241 Teuchos::ArrayRCP< const Scalar > Cvalues;
242 C.getAllValues(Crowptr,Ccolind,Cvalues);
245 int MyPID = C->getComm()->getRank();
246 printf(
"[%d] Crowptr = ",MyPID);
247 for(
size_t i=0; i<(size_t) Crowptr.size(); i++) {
248 printf(
"%3d ",(
int)Crowptr.getConst()[i]);
251 printf(
"[%d] Ccolind = ",MyPID);
252 for(
size_t i=0; i<(size_t)Ccolind.size(); i++) {
253 printf(
"%3d ",(
int)Ccolind.getConst()[i]);
264 template<
class Scalar,
267 class LocalOrdinalViewType>
268 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
269 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
270 const LocalOrdinalViewType & Acol2Brow,
271 const LocalOrdinalViewType & Acol2Irow,
272 const LocalOrdinalViewType & Bcol2Ccol,
273 const LocalOrdinalViewType & Icol2Ccol,
274 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
275 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
276 const std::string& label,
277 const Teuchos::RCP<Teuchos::ParameterList>& params) {
278 #ifdef HAVE_TPETRA_MMM_TIMINGS
279 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
280 using Teuchos::TimeMonitor;
281 Teuchos::RCP<TimeMonitor> MM;
288 int team_work_size = 16;
289 std::string myalg(
"LTG");
290 if(!params.is_null()) {
291 if(params->isParameter(
"openmp: algorithm"))
292 myalg = params->get(
"openmp: algorithm",myalg);
293 if(params->isParameter(
"openmp: team work size"))
294 team_work_size = params->get(
"openmp: team work size",team_work_size);
299 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
302 throw std::runtime_error(
"Tpetra::MatrixMatrix::MMM reuse unknown kernel");
305 #ifdef HAVE_TPETRA_MMM_TIMINGS
306 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse OpenMPESFC"))));
308 C.fillComplete(C.getDomainMap(), C.getRangeMap());
313 template<
class Scalar,
316 class LocalOrdinalViewType>
317 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
318 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
319 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
320 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
321 const LocalOrdinalViewType & Acol2Brow,
322 const LocalOrdinalViewType & Acol2Irow,
323 const LocalOrdinalViewType & Bcol2Ccol,
324 const LocalOrdinalViewType & Icol2Ccol,
325 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
326 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
327 const std::string& label,
328 const Teuchos::RCP<Teuchos::ParameterList>& params) {
330 #ifdef HAVE_TPETRA_MMM_TIMINGS
331 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
332 using Teuchos::TimeMonitor;
333 Teuchos::RCP<TimeMonitor> MM;
340 int team_work_size = 16;
341 std::string myalg(
"LTG");
342 if(!params.is_null()) {
343 if(params->isParameter(
"openmp: jacobi algorithm"))
344 myalg = params->get(
"openmp: jacobi algorithm",myalg);
345 if(params->isParameter(
"openmp: team work size"))
346 team_work_size = params->get(
"openmp: team work size",team_work_size);
351 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
353 else if(myalg ==
"MSAK") {
354 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
357 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
360 #ifdef HAVE_TPETRA_MMM_TIMINGS
361 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
365 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
366 labelList->set(
"Timer Label",label);
367 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
370 if(!C.isFillComplete()) {
371 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
372 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
380 template<
class Scalar,
383 class LocalOrdinalViewType>
384 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
385 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
386 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
387 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
388 const LocalOrdinalViewType & Acol2Brow,
389 const LocalOrdinalViewType & Acol2Irow,
390 const LocalOrdinalViewType & Bcol2Ccol,
391 const LocalOrdinalViewType & Icol2Ccol,
392 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
393 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
394 const std::string& label,
395 const Teuchos::RCP<Teuchos::ParameterList>& params) {
397 #ifdef HAVE_TPETRA_MMM_TIMINGS
398 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
399 using Teuchos::TimeMonitor;
400 Teuchos::RCP<TimeMonitor> MM;
407 int team_work_size = 16;
408 std::string myalg(
"LTG");
409 if(!params.is_null()) {
410 if(params->isParameter(
"openmp: jacobi algorithm"))
411 myalg = params->get(
"openmp: jacobi algorithm",myalg);
412 if(params->isParameter(
"openmp: team work size"))
413 team_work_size = params->get(
"openmp: team work size",team_work_size);
418 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
421 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
424 #ifdef HAVE_TPETRA_MMM_TIMINGS
425 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse OpenMPESFC"))));
427 C.fillComplete(C.getDomainMap(), C.getRangeMap());