42 #ifndef TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
43 #define TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_Export.hpp"
47 #include "Tpetra_Import.hpp"
48 #include "Teuchos_ParameterList.hpp"
49 #include "Teuchos_TimeMonitor.hpp"
54 template<
class Scalar,
60 : origMatrix_(origMatrix), label_(label) {}
62 template<
class Scalar,
66 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
72 RCP<crs_matrix_type> transMatrixWithSharedRows = createTransposeLocal (params);
74 #ifdef HAVE_TPETRA_MMM_TIMINGS
75 std::string prefix = std::string(
"Tpetra ")+ label_ + std::string(
": ");
76 using Teuchos::TimeMonitor;
77 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"Transpose TAFC"))));
83 RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
84 transMatrixWithSharedRows->getGraph ()->getExporter ();
85 if (exporter.is_null ()) {
86 return transMatrixWithSharedRows;
89 Teuchos::ParameterList labelList;
90 #ifdef HAVE_TPETRA_MMM_TIMINGS
91 labelList.set(
"Timer Label",label_);
93 if(!params.is_null()) labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
95 return exportAndFillCompleteCrsMatrix<crs_matrix_type> (transMatrixWithSharedRows, *exporter,Teuchos::null,Teuchos::null,Teuchos::rcp(&labelList,
false));
104 template<
class Scalar,
108 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
112 using Teuchos::Array;
113 using Teuchos::ArrayRCP;
114 using Teuchos::ArrayView;
117 using Teuchos::rcp_dynamic_cast;
118 typedef LocalOrdinal LO;
119 typedef GlobalOrdinal GO;
123 #ifdef HAVE_TPETRA_MMM_TIMINGS
124 std::string prefix = std::string(
"Tpetra ")+ label_ + std::string(
": ");
125 using Teuchos::TimeMonitor;
126 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"Transpose Local"))));
131 RCP<const import_type> myImport;
132 RCP<const export_type> myExport;
133 if (! origMatrix_->getGraph ()->getImporter ().is_null ()) {
134 myExport = rcp (
new export_type (*origMatrix_->getGraph ()->getImporter ()));
136 if (! origMatrix_->getGraph ()->getExporter ().is_null ()) {
137 myImport = rcp (
new import_type (*origMatrix_->getGraph ()->getExporter ()));
144 size_t numLocalCols = origMatrix_->getNodeNumCols();
145 size_t numLocalRows = origMatrix_->getNodeNumRows();
146 size_t numLocalNnz = origMatrix_->getNodeNumEntries();
148 RCP<const crs_matrix_type> crsMatrix =
149 rcp_dynamic_cast<const crs_matrix_type> (origMatrix_);
151 RCP<crs_matrix_type> transMatrixWithSharedRows;
152 if (crsMatrix != Teuchos::null) {
155 using row_map_type =
typename local_matrix_type::row_map_type::non_const_type;
156 using index_type =
typename local_matrix_type::index_type::non_const_type;
157 using values_type =
typename local_matrix_type::values_type::non_const_type;
158 using execution_space =
typename local_matrix_type::execution_space;
161 auto lclMatrix = crsMatrix->getLocalMatrix();
162 auto lclGraph = lclMatrix.graph;
164 using range_type = Kokkos::RangePolicy<LO,execution_space>;
167 row_map_type t_rows(
"transpose_rows", numLocalCols+1);
168 Kokkos::parallel_for(
"compute_number_of_indices_per_column", range_type(0, numLocalRows),
169 KOKKOS_LAMBDA(
const LO row) {
170 auto rowView = lclGraph.rowConst(row);
171 auto length = rowView.length;
173 for (decltype(length) colID = 0; colID < length; colID++) {
174 auto col = rowView(colID);
175 Kokkos::atomic_fetch_add(&t_rows[col], 1);
180 Kokkos::parallel_scan(
"compute_transpose_row_offsets", range_type(0, numLocalCols+1),
181 KOKKOS_LAMBDA(
const LO i, LO& update,
const bool& final_pass) {
182 const LO val = t_rows(i);
188 row_map_type offsets(
"transpose_row_offsets_aux", numLocalCols+1);
191 index_type t_cols(
"transpose_cols", numLocalNnz);
192 values_type t_vals(
"transpose_vals", numLocalNnz);
193 Kokkos::parallel_for(
"compute_transposed_rows", range_type(0, numLocalRows),
194 KOKKOS_LAMBDA(
const LO row) {
195 auto rowView = lclMatrix.rowConst(row);
196 auto length = rowView.length;
198 for (decltype(length) colID = 0; colID < length; colID++) {
199 auto col = rowView.colidx(colID);
201 LO insert_pos = Kokkos::atomic_fetch_add(&offsets[col], 1);
203 t_cols[insert_pos] = row;
204 t_vals[insert_pos] = rowView.value(colID);
208 local_matrix_type lclTransposeMatrix(
"transpose", numLocalCols, numLocalRows, numLocalNnz, t_vals, t_rows, t_cols);
210 transMatrixWithSharedRows =
212 origMatrix_->getColMap (), origMatrix_->getRowMap (),
213 origMatrix_->getRangeMap (), origMatrix_->getDomainMap ()));
219 Array<size_t> CurrentStart(numLocalCols,0);
220 ArrayView<const LO> localIndices;
221 ArrayView<const Scalar> localValues;
223 for (
size_t i=0; i<numLocalRows; ++i) {
224 const size_t numEntriesInRow = origMatrix_->getNumEntriesInLocalRow(i);
225 origMatrix_->getLocalRowView(i, localIndices, localValues);
226 for (
size_t j=0; j<numEntriesInRow; ++j) {
227 ++CurrentStart[ localIndices[j] ];
233 ArrayRCP<size_t> rowptr_rcp(numLocalCols+1);
234 ArrayRCP<LO> colind_rcp(numLocalNnz);
235 ArrayRCP<Scalar> values_rcp(numLocalNnz);
238 ArrayView<size_t> TransRowptr = rowptr_rcp();
239 ArrayView<LO> TransColind = colind_rcp();
240 ArrayView<Scalar> TransValues = values_rcp();
244 for (
size_t i=1; i<numLocalCols+1; ++i) TransRowptr[i] = CurrentStart[i-1] + TransRowptr[i-1];
245 for (
size_t i=0; i<numLocalCols; ++i) CurrentStart[i] = TransRowptr[i];
249 for (
size_t i=0; i<numLocalRows; ++i) {
250 const size_t numEntriesInRow = origMatrix_->getNumEntriesInLocalRow (i);
251 origMatrix_->getLocalRowView(i, localIndices, localValues);
253 for (
size_t j=0; j<numEntriesInRow; ++j) {
254 size_t idx = CurrentStart[localIndices[j]];
255 TransColind[idx] = Teuchos::as<LO>(i);
256 TransValues[idx] = localValues[j];
257 ++CurrentStart[localIndices[j]];
262 transMatrixWithSharedRows =
264 origMatrix_->getRowMap (), 0));
265 transMatrixWithSharedRows->setAllValues (rowptr_rcp, colind_rcp, values_rcp);
268 Teuchos::ParameterList eParams;
269 #ifdef HAVE_TPETRA_MMM_TIMINGS
270 eParams.set(
"Timer Label",label_);
272 if (!params.is_null())
273 eParams.set(
"compute global constants", params->get(
"compute global constants",
true));
275 transMatrixWithSharedRows->expertStaticFillComplete (origMatrix_->getRangeMap (),
276 origMatrix_->getDomainMap (),
277 myImport, myExport,rcp(&eParams,
false));
281 return transMatrixWithSharedRows;
289 #define TPETRA_ROWMATRIXTRANSPOSER_INSTANT(SCALAR,LO,GO,NODE) \
290 namespace Classes { \
291 template class RowMatrixTransposer< SCALAR, LO , GO , NODE >; \