Tpetra parallel linear algebra  Version of the Day
Tpetra_RowMatrixTransposer_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
43 #define TPETRA_ROWMATRIXTRANSPOSER_DEF_HPP
44 
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_Export.hpp"
47 #include "Tpetra_Import.hpp"
48 #include "Teuchos_ParameterList.hpp"
49 #include "Teuchos_TimeMonitor.hpp"
50 
51 namespace Tpetra {
52 namespace Classes {
53 
54 template<class Scalar,
55  class LocalOrdinal,
56  class GlobalOrdinal,
57  class Node>
59 RowMatrixTransposer (const Teuchos::RCP<const crs_matrix_type>& origMatrix,const std::string & label)
60  : origMatrix_(origMatrix), label_(label) {}
61 
62 template<class Scalar,
63  class LocalOrdinal,
64  class GlobalOrdinal,
65  class Node>
66 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
68 createTranspose (const Teuchos::RCP<Teuchos::ParameterList> &params)
69 {
70  using Teuchos::RCP;
71  // Do the local transpose
72  RCP<crs_matrix_type> transMatrixWithSharedRows = createTransposeLocal (params);
73 
74 #ifdef HAVE_TPETRA_MMM_TIMINGS
75  std::string prefix = std::string("Tpetra ")+ label_ + std::string(": ");
76  using Teuchos::TimeMonitor;
77  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("Transpose TAFC"))));
78 #endif
79 
80  // If transMatrixWithSharedRows has an exporter, that's what we
81  // want. If it doesn't, the rows aren't actually shared, and we're
82  // done!
83  RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
84  transMatrixWithSharedRows->getGraph ()->getExporter ();
85  if (exporter.is_null ()) {
86  return transMatrixWithSharedRows;
87  }
88  else {
89  Teuchos::ParameterList labelList;
90 #ifdef HAVE_TPETRA_MMM_TIMINGS
91  labelList.set("Timer Label",label_);
92 #endif
93  if(!params.is_null()) labelList.set("compute global constants",params->get("compute global constants",true));
94  // Use the Export object to do a fused Export and fillComplete.
95  return exportAndFillCompleteCrsMatrix<crs_matrix_type> (transMatrixWithSharedRows, *exporter,Teuchos::null,Teuchos::null,Teuchos::rcp(&labelList,false));
96  }
97 }
98 
99 
100 // mfh 03 Feb 2013: In a definition outside the class like this, the
101 // return value is considered outside the class scope (for things like
102 // resolving typedefs), but the arguments are considered inside the
103 // class scope.
104 template<class Scalar,
105  class LocalOrdinal,
106  class GlobalOrdinal,
107  class Node>
108 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
110 createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList> &params)
111 {
112  using Teuchos::Array;
113  using Teuchos::ArrayRCP;
114  using Teuchos::ArrayView;
115  using Teuchos::RCP;
116  using Teuchos::rcp;
117  using Teuchos::rcp_dynamic_cast;
118  typedef LocalOrdinal LO;
119  typedef GlobalOrdinal GO;
120  typedef Tpetra::Import<LO, GO, Node> import_type;
121  typedef Tpetra::Export<LO, GO, Node> export_type;
122 
123 #ifdef HAVE_TPETRA_MMM_TIMINGS
124  std::string prefix = std::string("Tpetra ")+ label_ + std::string(": ");
125  using Teuchos::TimeMonitor;
126  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("Transpose Local"))));
127 #endif
128 
129  // Prebuild the importers and exporters the no-communication way,
130  // flipping the importers and exporters around.
131  RCP<const import_type> myImport;
132  RCP<const export_type> myExport;
133  if (! origMatrix_->getGraph ()->getImporter ().is_null ()) {
134  myExport = rcp (new export_type (*origMatrix_->getGraph ()->getImporter ()));
135  }
136  if (! origMatrix_->getGraph ()->getExporter ().is_null ()) {
137  myImport = rcp (new import_type (*origMatrix_->getGraph ()->getExporter ()));
138  }
139 
140  //
141  // This transpose is based upon the approach in EpetraExt.
142  //
143 
144  size_t numLocalCols = origMatrix_->getNodeNumCols();
145  size_t numLocalRows = origMatrix_->getNodeNumRows();
146  size_t numLocalNnz = origMatrix_->getNodeNumEntries();
147 
148  RCP<const crs_matrix_type> crsMatrix =
149  rcp_dynamic_cast<const crs_matrix_type> (origMatrix_);
150 
151  RCP<crs_matrix_type> transMatrixWithSharedRows;
152  if (crsMatrix != Teuchos::null) {
153  // The matrix is a CrsMatrix
154  using local_matrix_type = typename crs_matrix_type::local_matrix_type;
155  using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
156  using index_type = typename local_matrix_type::index_type::non_const_type;
157  using values_type = typename local_matrix_type::values_type::non_const_type;
158  using execution_space = typename local_matrix_type::execution_space;
159 
160  // Determine how many nonzeros there are per row in the transpose.
161  auto lclMatrix = crsMatrix->getLocalMatrix();
162  auto lclGraph = lclMatrix.graph;
163 
164  using range_type = Kokkos::RangePolicy<LO,execution_space>;
165 
166  // Determine how many nonzeros there are per row in the transpose.
167  row_map_type t_rows("transpose_rows", numLocalCols+1);
168  Kokkos::parallel_for("compute_number_of_indices_per_column", range_type(0, numLocalRows),
169  KOKKOS_LAMBDA(const LO row) {
170  auto rowView = lclGraph.rowConst(row);
171  auto length = rowView.length;
172 
173  for (decltype(length) colID = 0; colID < length; colID++) {
174  auto col = rowView(colID);
175  Kokkos::atomic_fetch_add(&t_rows[col], 1);
176  }
177  });
178 
179  // Compute offsets
180  Kokkos::parallel_scan("compute_transpose_row_offsets", range_type(0, numLocalCols+1),
181  KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
182  const LO val = t_rows(i);
183  if (final_pass)
184  t_rows(i) = update;
185  update += val;
186  });
187 
188  row_map_type offsets("transpose_row_offsets_aux", numLocalCols+1);
189  Kokkos::deep_copy(offsets, t_rows);
190 
191  index_type t_cols("transpose_cols", numLocalNnz);
192  values_type t_vals("transpose_vals", numLocalNnz);
193  Kokkos::parallel_for("compute_transposed_rows", range_type(0, numLocalRows),
194  KOKKOS_LAMBDA(const LO row) {
195  auto rowView = lclMatrix.rowConst(row);
196  auto length = rowView.length;
197 
198  for (decltype(length) colID = 0; colID < length; colID++) {
199  auto col = rowView.colidx(colID);
200 
201  LO insert_pos = Kokkos::atomic_fetch_add(&offsets[col], 1);
202 
203  t_cols[insert_pos] = row;
204  t_vals[insert_pos] = rowView.value(colID);
205  }
206  });
207 
208  local_matrix_type lclTransposeMatrix("transpose", numLocalCols, numLocalRows, numLocalNnz, t_vals, t_rows, t_cols);
209 
210  transMatrixWithSharedRows =
211  rcp (new crs_matrix_type (lclTransposeMatrix,
212  origMatrix_->getColMap (), origMatrix_->getRowMap (),
213  origMatrix_->getRangeMap (), origMatrix_->getDomainMap ()));
214  } else {
215  // FIXME: are we ever here? There is no RowMatrix constructor, we seem to
216  // be working only with CrsMatrices
217 
218  // Determine how many nonzeros there are per row in the transpose.
219  Array<size_t> CurrentStart(numLocalCols,0);
220  ArrayView<const LO> localIndices;
221  ArrayView<const Scalar> localValues;
222  // RowMatrix path
223  for (size_t i=0; i<numLocalRows; ++i) {
224  const size_t numEntriesInRow = origMatrix_->getNumEntriesInLocalRow(i);
225  origMatrix_->getLocalRowView(i, localIndices, localValues);
226  for (size_t j=0; j<numEntriesInRow; ++j) {
227  ++CurrentStart[ localIndices[j] ];
228  }
229  }
230 
231  // create temporary row-major storage for the transposed matrix
232 
233  ArrayRCP<size_t> rowptr_rcp(numLocalCols+1);
234  ArrayRCP<LO> colind_rcp(numLocalNnz);
235  ArrayRCP<Scalar> values_rcp(numLocalNnz);
236 
237  // Since ArrayRCP's are slow...
238  ArrayView<size_t> TransRowptr = rowptr_rcp();
239  ArrayView<LO> TransColind = colind_rcp();
240  ArrayView<Scalar> TransValues = values_rcp();
241 
242  // Scansum the TransRowptr; reset CurrentStart
243  TransRowptr[0]=0;
244  for (size_t i=1; i<numLocalCols+1; ++i) TransRowptr[i] = CurrentStart[i-1] + TransRowptr[i-1];
245  for (size_t i=0; i<numLocalCols; ++i) CurrentStart[i] = TransRowptr[i];
246 
247  // populate the row-major storage so that the data for the transposed
248  // matrix is easy to access
249  for (size_t i=0; i<numLocalRows; ++i) {
250  const size_t numEntriesInRow = origMatrix_->getNumEntriesInLocalRow (i);
251  origMatrix_->getLocalRowView(i, localIndices, localValues);
252 
253  for (size_t j=0; j<numEntriesInRow; ++j) {
254  size_t idx = CurrentStart[localIndices[j]];
255  TransColind[idx] = Teuchos::as<LO>(i);
256  TransValues[idx] = localValues[j];
257  ++CurrentStart[localIndices[j]];
258  }
259  } //for (size_t i=0; i<numLocalRows; ++i)
260 
261  // Allocate and populate temporary matrix with rows not uniquely owned
262  transMatrixWithSharedRows =
263  rcp (new crs_matrix_type (origMatrix_->getColMap (),
264  origMatrix_->getRowMap (), 0));
265  transMatrixWithSharedRows->setAllValues (rowptr_rcp, colind_rcp, values_rcp);
266 
267  // Call ESFC & return
268  Teuchos::ParameterList eParams;
269 #ifdef HAVE_TPETRA_MMM_TIMINGS
270  eParams.set("Timer Label",label_);
271 #endif
272  if (!params.is_null())
273  eParams.set("compute global constants", params->get("compute global constants", true));
274 
275  transMatrixWithSharedRows->expertStaticFillComplete (origMatrix_->getRangeMap (),
276  origMatrix_->getDomainMap (),
277  myImport, myExport,rcp(&eParams,false));
278 
279  }
280 
281  return transMatrixWithSharedRows;
282 }
283 //
284 // Explicit instantiation macro
285 //
286 // Must be expanded from within the Tpetra namespace!
287 //
288 
289 #define TPETRA_ROWMATRIXTRANSPOSER_INSTANT(SCALAR,LO,GO,NODE) \
290  namespace Classes { \
291  template class RowMatrixTransposer< SCALAR, LO , GO , NODE >; \
292  }
293 
294 } // namespace Classes
295 } // namespace Tpetra
296 
297 #endif
Tpetra::Classes::RowMatrixTransposer::createTranspose
Teuchos::RCP< crs_matrix_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
Definition: Tpetra_RowMatrixTransposer_def.hpp:68
Tpetra::Classes::CrsMatrix
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Definition: Tpetra_CrsMatrix_decl.hpp:424
Tpetra::Classes::Import
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Definition: Tpetra_Import_decl.hpp:115
Tpetra::Classes::RowMatrixTransposer::RowMatrixTransposer
RowMatrixTransposer(const Teuchos::RCP< const crs_matrix_type > &origMatrix, const std::string &label=std::string())
Constructor that takes the matrix to transpose.
Definition: Tpetra_RowMatrixTransposer_def.hpp:59
Tpetra::Classes::CrsMatrix::local_matrix_type
KokkosSparse::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Definition: Tpetra_CrsMatrix_decl.hpp:483
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2453
Tpetra::Classes::RowMatrixTransposer::createTransposeLocal
Teuchos::RCP< crs_matrix_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the matrix given to the constructor.
Definition: Tpetra_RowMatrixTransposer_def.hpp:110
Tpetra::Classes::Export
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Definition: Tpetra_Export_decl.hpp:124