42 #ifndef THYRA_SPMD_MULTI_VECTOR_DEFAULT_BASE_DEF_HPP
43 #define THYRA_SPMD_MULTI_VECTOR_DEFAULT_BASE_DEF_HPP
46 #if defined (__clang__) && !defined (__INTEL_COMPILER)
47 #pragma clang system_header
51 #include "Thyra_SpmdMultiVectorDefaultBase_decl.hpp"
52 #include "Thyra_MultiVectorDefaultBase.hpp"
53 #include "Thyra_MultiVectorAdapterBase.hpp"
54 #include "Thyra_SpmdVectorSpaceDefaultBase.hpp"
55 #include "Thyra_DetachedMultiVectorView.hpp"
56 #include "Thyra_apply_op_helper.hpp"
57 #include "Thyra_SpmdLocalDataAccess.hpp"
58 #include "RTOpPack_SPMD_apply_op.hpp"
59 #include "RTOp_parallel_helpers.h"
60 #include "Teuchos_Workspace.hpp"
61 #include "Teuchos_dyn_cast.hpp"
62 #include "Teuchos_Time.hpp"
63 #include "Teuchos_CommHelpers.hpp"
76 template<
class Scalar>
89 template<
class Scalar>
94 this->spmdSpace(), true
102 template<
class Scalar>
106 using Teuchos::outArg;
109 this->getNonconstLocalData(outArg(localValues), outArg(leadingDim));
121 template<
class Scalar>
125 using Teuchos::outArg;
128 this->getLocalData(outArg(localValues), outArg(leadingDim));
143 template<
class Scalar>
149 const Ordinal pri_global_offset_in
155 using Teuchos::rcpFromPtr;
159 const Ordinal numCols = this->domain()->dim();
164 in_applyOp_, std::invalid_argument,
165 "SpmdMultiVectorDefaultBase<>::mvMultiReductApplyOpImpl(...): Error, this method is"
166 " being entered recursively which is a clear sign that one of the methods"
167 " acquireDetachedView(...), releaseDetachedView(...) or commitDetachedView(...)"
168 " was not implemented properly!"
171 "SpmdMultiVectorDefaultBase<>::mvMultiReductApplyOpImpl(...)", *this->domain(),
172 *this->range(), pri_op, multi_vecs, targ_multi_vecs, reduct_objs,
173 pri_global_offset_in);
183 const Range1D local_rng(localOffset_, localOffset_+localSubDim_-1);
184 const Range1D col_rng(0, numCols-1);
187 Workspace<RTOpPack::ConstSubMultiVectorView<Scalar> >
188 sub_multi_vecs(wss,multi_vecs.size());
189 Workspace<RTOpPack::SubMultiVectorView<Scalar> >
190 targ_sub_multi_vecs(wss,targ_multi_vecs.size());
191 for(
int k = 0; k < multi_vecs.size(); ++k ) {
192 sub_multi_vecs[k] = getLocalSubMultiVectorView<Scalar>(rcpFromPtr(multi_vecs[k]));
193 sub_multi_vecs[k].setGlobalOffset(localOffset_+pri_global_offset_in);
195 for(
int k = 0; k < targ_multi_vecs.size(); ++k ) {
196 targ_sub_multi_vecs[k] =
197 getNonconstLocalSubMultiVectorView<Scalar>(rcpFromPtr(targ_multi_vecs[k]));
198 targ_sub_multi_vecs[k].setGlobalOffset(localOffset_+pri_global_offset_in);
200 Workspace<RTOpPack::ReductTarget*> reduct_objs_ptr(wss, reduct_objs.size());
201 for (
int k = 0; k < reduct_objs.size(); ++k) {
202 reduct_objs_ptr[k] = &*reduct_objs[k];
206 RTOpPack::SPMD_apply_op(
207 locallyReplicated ? NULL : spmdSpc.
getComm().
get(),
210 sub_multi_vecs.size(),
211 sub_multi_vecs.getRawPtr(),
212 targ_sub_multi_vecs.size(),
213 targ_sub_multi_vecs.getRawPtr(),
214 reduct_objs_ptr.getRawPtr()
218 for(
int k = 0; k < multi_vecs.size(); ++k ) {
221 for(
int k = 0; k < targ_multi_vecs.size(); ++k ) {
231 template<
class Scalar>
238 using Teuchos::outArg;
239 const Range1D rowRng = validateRowRange(rowRng_in);
240 const Range1D colRng = validateColRange(colRng_in);
241 if( rowRng.
lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.
ubound() ) {
244 rowRng_in,colRng_in,sub_mv
250 this->getLocalData(outArg(localValues), outArg(leadingDim));
257 +(rowRng.
lbound()-localOffset_)
258 +colRng.
lbound()*leadingDim,
264 template<
class Scalar>
283 template<
class Scalar>
290 using Teuchos::outArg;
291 const Range1D rowRng = validateRowRange(rowRng_in);
292 const Range1D colRng = validateColRange(colRng_in);
294 rowRng.
lbound() < localOffset_
296 localOffset_+localSubDim_-1 < rowRng.
ubound()
301 rowRng_in, colRng_in, sub_mv
307 this->getNonconstLocalData(outArg(localValues), outArg(leadingDim));
314 +(rowRng.
lbound()-localOffset_)
315 +colRng.
lbound()*leadingDim
321 template<
class Scalar>
343 template<
class Scalar>
354 using Teuchos::rcpFromPtr;
357 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
391 const int procRank = (
nonnull(comm) ? comm->getRank() : 0 );
395 &Y_range = *Y->range(),
396 &X_range = *X.
range();
399 ( globalDim_ > localSubDim_ ) &&
is_null(comm), std::logic_error
400 ,
"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
407 ,
"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
411 ,
"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
419 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
424 Y_local = getNonconstLocalSubMultiVectorView<Scalar>(rcpFromPtr(Y));
426 M_local = getLocalSubMultiVectorView<Scalar>(rcpFromRef(*
this)),
427 X_local = getLocalSubMultiVectorView<Scalar>(rcpFromRef(X));
448 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
450 std::cout <<
"\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Time for getting view = " << timer.
totalElapsedTime() <<
" seconds\n";
456 ,
"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
461 ,
"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
490 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
496 bool locallyReplicated =
false;
499 bool locallyReplicated_x = locallyReplicated_this;
500 bool locallyReplicated_y =
false;
504 locallyReplicated_y = spmd_Y->spmdSpace()->isLocallyReplicated();
506 locallyReplicated = locallyReplicated_this && locallyReplicated_x && locallyReplicated_y;
509 bool isNonLocalAdjoint =
513 (globalDim_ > localSubDim_ || (
nonnull(comm) && comm->getSize() > 1))
516 if (locallyReplicated)
517 isNonLocalAdjoint =
false;
519 Workspace<Scalar> Y_local_tmp_store(wss, Y_local.
subDim()*Y_local.
numSubCols(),
false);
522 if (isNonLocalAdjoint) {
527 Teuchos::arcpFromArrayView(Y_local_tmp_store()),
532 for(
int j = 0; j < Y_local.
numSubCols(); ++j ) {
534 const Y_local_values_iter_t Y_local_j =
536 std::copy( Y_local_j, Y_local_j + Y_local.
subDim(),
537 Y_local_tmp.values().begin() + Y_local_tmp.leadingDim()*j );
548 Y_local_tmp = Y_local;
552 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
554 std::cout <<
"\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.
totalElapsedTime() <<
" seconds\n";
567 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
573 case NOTRANS: t_transp = Teuchos::NO_TRANS;
break;
575 case CONJTRANS: t_transp = Teuchos::CONJ_TRANS;
break;
581 case NOTRANS: t_transp = Teuchos::NO_TRANS;
break;
598 ,const_cast<Scalar*>(X_local.values().getRawPtr())
599 ,std::max((
int) X_local.leadingDim(),1)
601 ,Y_local_tmp.values().getRawPtr()
602 ,std::max((
int) Y_local_tmp.leadingDim(),1)
606 std::fill( Y_local_tmp.values().begin(), Y_local_tmp.values().end(),
609 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
612 <<
"\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Time for GEMM = "
622 if (isNonLocalAdjoint) {
624 Workspace<Scalar> Y_local_final_buff(wss,Y_local.
subDim()*Y_local.
numSubCols(),
false);
626 Teuchos::reduceAll<Ordinal,Scalar>(
627 *comm, Teuchos::REDUCE_SUM, Y_local_final_buff.size(), Y_local_tmp.values().
getRawPtr(),
628 Y_local_final_buff.getRawPtr()
633 Y_local_final_buff_iter_t Y_local_final_buff_ptr = Y_local_final_buff_av.
begin();
634 for(
int j = 0; j < Y_local.
numSubCols(); ++j ) {
636 Y_local_values_iter_t Y_local_ptr =
638 for(
int i = 0; i < Y_local.
subDim(); ++i ) {
639 (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
651 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
654 <<
"\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Total time = "
664 template<
class Scalar>
667 if(globalDim_ == 0) {
670 globalDim_ = l_spmdSpace->
dim();
673 numCols_ = this->domain()->dim();
685 template<
class Scalar>
688 const Range1D rowRng = Teuchos::full_range(rowRng_in,0,globalDim_-1);
691 !( 0 <= rowRng.
lbound() && rowRng.
ubound() < globalDim_ ), std::invalid_argument
692 ,
"SpmdMultiVectorDefaultBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
694 "in the range [0,"<<(globalDim_-1)<<
"]!"
701 template<
class Scalar>
704 const Range1D colRng = Teuchos::full_range(colRng_in,0,numCols_-1);
707 !(0 <= colRng.
lbound() && colRng.
ubound() < numCols_), std::invalid_argument
708 ,
"SpmdMultiVectorDefaultBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
710 "in the range [0,"<<(numCols_-1)<<
"]!"
720 #endif // THYRA_SPMD_MULTI_VECTOR_DEFAULT_BASE_DEF_HPP