42 #ifndef BELOS_BLOCK_GMRES_ITER_HPP
43 #define BELOS_BLOCK_GMRES_ITER_HPP
60 #include "Teuchos_BLAS.hpp"
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include "Teuchos_SerialDenseVector.hpp"
63 #include "Teuchos_ScalarTraits.hpp"
64 #include "Teuchos_ParameterList.hpp"
65 #include "Teuchos_TimeMonitor.hpp"
82 template<
class ScalarType,
class MV,
class OP>
92 typedef Teuchos::ScalarTraits<ScalarType>
SCT;
111 Teuchos::ParameterList ¶ms );
225 if (!initialized_)
return 0;
259 void setSize(
int blockSize,
int numBlocks);
277 const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > lp_;
278 const Teuchos::RCP<OutputManager<ScalarType> > om_;
279 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > stest_;
280 const Teuchos::RCP<OrthoManager<ScalarType,MV> > ortho_;
292 Teuchos::SerialDenseVector<int,ScalarType> beta, sn;
293 Teuchos::SerialDenseVector<int,MagnitudeType> cs;
306 bool stateStorageInitialized_;
311 bool keepHessenberg_;
315 bool initHessenberg_;
328 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > H_;
333 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > R_;
334 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > z_;
339 template<
class ScalarType,
class MV,
class OP>
344 Teuchos::ParameterList ¶ms ):
352 stateStorageInitialized_(false),
353 keepHessenberg_(false),
354 initHessenberg_(false),
359 keepHessenberg_ = params.get(
"Keep Hessenberg",
false);
362 initHessenberg_ = params.get(
"Initialize Hessenberg",
false);
365 TEUCHOS_TEST_FOR_EXCEPTION(!params.isParameter(
"Num Blocks"), std::invalid_argument,
366 "Belos::BlockGmresIter::constructor: mandatory parameter 'Num Blocks' is not specified.");
367 int nb = Teuchos::getParameter<int>(params,
"Num Blocks");
370 int bs = params.get(
"Block Size", 1);
376 template <
class ScalarType,
class MV,
class OP>
382 TEUCHOS_TEST_FOR_EXCEPTION(numBlocks <= 0 || blockSize <= 0, std::invalid_argument,
"Belos::BlockGmresIter::setSize was passed a non-positive argument.");
383 if (blockSize == blockSize_ && numBlocks == numBlocks_) {
388 if (blockSize!=blockSize_ || numBlocks!=numBlocks_)
389 stateStorageInitialized_ =
false;
391 blockSize_ = blockSize;
392 numBlocks_ = numBlocks;
394 initialized_ =
false;
404 template <
class ScalarType,
class MV,
class OP>
407 if (!stateStorageInitialized_) {
410 Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
411 Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
412 if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
413 stateStorageInitialized_ =
false;
421 int newsd = blockSize_*(numBlocks_+1);
428 beta.resize( newsd );
432 TEUCHOS_TEST_FOR_EXCEPTION(blockSize_*static_cast<ptrdiff_t>(numBlocks_) > MVT::GetGlobalLength(*rhsMV),std::invalid_argument,
433 "Belos::BlockGmresIter::setStateSize(): Cannot generate a Krylov basis with dimension larger the operator!");
436 if (V_ == Teuchos::null) {
438 Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
439 TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::invalid_argument,
440 "Belos::BlockGmresIter::setStateSize(): linear problem does not specify multivectors to clone from.");
441 V_ = MVT::Clone( *tmp, newsd );
445 if (MVT::GetNumberVecs(*V_) < newsd) {
446 Teuchos::RCP<const MV> tmp = V_;
447 V_ = MVT::Clone( *tmp, newsd );
452 if (R_ == Teuchos::null) {
453 R_ = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>() );
455 if (initHessenberg_) {
456 R_->shape( newsd, newsd-blockSize_ );
459 if (R_->numRows() < newsd || R_->numCols() < newsd-blockSize_) {
460 R_->shapeUninitialized( newsd, newsd-blockSize_ );
465 if (keepHessenberg_) {
466 if (H_ == Teuchos::null) {
467 H_ = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>() );
469 if (initHessenberg_) {
470 H_->shape( newsd, newsd-blockSize_ );
473 if (H_->numRows() < newsd || H_->numCols() < newsd-blockSize_) {
474 H_->shapeUninitialized( newsd, newsd-blockSize_ );
484 if (z_ == Teuchos::null) {
485 z_ = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>() );
487 if (z_-> numRows() < newsd || z_->numCols() < blockSize_) {
488 z_->shapeUninitialized( newsd, blockSize_ );
492 stateStorageInitialized_ =
true;
499 template <
class ScalarType,
class MV,
class OP>
506 Teuchos::RCP<MV> currentUpdate = Teuchos::null;
508 return currentUpdate;
510 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
511 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
512 Teuchos::BLAS<int,ScalarType> blas;
513 currentUpdate = MVT::Clone( *V_, blockSize_ );
517 Teuchos::SerialDenseMatrix<int,ScalarType> y( Teuchos::Copy, *z_, curDim_, blockSize_ );
521 blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
522 Teuchos::NON_UNIT_DIAG, curDim_, blockSize_, one,
523 R_->values(), R_->stride(), y.values(), y.stride() );
527 std::vector<int> index(curDim_);
528 for (
int i=0; i<curDim_; i++ ) {
531 Teuchos::RCP<const MV> Vjp1 = MVT::CloneView( *V_, index );
532 MVT::MvTimesMatAddMv( one, *Vjp1, y, zero, *currentUpdate );
534 return currentUpdate;
541 template <
class ScalarType,
class MV,
class OP>
547 if ( norms && (
int)norms->size() < blockSize_ )
548 norms->resize( blockSize_ );
551 Teuchos::BLAS<int,ScalarType> blas;
552 for (
int j=0; j<blockSize_; j++) {
553 (*norms)[j] = blas.NRM2( blockSize_, &(*z_)(curDim_, j), 1);
556 return Teuchos::null;
563 template <
class ScalarType,
class MV,
class OP>
567 if (!stateStorageInitialized_)
570 TEUCHOS_TEST_FOR_EXCEPTION(!stateStorageInitialized_,std::invalid_argument,
571 "Belos::BlockGmresIter::initialize(): Cannot initialize state storage!");
573 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero(), one = Teuchos::ScalarTraits<ScalarType>::one();
579 std::string errstr(
"Belos::BlockGmresIter::initialize(): Specified multivectors must have a consistent length and width.");
581 if (newstate.
V != Teuchos::null && newstate.
z != Teuchos::null) {
585 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.
V) != MVT::GetGlobalLength(*V_),
586 std::invalid_argument, errstr );
587 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.
V) < blockSize_,
588 std::invalid_argument, errstr );
589 TEUCHOS_TEST_FOR_EXCEPTION( newstate.
curDim > blockSize_*(numBlocks_+1),
590 std::invalid_argument, errstr );
592 curDim_ = newstate.
curDim;
593 int lclDim = MVT::GetNumberVecs(*newstate.
V);
596 TEUCHOS_TEST_FOR_EXCEPTION(newstate.
z->numRows() < curDim_ || newstate.
z->numCols() < blockSize_, std::invalid_argument, errstr);
600 if (newstate.
V != V_) {
602 if (curDim_ == 0 && lclDim > blockSize_) {
603 om_->stream(
Warnings) <<
"Belos::BlockGmresIter::initialize(): the solver was initialized with a kernel of " << lclDim << std::endl
604 <<
"The block size however is only " << blockSize_ << std::endl
605 <<
"The last " << lclDim - blockSize_ <<
" vectors will be discarded." << std::endl;
607 std::vector<int> nevind(curDim_+blockSize_);
608 for (
int i=0; i<curDim_+blockSize_; i++) nevind[i] = i;
609 Teuchos::RCP<const MV> newV = MVT::CloneView( *newstate.
V, nevind );
610 Teuchos::RCP<MV> lclV = MVT::CloneViewNonConst( *V_, nevind );
611 MVT::MvAddMv( one, *newV, zero, *newV, *lclV );
614 lclV = Teuchos::null;
618 if (newstate.
z != z_) {
620 Teuchos::SerialDenseMatrix<int,ScalarType> newZ(Teuchos::View,*newstate.
z,curDim_+blockSize_,blockSize_);
621 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > lclZ;
622 lclZ = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::View,*z_,curDim_+blockSize_,blockSize_) );
626 lclZ = Teuchos::null;
632 TEUCHOS_TEST_FOR_EXCEPTION(newstate.
V == Teuchos::null,std::invalid_argument,
633 "Belos::BlockGmresIter::initialize(): BlockGmresStateIterState does not have initial kernel V_0.");
635 TEUCHOS_TEST_FOR_EXCEPTION(newstate.
z == Teuchos::null,std::invalid_argument,
636 "Belos::BlockGmresIter::initialize(): BlockGmresStateIterState does not have initial norms z_0.");
658 template <
class ScalarType,
class MV,
class OP>
664 if (initialized_ ==
false) {
669 int searchDim = blockSize_*numBlocks_;
676 while (stest_->checkStatus(
this) !=
Passed && curDim_+blockSize_ <= searchDim) {
681 int lclDim = curDim_ + blockSize_;
684 std::vector<int> curind(blockSize_);
685 for (
int i=0; i<blockSize_; i++) { curind[i] = lclDim + i; }
686 Teuchos::RCP<MV> Vnext = MVT::CloneViewNonConst(*V_,curind);
690 for (
int i=0; i<blockSize_; i++) { curind[i] = curDim_ + i; }
691 Teuchos::RCP<const MV> Vprev = MVT::CloneView(*V_,curind);
694 lp_->apply(*Vprev,*Vnext);
695 Vprev = Teuchos::null;
699 std::vector<int> prevind(lclDim);
700 for (
int i=0; i<lclDim; i++) { prevind[i] = i; }
701 Vprev = MVT::CloneView(*V_,prevind);
702 Teuchos::Array<Teuchos::RCP<const MV> > AVprev(1, Vprev);
705 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
706 subH = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>
707 ( Teuchos::View,*H_,lclDim,blockSize_,0,curDim_ ) );
708 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > AsubH;
709 AsubH.append( subH );
712 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
713 subH2 = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>
714 ( Teuchos::View,*H_,blockSize_,blockSize_,lclDim,curDim_ ) );
716 int rank = ortho_->projectAndNormalize(*Vnext,AsubH,subH2,AVprev);
720 if (keepHessenberg_) {
722 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
723 subR = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>
724 ( Teuchos::View,*R_,lclDim,blockSize_,0,curDim_ ) );
728 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
729 subR2 = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>
730 ( Teuchos::View,*R_,blockSize_,blockSize_,lclDim,curDim_ ) );
731 subR2->assign(*subH2);
735 "Belos::BlockGmresIter::iterate(): couldn't generate basis of full rank.");
745 Vnext = Teuchos::null;
746 curDim_ += blockSize_;
769 template<
class ScalarType,
class MV,
class OP>
773 ScalarType sigma, mu, vscale, maxelem;
774 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero(), one = Teuchos::ScalarTraits<ScalarType>::one();
779 int curDim = curDim_;
780 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
784 Teuchos::BLAS<int, ScalarType> blas;
789 if (blockSize_ == 1) {
793 for (i=0; i<curDim; i++) {
797 blas.ROT( 1, &(*R_)(i,curDim), 1, &(*R_)(i+1, curDim), 1, &cs[i], &sn[i] );
802 blas.ROTG( &(*R_)(curDim,curDim), &(*R_)(curDim+1,curDim), &cs[curDim], &sn[curDim] );
803 (*R_)(curDim+1,curDim) = zero;
807 blas.ROT( 1, &(*z_)(curDim,0), 1, &(*z_)(curDim+1,0), 1, &cs[curDim], &sn[curDim] );
813 for (j=0; j<blockSize_; j++) {
817 for (i=0; i<curDim+j; i++) {
818 sigma = blas.DOT( blockSize_, &(*R_)(i+1,i), 1, &(*R_)(i+1,curDim+j), 1);
819 sigma += (*R_)(i,curDim+j);
821 blas.AXPY(blockSize_, ScalarType(-sigma), &(*R_)(i+1,i), 1, &(*R_)(i+1,curDim+j), 1);
822 (*R_)(i,curDim+j) -= sigma;
827 maxidx = blas.IAMAX( blockSize_+1, &(*R_)(curDim+j,curDim+j), 1 );
828 maxelem = (*R_)(curDim+j+maxidx-1,curDim+j);
829 for (i=0; i<blockSize_+1; i++)
830 (*R_)(curDim+j+i,curDim+j) /= maxelem;
831 sigma = blas.DOT( blockSize_, &(*R_)(curDim+j+1,curDim+j), 1,
832 &(*R_)(curDim+j+1,curDim+j), 1 );
834 beta[curDim + j] = zero;
836 mu = Teuchos::ScalarTraits<ScalarType>::squareroot((*R_)(curDim+j,curDim+j)*(*R_)(curDim+j,curDim+j)+sigma);
837 if ( Teuchos::ScalarTraits<ScalarType>::real((*R_)(curDim+j,curDim+j))
838 < Teuchos::ScalarTraits<MagnitudeType>::zero() ) {
839 vscale = (*R_)(curDim+j,curDim+j) - mu;
841 vscale = -sigma / ((*R_)(curDim+j,curDim+j) + mu);
843 beta[curDim+j] = Teuchos::as<ScalarType>(2)*one*vscale*vscale/(sigma + vscale*vscale);
844 (*R_)(curDim+j,curDim+j) = maxelem*mu;
845 for (i=0; i<blockSize_; i++)
846 (*R_)(curDim+j+1+i,curDim+j) /= vscale;
851 for (i=0; i<blockSize_; i++) {
852 sigma = blas.DOT( blockSize_, &(*R_)(curDim+j+1,curDim+j),
853 1, &(*z_)(curDim+j+1,i), 1);
854 sigma += (*z_)(curDim+j,i);
855 sigma *= beta[curDim+j];
856 blas.AXPY(blockSize_, ScalarType(-sigma), &(*R_)(curDim+j+1,curDim+j),
857 1, &(*z_)(curDim+j+1,i), 1);
858 (*z_)(curDim+j,i) -= sigma;
864 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
865 curDim_ = dim + blockSize_;