42 #ifndef BELOS_BLOCK_FGMRES_ITER_HPP
43 #define BELOS_BLOCK_FGMRES_ITER_HPP
60 #include "Teuchos_BLAS.hpp"
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include "Teuchos_SerialDenseVector.hpp"
63 #include "Teuchos_ScalarTraits.hpp"
64 #include "Teuchos_ParameterList.hpp"
65 #include "Teuchos_TimeMonitor.hpp"
82 template<
class ScalarType,
class MV,
class OP>
92 typedef Teuchos::ScalarTraits<ScalarType>
SCT;
111 Teuchos::ParameterList ¶ms );
226 if (!initialized_)
return 0;
260 void setSize(
int blockSize,
int numBlocks);
278 const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > lp_;
279 const Teuchos::RCP<OutputManager<ScalarType> > om_;
280 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > stest_;
281 const Teuchos::RCP<OrthoManager<ScalarType,MV> > ortho_;
293 Teuchos::SerialDenseVector<int,ScalarType> beta, sn;
294 Teuchos::SerialDenseVector<int,MagnitudeType> cs;
307 bool stateStorageInitialized_;
321 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > H_;
326 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > R_;
327 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > z_;
332 template<
class ScalarType,
class MV,
class OP>
338 Teuchos::ParameterList ¶ms ):
346 stateStorageInitialized_(false),
351 TEUCHOS_TEST_FOR_EXCEPTION(
352 ! params.isParameter (
"Num Blocks"), std::invalid_argument,
353 "Belos::BlockFGmresIter::constructor: mandatory parameter 'Num Blocks' is not specified.");
354 const int nb = params.get<
int> (
"Num Blocks");
357 const int bs = params.get (
"Block Size", 1);
363 template <
class ScalarType,
class MV,
class OP>
369 TEUCHOS_TEST_FOR_EXCEPTION(numBlocks <= 0 || blockSize <= 0, std::invalid_argument,
"Belos::BlockFGmresIter::setSize was passed a non-positive argument.");
370 if (blockSize == blockSize_ && numBlocks == numBlocks_) {
375 if (blockSize!=blockSize_ || numBlocks!=numBlocks_)
376 stateStorageInitialized_ =
false;
378 blockSize_ = blockSize;
379 numBlocks_ = numBlocks;
381 initialized_ =
false;
391 template <
class ScalarType,
class MV,
class OP>
396 typedef Teuchos::SerialDenseMatrix<int, ScalarType> SDM;
398 if (! stateStorageInitialized_) {
400 RCP<const MV> lhsMV = lp_->getLHS();
401 RCP<const MV> rhsMV = lp_->getRHS();
402 if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
403 stateStorageInitialized_ =
false;
410 int newsd = blockSize_*(numBlocks_+1);
421 TEUCHOS_TEST_FOR_EXCEPTION(
422 blockSize_ * static_cast<ptrdiff_t> (numBlocks_) > MVT::GetGlobalLength (*rhsMV),
423 std::invalid_argument,
"Belos::BlockFGmresIter::setStateSize(): "
424 "Cannot generate a Krylov basis with dimension larger the operator!");
427 if (V_ == Teuchos::null) {
429 RCP<const MV> tmp = (rhsMV != Teuchos::null) ? rhsMV : lhsMV;
430 TEUCHOS_TEST_FOR_EXCEPTION(
431 tmp == Teuchos::null, std::invalid_argument,
432 "Belos::BlockFGmresIter::setStateSize(): "
433 "linear problem does not specify multivectors to clone from.");
434 V_ = MVT::Clone (*tmp, newsd);
438 if (MVT::GetNumberVecs (*V_) < newsd) {
439 RCP<const MV> tmp = V_;
440 V_ = MVT::Clone (*tmp, newsd);
444 if (Z_ == Teuchos::null) {
446 RCP<const MV> tmp = (rhsMV != Teuchos::null) ? rhsMV : lhsMV;
447 TEUCHOS_TEST_FOR_EXCEPTION(
448 tmp == Teuchos::null, std::invalid_argument,
449 "Belos::BlockFGmresIter::setStateSize(): "
450 "linear problem does not specify multivectors to clone from.");
451 Z_ = MVT::Clone (*tmp, newsd);
455 if (MVT::GetNumberVecs (*Z_) < newsd) {
456 RCP<const MV> tmp = Z_;
457 Z_ = MVT::Clone (*tmp, newsd);
462 if (H_ == Teuchos::null) {
463 H_ = rcp (
new SDM (newsd, newsd-blockSize_));
466 H_->shapeUninitialized (newsd, newsd - blockSize_);
472 if (z_ == Teuchos::null) {
473 z_ = rcp (
new SDM (newsd, blockSize_));
476 z_->shapeUninitialized (newsd, blockSize_);
480 stateStorageInitialized_ =
true;
486 template <
class ScalarType,
class MV,
class OP>
490 typedef Teuchos::SerialDenseMatrix<int, ScalarType> SDM;
492 Teuchos::RCP<MV> currentUpdate = Teuchos::null;
496 return currentUpdate;
499 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero ();
500 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one ();
501 Teuchos::BLAS<int,ScalarType> blas;
503 currentUpdate = MVT::Clone (*Z_, blockSize_);
506 SDM y (Teuchos::Copy, *z_, curDim_, blockSize_);
509 blas.TRSM (Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
510 Teuchos::NON_UNIT_DIAG, curDim_, blockSize_, one,
511 H_->values (), H_->stride (), y.values (), y.stride ());
514 std::vector<int> index (curDim_);
515 for (
int i = 0; i < curDim_; ++i) {
518 Teuchos::RCP<const MV> Zjp1 = MVT::CloneView (*Z_, index);
519 MVT::MvTimesMatAddMv (one, *Zjp1, y, zero, *currentUpdate);
521 return currentUpdate;
525 template <
class ScalarType,
class MV,
class OP>
526 Teuchos::RCP<const MV>
531 if (norms != NULL && (
int)norms->size() < blockSize_) {
532 norms->resize (blockSize_);
536 Teuchos::BLAS<int, ScalarType> blas;
537 for (
int j = 0; j < blockSize_; ++j) {
538 (*norms)[j] = blas.NRM2 (blockSize_, &(*z_)(curDim_, j), 1);
543 return Teuchos::null;
547 template <
class ScalarType,
class MV,
class OP>
554 typedef Teuchos::ScalarTraits<ScalarType> STS;
555 typedef Teuchos::SerialDenseMatrix<int, ScalarType> SDM;
556 const ScalarType ZERO = STS::zero ();
557 const ScalarType ONE = STS::one ();
560 if (! stateStorageInitialized_) {
564 TEUCHOS_TEST_FOR_EXCEPTION(
565 ! stateStorageInitialized_, std::invalid_argument,
566 "Belos::BlockFGmresIter::initialize(): Cannot initialize state storage!");
571 const char errstr[] =
"Belos::BlockFGmresIter::initialize(): The given "
572 "multivectors must have a consistent length and width.";
574 if (! newstate.
V.is_null () && ! newstate.
z.is_null ()) {
578 TEUCHOS_TEST_FOR_EXCEPTION(
579 MVT::GetGlobalLength(*newstate.
V) != MVT::GetGlobalLength(*V_),
580 std::invalid_argument, errstr );
581 TEUCHOS_TEST_FOR_EXCEPTION(
582 MVT::GetNumberVecs(*newstate.
V) < blockSize_,
583 std::invalid_argument, errstr );
584 TEUCHOS_TEST_FOR_EXCEPTION(
585 newstate.
curDim > blockSize_*(numBlocks_+1),
586 std::invalid_argument, errstr );
588 curDim_ = newstate.
curDim;
589 const int lclDim = MVT::GetNumberVecs(*newstate.
V);
592 TEUCHOS_TEST_FOR_EXCEPTION(
593 newstate.
z->numRows() < curDim_ || newstate.
z->numCols() < blockSize_,
594 std::invalid_argument, errstr);
597 if (newstate.
V != V_) {
599 if (curDim_ == 0 && lclDim > blockSize_) {
600 std::ostream& warn = om_->stream (
Warnings);
601 warn <<
"Belos::BlockFGmresIter::initialize(): the solver was "
602 <<
"initialized with a kernel of " << lclDim << endl
603 <<
"The block size however is only " << blockSize_ << endl
604 <<
"The last " << lclDim - blockSize_
605 <<
" vectors will be discarded." << endl;
607 std::vector<int> nevind (curDim_ + blockSize_);
608 for (
int i = 0; i < curDim_ + blockSize_; ++i) {
611 RCP<const MV> newV = MVT::CloneView (*newstate.
V, nevind);
612 RCP<MV> lclV = MVT::CloneViewNonConst (*V_, nevind);
613 MVT::MvAddMv (ONE, *newV, ZERO, *newV, *lclV);
616 lclV = Teuchos::null;
620 if (newstate.
z != z_) {
622 SDM newZ (Teuchos::View, *newstate.
z, curDim_ + blockSize_, blockSize_);
624 lclz = rcp (
new SDM (Teuchos::View, *z_, curDim_ + blockSize_, blockSize_));
626 lclz = Teuchos::null;
630 TEUCHOS_TEST_FOR_EXCEPTION(
631 newstate.
V == Teuchos::null,std::invalid_argument,
632 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial kernel V_0.");
634 TEUCHOS_TEST_FOR_EXCEPTION(
635 newstate.
z == Teuchos::null,std::invalid_argument,
636 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial norms z_0.");
644 template <
class ScalarType,
class MV,
class OP>
647 using Teuchos::Array;
652 typedef Teuchos::SerialDenseMatrix<int, ScalarType> SDM;
655 if (initialized_ ==
false) {
660 const int searchDim = blockSize_ * numBlocks_;
664 while (stest_->checkStatus (
this) !=
Passed && curDim_+blockSize_ <= searchDim) {
668 const int lclDim = curDim_ + blockSize_;
671 std::vector<int> curind (blockSize_);
672 for (
int i = 0; i < blockSize_; ++i) {
673 curind[i] = lclDim + i;
675 RCP<MV> Vnext = MVT::CloneViewNonConst (*V_, curind);
679 for (
int i = 0; i < blockSize_; ++i) {
680 curind[i] = curDim_ + i;
682 RCP<const MV> Vprev = MVT::CloneView (*V_, curind);
683 RCP<MV> Znext = MVT::CloneViewNonConst (*Z_, curind);
686 lp_->applyRightPrec (*Vprev, *Znext);
690 lp_->applyOp (*Znext, *Vnext);
695 std::vector<int> prevind (lclDim);
696 for (
int i = 0; i < lclDim; ++i) {
699 Vprev = MVT::CloneView (*V_, prevind);
700 Array<RCP<const MV> > AVprev (1, Vprev);
703 RCP<SDM> subH = rcp (
new SDM (View, *H_, lclDim, blockSize_, 0, curDim_));
704 Array<RCP<SDM> > AsubH;
708 RCP<SDM> subR = rcp (
new SDM (View, *H_, blockSize_, blockSize_, lclDim, curDim_));
709 const int rank = ortho_->projectAndNormalize (*Vnext, AsubH, subR, AVprev);
710 TEUCHOS_TEST_FOR_EXCEPTION(
712 "Belos::BlockFGmresIter::iterate(): After orthogonalization, the new "
713 "basis block does not have full rank. It contains " << blockSize_
714 <<
" vector" << (blockSize_ != 1 ?
"s" :
"")
715 <<
", but its rank is " << rank <<
".");
727 curDim_ += blockSize_;
732 template<
class ScalarType,
class MV,
class OP>
735 typedef Teuchos::ScalarTraits<ScalarType> STS;
736 typedef Teuchos::ScalarTraits<MagnitudeType> STM;
738 const ScalarType zero = STS::zero ();
739 const ScalarType two = (STS::one () + STS::one());
740 ScalarType sigma, mu, vscale, maxelem;
741 Teuchos::BLAS<int, ScalarType> blas;
747 int curDim = curDim_;
748 if (dim >= curDim_ && dim < getMaxSubspaceDim ()) {
757 if (blockSize_ == 1) {
759 for (
int i = 0; i < curDim; ++i) {
761 blas.ROT (1, &(*H_)(i, curDim), 1, &(*H_)(i+1, curDim), 1, &cs[i], &sn[i]);
764 blas.ROTG (&(*H_)(curDim, curDim), &(*H_)(curDim+1, curDim), &cs[curDim], &sn[curDim]);
765 (*H_)(curDim+1, curDim) = zero;
768 blas.ROT (1, &(*z_)(curDim,0), 1, &(*z_)(curDim+1,0), 1, &cs[curDim], &sn[curDim]);
772 for (
int j = 0; j < blockSize_; ++j) {
774 for (
int i = 0; i < curDim + j; ++i) {
775 sigma = blas.DOT (blockSize_, &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
776 sigma += (*H_)(i,curDim+j);
778 blas.AXPY (blockSize_, ScalarType(-sigma), &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
779 (*H_)(i,curDim+j) -= sigma;
783 const int maxidx = blas.IAMAX (blockSize_+1, &(*H_)(curDim+j,curDim+j), 1);
784 maxelem = (*H_)(curDim + j + maxidx - 1, curDim + j);
785 for (
int i = 0; i < blockSize_ + 1; ++i) {
786 (*H_)(curDim+j+i,curDim+j) /= maxelem;
788 sigma = blas.DOT (blockSize_, &(*H_)(curDim + j + 1, curDim + j), 1,
789 &(*H_)(curDim + j + 1, curDim + j), 1);
791 beta[curDim + j] = zero;
793 mu = STS::squareroot ((*H_)(curDim+j,curDim+j)*(*H_)(curDim+j,curDim+j)+sigma);
794 if (STS::real ((*H_)(curDim + j, curDim + j)) < STM::zero ()) {
795 vscale = (*H_)(curDim+j,curDim+j) - mu;
797 vscale = -sigma / ((*H_)(curDim+j, curDim+j) + mu);
799 beta[curDim+j] = two * vscale * vscale / (sigma + vscale*vscale);
800 (*H_)(curDim+j, curDim+j) = maxelem*mu;
801 for (
int i = 0; i < blockSize_; ++i) {
802 (*H_)(curDim+j+1+i,curDim+j) /= vscale;
807 for (
int i = 0; i < blockSize_; ++i) {
808 sigma = blas.DOT (blockSize_, &(*H_)(curDim+j+1,curDim+j),
809 1, &(*z_)(curDim+j+1,i), 1);
810 sigma += (*z_)(curDim+j,i);
811 sigma *= beta[curDim+j];
812 blas.AXPY (blockSize_, ScalarType(-sigma), &(*H_)(curDim+j+1,curDim+j),
813 1, &(*z_)(curDim+j+1,i), 1);
814 (*z_)(curDim+j,i) -= sigma;
820 if (dim >= curDim_ && dim < getMaxSubspaceDim ()) {
821 curDim_ = dim + blockSize_;