42 #ifndef _TEUCHOS_SERIALDENSEMATRIX_HPP_
43 #define _TEUCHOS_SERIALDENSEMATRIX_HPP_
53 #include "Teuchos_Assert.hpp"
66 template<
typename OrdinalType,
typename ScalarType>
222 ScalarType&
operator () (OrdinalType rowIndex, OrdinalType colIndex);
232 const ScalarType&
operator () (OrdinalType rowIndex, OrdinalType colIndex)
const;
252 const ScalarType*
operator [] (OrdinalType colIndex)
const;
256 ScalarType*
values()
const {
return(values_); }
286 int scale (
const ScalarType alpha );
351 OrdinalType
numRows()
const {
return(numRows_); }
354 OrdinalType
numCols()
const {
return(numCols_); }
357 OrdinalType
stride()
const {
return(stride_); }
360 bool empty()
const {
return(numRows_ == 0 || numCols_ == 0); }
378 virtual void print(std::ostream& os)
const;
383 void copyMat(ScalarType* inputMatrix, OrdinalType strideInput,
384 OrdinalType
numRows, OrdinalType
numCols, ScalarType* outputMatrix,
385 OrdinalType strideOutput, OrdinalType startRow, OrdinalType startCol,
388 void checkIndex( OrdinalType rowIndex, OrdinalType colIndex = 0 )
const;
389 OrdinalType numRows_;
390 OrdinalType numCols_;
401 template<
typename OrdinalType,
typename ScalarType>
403 :
CompObject(),
Object(),
BLAS<OrdinalType,ScalarType>(), numRows_(0), numCols_(0), stride_(0), valuesCopied_(false), values_(0)
406 template<
typename OrdinalType,
typename ScalarType>
408 OrdinalType numRows_in, OrdinalType numCols_in,
bool zeroOut
410 :
CompObject(),
Object(),
BLAS<OrdinalType,ScalarType>(), numRows_(numRows_in), numCols_(numCols_in), stride_(numRows_in)
412 values_ =
new ScalarType[stride_*numCols_];
413 valuesCopied_ =
true;
418 template<
typename OrdinalType,
typename ScalarType>
420 DataAccess CV, ScalarType* values_in, OrdinalType stride_in, OrdinalType numRows_in,
421 OrdinalType numCols_in
423 :
CompObject(),
Object(),
BLAS<OrdinalType,ScalarType>(), numRows_(numRows_in), numCols_(numCols_in), stride_(stride_in),
424 valuesCopied_(false), values_(values_in)
429 values_ =
new ScalarType[stride_*numCols_];
430 copyMat(values_in, stride_in, numRows_, numCols_, values_, stride_, 0, 0);
431 valuesCopied_ =
true;
435 template<
typename OrdinalType,
typename ScalarType>
437 :
CompObject(),
Object(),
BLAS<OrdinalType,ScalarType>(), numRows_(0), numCols_(0), stride_(0), valuesCopied_(true), values_(0)
441 numRows_ = Source.numRows_;
442 numCols_ = Source.numCols_;
444 if (!Source.valuesCopied_)
446 stride_ = Source.stride_;
447 values_ = Source.values_;
448 valuesCopied_ =
false;
453 const OrdinalType newsize = stride_ * numCols_;
455 values_ =
new ScalarType[newsize];
456 copyMat(Source.values_, Source.stride_, numRows_, numCols_, values_, stride_, 0, 0);
459 numRows_ = 0; numCols_ = 0; stride_ = 0;
460 valuesCopied_ =
false;
466 numRows_ = Source.numCols_;
467 numCols_ = Source.numRows_;
469 values_ =
new ScalarType[stride_*numCols_];
470 for (OrdinalType j=0; j<numCols_; j++) {
471 for (OrdinalType i=0; i<numRows_; i++) {
478 numRows_ = Source.numCols_;
479 numCols_ = Source.numRows_;
481 values_ =
new ScalarType[stride_*numCols_];
482 for (OrdinalType j=0; j<numCols_; j++) {
483 for (OrdinalType i=0; i<numRows_; i++) {
484 values_[j*stride_ + i] = Source.values_[i*Source.stride_ + j];
491 template<
typename OrdinalType,
typename ScalarType>
495 :
CompObject(), numRows_(Source.numRows_), numCols_(Source.numCols_), stride_(Source.stride_),
496 valuesCopied_(false), values_(Source.values_)
501 values_ =
new ScalarType[stride_ * numCols_];
502 copyMat(Source.values_, Source.stride_, numRows_, numCols_, values_, stride_, 0, 0);
503 valuesCopied_ =
true;
508 template<
typename OrdinalType,
typename ScalarType>
511 OrdinalType numRows_in, OrdinalType numCols_in, OrdinalType startRow,
514 :
CompObject(), numRows_(numRows_in), numCols_(numCols_in), stride_(Source.stride_),
515 valuesCopied_(false), values_(Source.values_)
519 stride_ = numRows_in;
520 values_ =
new ScalarType[stride_ * numCols_in];
521 copyMat(Source.values_, Source.stride_, numRows_in, numCols_in, values_, stride_, startRow, startCol);
522 valuesCopied_ =
true;
526 values_ = values_ + (stride_ * startCol) + startRow;
530 template<
typename OrdinalType,
typename ScalarType>
540 template<
typename OrdinalType,
typename ScalarType>
542 OrdinalType numRows_in, OrdinalType numCols_in
546 numRows_ = numRows_in;
547 numCols_ = numCols_in;
549 values_ =
new ScalarType[stride_*numCols_];
551 valuesCopied_ =
true;
555 template<
typename OrdinalType,
typename ScalarType>
557 OrdinalType numRows_in, OrdinalType numCols_in
561 numRows_ = numRows_in;
562 numCols_ = numCols_in;
564 values_ =
new ScalarType[stride_*numCols_];
565 valuesCopied_ =
true;
569 template<
typename OrdinalType,
typename ScalarType>
571 OrdinalType numRows_in, OrdinalType numCols_in
575 ScalarType* values_tmp =
new ScalarType[numRows_in * numCols_in];
577 for(OrdinalType k = 0; k < numRows_in * numCols_in; k++)
579 values_tmp[k] = zero;
581 OrdinalType numRows_tmp = TEUCHOS_MIN(numRows_, numRows_in);
582 OrdinalType numCols_tmp = TEUCHOS_MIN(numCols_, numCols_in);
585 copyMat(values_, stride_, numRows_tmp, numCols_tmp, values_tmp,
589 numRows_ = numRows_in;
590 numCols_ = numCols_in;
592 values_ = values_tmp;
593 valuesCopied_ =
true;
601 template<
typename OrdinalType,
typename ScalarType>
605 for(OrdinalType j = 0; j < numCols_; j++)
607 for(OrdinalType i = 0; i < numRows_; i++)
609 values_[i + j*stride_] = value_in;
615 template<
typename OrdinalType,
typename ScalarType>
619 for(OrdinalType j = 0; j < numCols_; j++)
621 for(OrdinalType i = 0; i < numRows_; i++)
629 template<
typename OrdinalType,
typename ScalarType>
637 if((!valuesCopied_) && (!Source.valuesCopied_) && (values_ == Source.values_))
641 if (!Source.valuesCopied_) {
646 numRows_ = Source.numRows_;
647 numCols_ = Source.numCols_;
648 stride_ = Source.stride_;
649 values_ = Source.values_;
654 numRows_ = Source.numRows_;
655 numCols_ = Source.numCols_;
656 stride_ = Source.numRows_;
657 const OrdinalType newsize = stride_ * numCols_;
659 values_ =
new ScalarType[newsize];
660 valuesCopied_ =
true;
668 if((Source.numRows_ <= stride_) && (Source.numCols_ == numCols_)) {
669 numRows_ = Source.numRows_;
670 numCols_ = Source.numCols_;
674 numRows_ = Source.numRows_;
675 numCols_ = Source.numCols_;
676 stride_ = Source.numRows_;
677 const OrdinalType newsize = stride_ * numCols_;
679 values_ =
new ScalarType[newsize];
680 valuesCopied_ =
true;
684 copyMat(Source.values_, Source.stride_, numRows_, numCols_, values_, stride_, 0, 0);
689 template<
typename OrdinalType,
typename ScalarType>
693 if ((numRows_ != Source.numRows_) || (numCols_ != Source.numCols_))
695 TEUCHOS_CHK_REF(*
this);
701 template<
typename OrdinalType,
typename ScalarType>
705 if ((numRows_ != Source.numRows_) || (numCols_ != Source.numCols_))
707 TEUCHOS_CHK_REF(*
this);
713 template<
typename OrdinalType,
typename ScalarType>
717 if((!valuesCopied_) && (!Source.valuesCopied_) && (values_ == Source.values_))
721 if ((numRows_ != Source.numRows_) || (numCols_ != Source.numCols_))
723 TEUCHOS_CHK_REF(*
this);
725 copyMat(Source.values_, Source.stride_, numRows_, numCols_, values_, stride_, 0, 0);
733 template<
typename OrdinalType,
typename ScalarType>
736 #ifdef HAVE_TEUCHOS_ARRAY_BOUNDSCHECK
737 checkIndex( rowIndex, colIndex );
739 return(values_[colIndex * stride_ + rowIndex]);
742 template<
typename OrdinalType,
typename ScalarType>
745 #ifdef HAVE_TEUCHOS_ARRAY_BOUNDSCHECK
746 checkIndex( rowIndex, colIndex );
748 return(values_[colIndex * stride_ + rowIndex]);
751 template<
typename OrdinalType,
typename ScalarType>
754 #ifdef HAVE_TEUCHOS_ARRAY_BOUNDSCHECK
755 checkIndex( 0, colIndex );
757 return(values_ + colIndex * stride_);
760 template<
typename OrdinalType,
typename ScalarType>
763 #ifdef HAVE_TEUCHOS_ARRAY_BOUNDSCHECK
764 checkIndex( 0, colIndex );
766 return(values_ + colIndex * stride_);
773 template<
typename OrdinalType,
typename ScalarType>
780 for(j = 0; j < numCols_; j++)
783 ptr = values_ + j * stride_;
784 for(i = 0; i < numRows_; i++)
794 updateFlops(numRows_ * numCols_);
798 template<
typename OrdinalType,
typename ScalarType>
804 for (i = 0; i < numRows_; i++) {
806 for (j=0; j< numCols_; j++) {
809 anorm = TEUCHOS_MAX( anorm, sum );
811 updateFlops(numRows_ * numCols_);
815 template<
typename OrdinalType,
typename ScalarType>
820 for (j = 0; j < numCols_; j++) {
821 for (i = 0; i < numRows_; i++) {
826 updateFlops(numRows_ * numCols_);
834 template<
typename OrdinalType,
typename ScalarType>
838 if((numRows_ != Operand.numRows_) || (numCols_ != Operand.numCols_))
845 for(i = 0; i < numRows_; i++)
847 for(j = 0; j < numCols_; j++)
849 if((*
this)(i, j) != Operand(i, j))
859 template<
typename OrdinalType,
typename ScalarType>
862 return !((*this) == Operand);
869 template<
typename OrdinalType,
typename ScalarType>
872 this->scale( alpha );
876 template<
typename OrdinalType,
typename ScalarType>
882 for (j=0; j<numCols_; j++) {
883 ptr = values_ + j*stride_;
884 for (i=0; i<numRows_; i++) { *ptr = alpha * (*ptr); ptr++; }
886 updateFlops( numRows_*numCols_ );
890 template<
typename OrdinalType,
typename ScalarType>
897 if ((numRows_ != A.numRows_) || (numCols_ != A.numCols_))
901 for (j=0; j<numCols_; j++) {
902 ptr = values_ + j*stride_;
903 for (i=0; i<numRows_; i++) { *ptr = A(i,j) * (*ptr); ptr++; }
905 updateFlops( numRows_*numCols_ );
909 template<
typename OrdinalType,
typename ScalarType>
913 OrdinalType A_nrows = (ETranspChar[transa]!=
'N') ? A.
numCols() : A.
numRows();
914 OrdinalType A_ncols = (ETranspChar[transa]!=
'N') ? A.
numRows() : A.
numCols();
915 OrdinalType B_nrows = (ETranspChar[transb]!=
'N') ? B.
numCols() : B.
numRows();
916 OrdinalType B_ncols = (ETranspChar[transb]!=
'N') ? B.
numRows() : B.
numCols();
917 if ((numRows_ != A_nrows) || (A_ncols != B_nrows) || (numCols_ != B_ncols))
922 this->GEMM(transa, transb, numRows_, numCols_, A_ncols, alpha, A.
values(), A.
stride(), B.
values(), B.
stride(), beta, values_, stride_);
923 double nflops = 2 * numRows_;
930 template<
typename OrdinalType,
typename ScalarType>
937 if (ESideChar[sideA]==
'L') {
938 if ((numRows_ != A_nrows) || (A_ncols != B_nrows) || (numCols_ != B_ncols)) {
942 if ((numRows_ != B_nrows) || (B_ncols != A_nrows) || (numCols_ != A_ncols)) {
949 this->SYMM(sideA, uplo, numRows_, numCols_, alpha, A.
values(), A.
stride(), B.
values(), B.
stride(), beta, values_, stride_);
950 double nflops = 2 * numRows_;
957 template<
typename OrdinalType,
typename ScalarType>
962 os <<
"Values_copied : yes" << std::endl;
964 os <<
"Values_copied : no" << std::endl;
965 os <<
"Rows : " << numRows_ << std::endl;
966 os <<
"Columns : " << numCols_ << std::endl;
967 os <<
"LDA : " << stride_ << std::endl;
968 if(numRows_ == 0 || numCols_ == 0) {
969 os <<
"(matrix is empty, no values to display)" << std::endl;
971 for(OrdinalType i = 0; i < numRows_; i++) {
972 for(OrdinalType j = 0; j < numCols_; j++){
973 os << (*this)(i,j) <<
" ";
984 template<
typename OrdinalType,
typename ScalarType>
987 "SerialDenseMatrix<T>::checkIndex: "
988 "Row index " << rowIndex <<
" out of range [0, "<< numRows_ <<
")");
990 "SerialDenseMatrix<T>::checkIndex: "
991 "Col index " << colIndex <<
" out of range [0, "<< numCols_ <<
")");
994 template<
typename OrdinalType,
typename ScalarType>
995 void SerialDenseMatrix<OrdinalType, ScalarType>::deleteArrays(
void)
1001 valuesCopied_ =
false;
1005 template<
typename OrdinalType,
typename ScalarType>
1006 void SerialDenseMatrix<OrdinalType, ScalarType>::copyMat(
1007 ScalarType* inputMatrix, OrdinalType strideInput, OrdinalType numRows_in,
1008 OrdinalType numCols_in, ScalarType* outputMatrix, OrdinalType strideOutput,
1009 OrdinalType startRow, OrdinalType startCol, ScalarType alpha
1013 ScalarType* ptr1 = 0;
1014 ScalarType* ptr2 = 0;
1015 for(j = 0; j < numCols_in; j++) {
1016 ptr1 = outputMatrix + (j * strideOutput);
1017 ptr2 = inputMatrix + (j + startCol) * strideInput + startRow;
1019 for(i = 0; i < numRows_in; i++)
1021 *ptr1++ += alpha*(*ptr2++);
1024 for(i = 0; i < numRows_in; i++)