43 #ifndef IFPACK2_RELAXATION_DEF_HPP 44 #define IFPACK2_RELAXATION_DEF_HPP 46 #include <Teuchos_StandardParameterEntryValidators.hpp> 47 #include <Teuchos_TimeMonitor.hpp> 48 #include <Tpetra_ConfigDefs.hpp> 49 #include <Tpetra_CrsMatrix.hpp> 50 #include <Tpetra_Experimental_BlockCrsMatrix.hpp> 52 #include <Ifpack2_Relaxation_decl.hpp> 62 class NonnegativeIntValidator :
public Teuchos::ParameterEntryValidator {
65 NonnegativeIntValidator () {}
68 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues ()
const {
74 validate (
const Teuchos::ParameterEntry& entry,
75 const std::string& paramName,
76 const std::string& sublistName)
const 79 Teuchos::any anyVal = entry.getAny (
true);
80 const std::string entryName = entry.getAny (
false).typeName ();
82 TEUCHOS_TEST_FOR_EXCEPTION(
83 anyVal.type () !=
typeid (int),
84 Teuchos::Exceptions::InvalidParameterType,
85 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
86 <<
"\" has the wrong type." << endl <<
"Parameter: " << paramName
87 << endl <<
"Type specified: " << entryName << endl
88 <<
"Type required: int" << endl);
90 const int val = Teuchos::any_cast<
int> (anyVal);
91 TEUCHOS_TEST_FOR_EXCEPTION(
92 val < 0, Teuchos::Exceptions::InvalidParameterValue,
93 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
94 <<
"\" is negative." << endl <<
"Parameter: " << paramName
95 << endl <<
"Value specified: " << val << endl
96 <<
"Required range: [0, INT_MAX]" << endl);
100 const std::string getXMLTypeName ()
const {
101 return "NonnegativeIntValidator";
106 printDoc (
const std::string& docString,
107 std::ostream &out)
const 109 Teuchos::StrUtils::printLines (out,
"# ", docString);
110 out <<
"#\tValidator Used: " << std::endl;
111 out <<
"#\t\tNonnegativeIntValidator" << std::endl;
118 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
122 static const Scalar eps ();
126 template<
class Scalar>
127 class SmallTraits<Scalar, true> {
129 static const Scalar eps () {
130 return Teuchos::ScalarTraits<Scalar>::one ();
135 template<
class Scalar>
136 class SmallTraits<Scalar, false> {
138 static const Scalar eps () {
139 return Teuchos::ScalarTraits<Scalar>::eps ();
146 template<
class MatrixType>
147 void Relaxation<MatrixType>::
148 setMatrix (
const Teuchos::RCP<const row_matrix_type>& A)
150 if (A.getRawPtr () != A_.getRawPtr ()) {
151 Importer_ = Teuchos::null;
152 Diagonal_ = Teuchos::null;
153 isInitialized_ =
false;
155 diagOffsets_ = Teuchos::null;
156 savedDiagOffsets_ =
false;
157 hasBlockCrsMatrix_ =
false;
158 if (! A.is_null ()) {
159 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
166 template<
class MatrixType>
173 DampingFactor_ (STS::one ()),
174 IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
176 A->getRowMap ()->getComm ()->getSize () > 1),
177 ZeroStartingSolution_ (true),
178 DoBackwardGS_ (false),
181 MinDiagonalValue_ (STS::zero ()),
182 fixTinyDiagEntries_ (false),
183 checkDiagEntries_ (false),
184 isInitialized_ (false),
189 InitializeTime_ (0.0),
194 globalMinMagDiagEntryMag_ (STM::zero ()),
195 globalMaxMagDiagEntryMag_ (STM::zero ()),
196 globalNumSmallDiagEntries_ (0),
197 globalNumZeroDiagEntries_ (0),
198 globalNumNegDiagEntries_ (0),
200 savedDiagOffsets_ (false),
201 hasBlockCrsMatrix_ (false)
203 this->setObjectLabel (
"Ifpack2::Relaxation");
207 template<
class MatrixType>
211 template<
class MatrixType>
212 Teuchos::RCP<const Teuchos::ParameterList>
215 using Teuchos::Array;
216 using Teuchos::ParameterList;
217 using Teuchos::parameterList;
220 using Teuchos::rcp_const_cast;
221 using Teuchos::rcp_implicit_cast;
222 using Teuchos::setStringToIntegralParameter;
223 typedef Teuchos::ParameterEntryValidator PEV;
225 if (validParams_.is_null ()) {
226 RCP<ParameterList> pl = parameterList (
"Ifpack2::Relaxation");
230 Array<std::string> precTypes (3);
231 precTypes[0] =
"Jacobi";
232 precTypes[1] =
"Gauss-Seidel";
233 precTypes[2] =
"Symmetric Gauss-Seidel";
234 Array<Details::RelaxationType> precTypeEnums (3);
235 precTypeEnums[0] = Details::JACOBI;
236 precTypeEnums[1] = Details::GS;
237 precTypeEnums[2] = Details::SGS;
238 const std::string defaultPrecType (
"Jacobi");
239 setStringToIntegralParameter<Details::RelaxationType> (
"relaxation: type",
240 defaultPrecType,
"Relaxation method", precTypes (), precTypeEnums (),
243 const int numSweeps = 1;
244 RCP<PEV> numSweepsValidator =
245 rcp_implicit_cast<PEV> (rcp (
new NonnegativeIntValidator));
246 pl->set (
"relaxation: sweeps", numSweeps,
"Number of relaxation sweeps",
247 rcp_const_cast<const PEV> (numSweepsValidator));
250 pl->set (
"relaxation: damping factor", dampingFactor);
252 const bool zeroStartingSolution =
true;
253 pl->set (
"relaxation: zero starting solution", zeroStartingSolution);
255 const bool doBackwardGS =
false;
256 pl->set (
"relaxation: backward mode", doBackwardGS);
258 const bool doL1Method =
false;
259 pl->set (
"relaxation: use l1", doL1Method);
261 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
262 (STM::one() + STM::one());
263 pl->set (
"relaxation: l1 eta", l1eta);
266 pl->set (
"relaxation: min diagonal value", minDiagonalValue);
268 const bool fixTinyDiagEntries =
false;
269 pl->set (
"relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
271 const bool checkDiagEntries =
false;
272 pl->set (
"relaxation: check diagonal entries", checkDiagEntries);
274 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
275 pl->set(
"relaxation: local smoothing indices", localSmoothingIndices);
277 validParams_ = rcp_const_cast<
const ParameterList> (pl);
283 template<
class MatrixType>
286 using Teuchos::getIntegralValue;
287 using Teuchos::ParameterList;
293 const Details::RelaxationType precType =
294 getIntegralValue<Details::RelaxationType> (pl,
"relaxation: type");
295 const int numSweeps = pl.get<
int> (
"relaxation: sweeps");
296 const ST dampingFactor = pl.get<ST> (
"relaxation: damping factor");
297 const bool zeroStartSol = pl.get<
bool> (
"relaxation: zero starting solution");
298 const bool doBackwardGS = pl.get<
bool> (
"relaxation: backward mode");
299 const bool doL1Method = pl.get<
bool> (
"relaxation: use l1");
301 const ST minDiagonalValue = pl.get<ST> (
"relaxation: min diagonal value");
302 const bool fixTinyDiagEntries = pl.get<
bool> (
"relaxation: fix tiny diagonal entries");
303 const bool checkDiagEntries = pl.get<
bool> (
"relaxation: check diagonal entries");
304 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >(
"relaxation: local smoothing indices");
308 PrecType_ = precType;
309 NumSweeps_ = numSweeps;
310 DampingFactor_ = dampingFactor;
311 ZeroStartingSolution_ = zeroStartSol;
312 DoBackwardGS_ = doBackwardGS;
313 DoL1Method_ = doL1Method;
315 MinDiagonalValue_ = minDiagonalValue;
316 fixTinyDiagEntries_ = fixTinyDiagEntries;
317 checkDiagEntries_ = checkDiagEntries;
318 localSmoothingIndices_ = localSmoothingIndices;
322 template<
class MatrixType>
327 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
331 template<
class MatrixType>
332 Teuchos::RCP<const Teuchos::Comm<int> >
334 TEUCHOS_TEST_FOR_EXCEPTION(
335 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getComm: " 336 "The input matrix A is null. Please call setMatrix() with a nonnull " 337 "input matrix before calling this method.");
338 return A_->getRowMap ()->getComm ();
342 template<
class MatrixType>
343 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
349 template<
class MatrixType>
350 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
351 typename MatrixType::global_ordinal_type,
352 typename MatrixType::node_type> >
354 TEUCHOS_TEST_FOR_EXCEPTION(
355 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getDomainMap: " 356 "The input matrix A is null. Please call setMatrix() with a nonnull " 357 "input matrix before calling this method.");
358 return A_->getDomainMap ();
362 template<
class MatrixType>
363 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
364 typename MatrixType::global_ordinal_type,
365 typename MatrixType::node_type> >
367 TEUCHOS_TEST_FOR_EXCEPTION(
368 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getRangeMap: " 369 "The input matrix A is null. Please call setMatrix() with a nonnull " 370 "input matrix before calling this method.");
371 return A_->getRangeMap ();
375 template<
class MatrixType>
381 template<
class MatrixType>
383 return(NumInitialize_);
387 template<
class MatrixType>
393 template<
class MatrixType>
399 template<
class MatrixType>
401 return(InitializeTime_);
405 template<
class MatrixType>
407 return(ComputeTime_);
411 template<
class MatrixType>
417 template<
class MatrixType>
419 return(ComputeFlops_);
423 template<
class MatrixType>
429 template<
class MatrixType>
432 apply (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
433 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
434 Teuchos::ETransp mode,
441 using Teuchos::rcpFromRef;
444 TEUCHOS_TEST_FOR_EXCEPTION(
445 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::apply: " 446 "The input matrix A is null. Please call setMatrix() with a nonnull " 447 "input matrix, then call compute(), before calling this method.");
448 TEUCHOS_TEST_FOR_EXCEPTION(
451 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 " 452 "preconditioner instance before you may call apply(). You may call " 453 "isComputed() to find out if compute() has been called already.");
454 TEUCHOS_TEST_FOR_EXCEPTION(
455 X.getNumVectors() != Y.getNumVectors(),
457 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. " 458 "X has " << X.getNumVectors() <<
" columns, but Y has " 459 << Y.getNumVectors() <<
" columns.");
460 TEUCHOS_TEST_FOR_EXCEPTION(
461 beta != STS::zero (), std::logic_error,
462 "Ifpack2::Relaxation::apply: beta = " << beta <<
" != 0 case not " 467 Teuchos::TimeMonitor timeMon (*Time_,
true);
470 if (alpha == STS::zero ()) {
472 Y.putScalar (STS::zero ());
480 auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
481 auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
482 if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
483 Xcopy = rcp (
new MV (X, Teuchos::Copy));
485 Xcopy = rcpFromRef (X);
493 case Ifpack2::Details::JACOBI:
494 ApplyInverseJacobi(*Xcopy,Y);
496 case Ifpack2::Details::GS:
497 ApplyInverseGS(*Xcopy,Y);
499 case Ifpack2::Details::SGS:
500 ApplyInverseSGS(*Xcopy,Y);
503 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
504 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value " 505 << PrecType_ <<
". Please report this bug to the Ifpack2 developers.");
507 if (alpha != STS::one ()) {
509 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
510 const double numVectors = as<double> (Y.getNumVectors ());
511 ApplyFlops_ += numGlobalRows * numVectors;
515 ApplyTime_ += Time_->totalElapsedTime ();
520 template<
class MatrixType>
523 applyMat (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
524 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
525 Teuchos::ETransp mode)
const 527 TEUCHOS_TEST_FOR_EXCEPTION(
528 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: " 529 "The input matrix A is null. Please call setMatrix() with a nonnull " 530 "input matrix, then call compute(), before calling this method.");
531 TEUCHOS_TEST_FOR_EXCEPTION(
532 !
isComputed (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: " 533 "isComputed() must be true before you may call applyMat(). " 534 "Please call compute() before calling this method.");
535 TEUCHOS_TEST_FOR_EXCEPTION(
536 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
537 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
538 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
539 A_->apply (X, Y, mode);
543 template<
class MatrixType>
546 TEUCHOS_TEST_FOR_EXCEPTION(
547 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::initialize: " 548 "The input matrix A is null. Please call setMatrix() with a nonnull " 549 "input matrix before calling this method.");
552 hasBlockCrsMatrix_ =
false;
555 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
556 Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type> (A_);
557 if (A_bcrs.is_null ()) {
558 hasBlockCrsMatrix_ =
false;
561 hasBlockCrsMatrix_ =
true;
568 isInitialized_ =
true;
571 template<
class MatrixType>
575 using Teuchos::Array;
576 using Teuchos::ArrayRCP;
577 using Teuchos::ArrayView;
582 using Teuchos::REDUCE_MAX;
583 using Teuchos::REDUCE_MIN;
584 using Teuchos::REDUCE_SUM;
585 using Teuchos::rcp_dynamic_cast;
586 using Teuchos::reduceAll;
592 Teuchos::TimeMonitor timeMon (*Time_,
true);
594 TEUCHOS_TEST_FOR_EXCEPTION(
595 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::" 596 "computeBlockCrs: The input matrix A is null. Please call setMatrix() " 597 "with a nonnull input matrix, then call initialize(), before calling " 599 const block_crs_matrix_type* blockCrsA =
600 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
601 TEUCHOS_TEST_FOR_EXCEPTION(
602 blockCrsA == NULL, std::logic_error,
"Ifpack2::Relaxation::" 603 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we " 604 "got this far. Please report this bug to the Ifpack2 developers.");
611 const LO lclNumMeshRows =
612 blockCrsA->getCrsGraph ().getNodeNumRows ();
613 const LO blockSize = blockCrsA->getBlockSize ();
615 if (! savedDiagOffsets_) {
616 blockDiag_ = block_diag_type ();
617 blockDiag_ = block_diag_type (
"Ifpack2::Relaxation::blockDiag_",
618 lclNumMeshRows, blockSize, blockSize);
619 blockCrsA->getLocalDiagOffsets (diagOffsets_);
620 TEUCHOS_TEST_FOR_EXCEPTION
621 (static_cast<size_t> (diagOffsets_.size ()) !=
622 static_cast<size_t> (blockDiag_.dimension_0 ()),
623 std::logic_error,
"diagOffsets_.size() = " << diagOffsets_.size () <<
624 " != blockDiag_.dimension_0() = " << blockDiag_.dimension_0 () <<
625 ". Please report this bug to the Ifpack2 developers.");
626 savedDiagOffsets_ =
true;
628 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_ ());
635 unmanaged_block_diag_type blockDiag = blockDiag_;
637 if (DoL1Method_ && IsParallel_) {
639 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
640 Array<LO> indices (maxLength);
641 Array<scalar_type> values (maxLength * blockSize * blockSize);
642 size_t numEntries = 0;
644 for (LO i = 0; i < lclNumMeshRows; ++i) {
646 blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
648 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
649 for (LO subRow = 0; subRow < blockSize; ++subRow) {
651 for (
size_t k = 0 ; k < numEntries ; ++k) {
652 if (indices[k] > lclNumMeshRows) {
653 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
654 for (LO subCol = 0; subCol < blockSize; ++subCol) {
655 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
659 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
660 diagBlock(subRow, subRow) += diagonal_boost;
666 blockDiagFactPivots_ =
667 pivots_type (
"Ifpack2::Relaxation::pivots", lclNumMeshRows, blockSize);
669 unmanaged_pivots_type pivots = blockDiagFactPivots_;
672 for (LO i = 0 ; i < lclNumMeshRows; ++i) {
673 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
674 auto ipiv = Kokkos::subview (pivots, i, ALL ());
675 Tpetra::Experimental::GETF2 (diagBlock, ipiv, info);
680 #ifdef HAVE_IFPACK2_DEBUG 681 const int numResults = 2;
683 int lclResults[2], gblResults[2];
684 lclResults[0] = info;
685 lclResults[1] = -info;
688 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
689 numResults, lclResults, gblResults);
690 TEUCHOS_TEST_FOR_EXCEPTION
691 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
692 "Ifpack2::Relaxation::compute: When processing the input " 693 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations " 694 "failed on one or more (MPI) processes.");
695 #endif // HAVE_IFPACK2_DEBUG 697 Importer_ = A_->getGraph ()->getImporter ();
700 ComputeTime_ += Time_->totalElapsedTime ();
705 template<
class MatrixType>
708 using Teuchos::Array;
709 using Teuchos::ArrayRCP;
710 using Teuchos::ArrayView;
715 using Teuchos::REDUCE_MAX;
716 using Teuchos::REDUCE_MIN;
717 using Teuchos::REDUCE_SUM;
718 using Teuchos::rcp_dynamic_cast;
719 using Teuchos::reduceAll;
722 typedef typename vector_type::device_type device_type;
723 const scalar_type zero = STS::zero ();
724 const scalar_type one = STS::one ();
731 if (hasBlockCrsMatrix_) {
739 Teuchos::TimeMonitor timeMon (*Time_,
true);
741 TEUCHOS_TEST_FOR_EXCEPTION(
742 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::compute: " 743 "The input matrix A is null. Please call setMatrix() with a nonnull " 744 "input matrix, then call initialize(), before calling this method.");
749 TEUCHOS_TEST_FOR_EXCEPTION(
750 NumSweeps_ < 0, std::logic_error,
751 "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ <<
" < 0. " 752 "Please report this bug to the Ifpack2 developers.");
754 Diagonal_ = rcp (
new vector_type (A_->getRowMap ()));
772 const crs_matrix_type* crsMat =
773 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
774 if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
775 A_->getLocalDiagCopy (*Diagonal_);
777 if (! savedDiagOffsets_) {
778 crsMat->getLocalDiagOffsets (diagOffsets_);
779 savedDiagOffsets_ =
true;
781 crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_ ());
782 #ifdef HAVE_TPETRA_DEBUG 784 vector_type D_copy (A_->getRowMap ());
785 A_->getLocalDiagCopy (D_copy);
786 D_copy.update (STS::one (), *Diagonal_, -STS::one ());
790 TEUCHOS_TEST_FOR_EXCEPTION(
791 err != STM::zero(), std::logic_error,
"Ifpack2::Relaxation::compute: " 792 "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = " 794 #endif // HAVE_TPETRA_DEBUG 800 RCP<vector_type> origDiag;
801 if (checkDiagEntries_) {
802 origDiag = rcp (
new vector_type (A_->getRowMap ()));
803 Tpetra::deep_copy (*origDiag, *Diagonal_);
806 const size_t numMyRows = A_->getNodeNumRows ();
809 Diagonal_->template sync<Kokkos::HostSpace> ();
810 Diagonal_->template modify<Kokkos::HostSpace> ();
811 auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
812 auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
814 scalar_type*
const diag =
reinterpret_cast<scalar_type*
> (diag_1d.ptr_on_device ());
823 if (DoL1Method_ && IsParallel_) {
824 const scalar_type two = one + one;
825 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
826 Array<local_ordinal_type> indices (maxLength);
827 Array<scalar_type> values (maxLength);
830 for (
size_t i = 0; i < numMyRows; ++i) {
831 A_->getLocalRowCopy (i, indices (), values (), numEntries);
833 for (
size_t k = 0 ; k < numEntries ; ++k) {
834 if (static_cast<size_t> (indices[k]) > numMyRows) {
835 diagonal_boost += STS::magnitude (values[k] / two);
838 if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
839 diag[i] += diagonal_boost;
855 const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
856 one / SmallTraits<scalar_type>::eps () :
857 one / MinDiagonalValue_;
859 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
861 if (checkDiagEntries_) {
867 size_t numSmallDiagEntries = 0;
868 size_t numZeroDiagEntries = 0;
869 size_t numNegDiagEntries = 0;
881 const scalar_type d_0 = diag[0];
885 minMagDiagEntryMag = d_0_mag;
886 maxMagDiagEntryMag = d_0_mag;
894 for (
size_t i = 0 ; i < numMyRows; ++i) {
895 const scalar_type d_i = diag[i];
901 if (d_i_real < STM::zero ()) {
904 if (d_i_mag < minMagDiagEntryMag) {
906 minMagDiagEntryMag = d_i_mag;
908 if (d_i_mag > maxMagDiagEntryMag) {
910 maxMagDiagEntryMag = d_i_mag;
913 if (fixTinyDiagEntries_) {
915 if (d_i_mag <= minDiagValMag) {
916 ++numSmallDiagEntries;
917 if (d_i_mag == STM::zero ()) {
918 ++numZeroDiagEntries;
920 diag[i] = oneOverMinDiagVal;
927 if (d_i_mag <= minDiagValMag) {
928 ++numSmallDiagEntries;
929 if (d_i_mag == STM::zero ()) {
930 ++numZeroDiagEntries;
940 if (STS::isComplex) {
941 ComputeFlops_ += 4.0 * numMyRows;
943 ComputeFlops_ += numMyRows;
960 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
963 Array<magnitude_type> localVals (2);
964 localVals[0] = minMagDiagEntryMag;
966 localVals[1] = -maxMagDiagEntryMag;
967 Array<magnitude_type> globalVals (2);
968 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
969 localVals.getRawPtr (),
970 globalVals.getRawPtr ());
971 globalMinMagDiagEntryMag_ = globalVals[0];
972 globalMaxMagDiagEntryMag_ = -globalVals[1];
974 Array<size_t> localCounts (3);
975 localCounts[0] = numSmallDiagEntries;
976 localCounts[1] = numZeroDiagEntries;
977 localCounts[2] = numNegDiagEntries;
978 Array<size_t> globalCounts (3);
979 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
980 localCounts.getRawPtr (),
981 globalCounts.getRawPtr ());
982 globalNumSmallDiagEntries_ = globalCounts[0];
983 globalNumZeroDiagEntries_ = globalCounts[1];
984 globalNumNegDiagEntries_ = globalCounts[2];
996 vector_type diff (A_->getRowMap ());
997 diff.reciprocal (*origDiag);
998 Diagonal_->template sync<device_type> ();
999 diff.update (-one, *Diagonal_, one);
1000 globalDiagNormDiff_ = diff.norm2 ();
1003 if (fixTinyDiagEntries_) {
1007 for (
size_t i = 0 ; i < numMyRows; ++i) {
1008 const scalar_type d_i = diag[i];
1012 if (d_i_mag <= minDiagValMag) {
1013 diag[i] = oneOverMinDiagVal;
1015 diag[i] = one / d_i;
1020 for (
size_t i = 0 ; i < numMyRows; ++i) {
1021 diag[i] = one / diag[i];
1024 if (STS::isComplex) {
1025 ComputeFlops_ += 4.0 * numMyRows;
1027 ComputeFlops_ += numMyRows;
1031 if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1032 PrecType_ == Ifpack2::Details::SGS)) {
1036 Importer_ = A_->getGraph ()->getImporter ();
1037 Diagonal_->template sync<device_type> ();
1041 ComputeTime_ += Time_->totalElapsedTime ();
1047 template<
class MatrixType>
1050 ApplyInverseJacobi (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1051 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1057 if (hasBlockCrsMatrix_) {
1058 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1062 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1063 const double numVectors = as<double> (X.getNumVectors ());
1064 if (ZeroStartingSolution_) {
1069 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1075 double flopUpdate = 0.0;
1076 if (DampingFactor_ == STS::one ()) {
1078 flopUpdate = numGlobalRows * numVectors;
1082 flopUpdate = 2.0 * numGlobalRows * numVectors;
1084 ApplyFlops_ += flopUpdate;
1085 if (NumSweeps_ == 1) {
1091 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1094 MV A_times_Y (Y.getMap (), as<size_t>(numVectors),
false);
1095 for (
int j = startSweep; j < NumSweeps_; ++j) {
1098 A_times_Y.update (STS::one (), X, -STS::one ());
1099 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
1113 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1114 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1115 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1116 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1119 template<
class MatrixType>
1128 typedef Tpetra::Experimental::BlockMultiVector<
scalar_type,
1132 typedef typename block_crs_matrix_type::little_vec_type little_vec_type;
1136 unmanaged_block_diag_type blockDiag = blockDiag_;
1137 unmanaged_pivots_type pivots = blockDiagFactPivots_;
1139 if (ZeroStartingSolution_) {
1140 Y.putScalar (STS::zero ());
1143 const size_t numVectors = X.getNumVectors ();
1145 const block_crs_matrix_type* blockMat =
1146 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1148 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1149 BMV A_times_Y_Block (* (blockMat->getRowMap ()), * (Y.getMap ()),
1150 blockSize, numVectors);
1151 MV A_times_Y = A_times_Y_Block.getMultiVectorView();
1152 BMV yBlock (Y, * (blockMat->getRowMap ()), blockSize);
1153 for (
int j = 0; j < NumSweeps_; ++j) {
1154 blockMat->apply (Y, A_times_Y);
1155 A_times_Y.update (STS::one (), X, -STS::one ());
1156 const size_t numRows = blockMat->getNodeNumRows ();
1157 for (
size_t i = 0; i < numVectors; ++i) {
1158 for (
size_t k = 0; k < numRows; ++k) {
1160 auto factorizedBlockDiag = Kokkos::subview (blockDiag, k, ALL (), ALL ());
1161 auto ipiv = Kokkos::subview (pivots, k, ALL ());
1162 little_vec_type xloc = A_times_Y_Block.getLocalBlock (k, i);
1163 little_vec_type yloc = yBlock.getLocalBlock (k, i);
1165 Tpetra::Experimental::GETRS (
"N", factorizedBlockDiag, ipiv, xloc, info);
1166 Tpetra::Experimental::AXPY (DampingFactor_, xloc, yloc);
1172 template<
class MatrixType>
1175 ApplyInverseGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1176 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1188 const block_crs_matrix_type* blockCrsMat =
1189 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1190 const crs_matrix_type* crsMat =
1191 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1192 if (blockCrsMat != NULL) {
1193 const_cast<this_type*
> (
this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1194 }
else if (crsMat != NULL) {
1195 ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1197 ApplyInverseGS_RowMatrix (X, Y);
1202 template<
class MatrixType>
1205 ApplyInverseGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1206 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1208 using Teuchos::Array;
1209 using Teuchos::ArrayRCP;
1210 using Teuchos::ArrayView;
1214 using Teuchos::rcpFromRef;
1215 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1220 if (ZeroStartingSolution_) {
1221 Y.putScalar (STS::zero ());
1224 const size_t NumVectors = X.getNumVectors ();
1225 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1226 Array<local_ordinal_type> Indices (maxLength);
1227 Array<scalar_type> Values (maxLength);
1230 const size_t numMyRows = A_->getNodeNumRows();
1232 size_t numActive = numMyRows;
1233 bool do_local = ! localSmoothingIndices_.is_null ();
1235 rowInd = localSmoothingIndices_.getRawPtr ();
1236 numActive = localSmoothingIndices_.size ();
1241 if (Importer_.is_null ()) {
1243 Y2 = rcp (
new MV (Y.getMap (), NumVectors,
false));
1248 Y2 = rcp (
new MV (Importer_->getTargetMap (), NumVectors));
1252 Y2 = rcpFromRef (Y);
1256 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1257 ArrayView<const scalar_type> d_ptr = d_rcp();
1260 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1262 if (constant_stride) {
1264 size_t x_stride = X.getStride();
1265 size_t y2_stride = Y2->getStride();
1266 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1267 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1268 ArrayView<scalar_type> y2_ptr = y2_rcp();
1269 ArrayView<const scalar_type> x_ptr = x_rcp();
1270 Array<scalar_type> dtemp(NumVectors,STS::zero());
1272 for (
int j = 0; j < NumSweeps_; ++j) {
1275 if (Importer_.is_null ()) {
1278 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1282 if (! DoBackwardGS_) {
1283 for (
size_t ii = 0; ii < numActive; ++ii) {
1286 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1287 dtemp.assign(NumVectors,STS::zero());
1289 for (
size_t k = 0; k < NumEntries; ++k) {
1291 for (
size_t m = 0; m < NumVectors; ++m) {
1292 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1296 for (
size_t m = 0; m < NumVectors; ++m) {
1297 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1304 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1307 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1308 dtemp.assign (NumVectors, STS::zero ());
1310 for (
size_t k = 0; k < NumEntries; ++k) {
1312 for (
size_t m = 0; m < NumVectors; ++m) {
1313 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1317 for (
size_t m = 0; m < NumVectors; ++m) {
1318 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1324 Tpetra::deep_copy (Y, *Y2);
1330 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1331 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1333 for (
int j = 0; j < NumSweeps_; ++j) {
1336 if (Importer_.is_null ()) {
1339 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1343 if (! DoBackwardGS_) {
1344 for (
size_t ii = 0; ii < numActive; ++ii) {
1347 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1349 for (
size_t m = 0; m < NumVectors; ++m) {
1351 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1352 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1354 for (
size_t k = 0; k < NumEntries; ++k) {
1356 dtemp += Values[k] * y2_local[col];
1358 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1365 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1369 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1371 for (
size_t m = 0; m < NumVectors; ++m) {
1373 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1374 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1376 for (
size_t k = 0; k < NumEntries; ++k) {
1378 dtemp += Values[k] * y2_local[col];
1380 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1387 Tpetra::deep_copy (Y, *Y2);
1394 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1395 const double numVectors = as<double> (X.getNumVectors ());
1396 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1397 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1398 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1399 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1403 template<
class MatrixType>
1407 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1408 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1411 const Tpetra::ESweepDirection direction =
1412 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1413 if (localSmoothingIndices_.is_null ()) {
1414 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1415 NumSweeps_, ZeroStartingSolution_);
1418 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1419 DampingFactor_, direction,
1420 NumSweeps_, ZeroStartingSolution_);
1434 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1435 const double numVectors = as<double> (X.getNumVectors ());
1436 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1437 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1438 ApplyFlops_ += NumSweeps_ * numVectors *
1439 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1442 template<
class MatrixType>
1446 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1447 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1451 using Teuchos::rcpFromRef;
1452 typedef Tpetra::Experimental::BlockMultiVector<
scalar_type,
1464 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1465 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1467 bool performImport =
false;
1469 if (Importer_.is_null ()) {
1470 yBlockCol = rcpFromRef (yBlock);
1473 if (yBlockColumnPointMap_.is_null () ||
1474 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1475 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1476 yBlockColumnPointMap_ =
1477 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
1478 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
1480 yBlockCol = yBlockColumnPointMap_;
1481 performImport =
true;
1484 if (ZeroStartingSolution_) {
1485 yBlockCol->putScalar (STS::zero ());
1487 else if (performImport) {
1488 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1491 const Tpetra::ESweepDirection direction =
1492 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1494 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
1495 if (performImport && sweep > 0) {
1496 yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1498 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1499 blockDiagFactPivots_,
1500 DampingFactor_, direction);
1501 if (performImport) {
1502 RCP<const MV> yBlockColPointDomain =
1503 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1504 Tpetra::deep_copy (Y, *yBlockColPointDomain);
1510 template<
class MatrixType>
1513 ApplyInverseSGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1514 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1526 const block_crs_matrix_type* blockCrsMat =
dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr());
1527 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
1528 if (blockCrsMat != NULL) {
1529 const_cast<this_type*
> (
this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
1531 else if (crsMat != NULL) {
1532 ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
1534 ApplyInverseSGS_RowMatrix (X, Y);
1539 template<
class MatrixType>
1543 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1545 using Teuchos::Array;
1546 using Teuchos::ArrayRCP;
1547 using Teuchos::ArrayView;
1551 using Teuchos::rcpFromRef;
1552 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1557 if (ZeroStartingSolution_) {
1558 Y.putScalar (STS::zero ());
1561 const size_t NumVectors = X.getNumVectors ();
1562 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1563 Array<local_ordinal_type> Indices (maxLength);
1564 Array<scalar_type> Values (maxLength);
1567 const size_t numMyRows = A_->getNodeNumRows();
1569 size_t numActive = numMyRows;
1570 bool do_local = localSmoothingIndices_.is_null();
1572 rowInd = localSmoothingIndices_.getRawPtr();
1573 numActive = localSmoothingIndices_.size();
1579 if (Importer_.is_null ()) {
1581 Y2 = rcp (
new MV (Y.getMap (), NumVectors,
false));
1586 Y2 = rcp (
new MV (Importer_->getTargetMap (), NumVectors));
1590 Y2 = rcpFromRef (Y);
1594 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
1595 ArrayView<const scalar_type> d_ptr = d_rcp();
1598 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1600 if(constant_stride) {
1602 size_t x_stride = X.getStride();
1603 size_t y2_stride = Y2->getStride();
1604 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1605 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1606 ArrayView<scalar_type> y2_ptr = y2_rcp();
1607 ArrayView<const scalar_type> x_ptr = x_rcp();
1608 Array<scalar_type> dtemp(NumVectors,STS::zero());
1609 for (
int iter = 0; iter < NumSweeps_; ++iter) {
1612 if (Importer_.is_null ()) {
1614 Tpetra::deep_copy (*Y2, Y);
1616 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1620 for (
int j = 0; j < NumSweeps_; j++) {
1623 if (Importer_.is_null ()) {
1625 Tpetra::deep_copy (*Y2, Y);
1627 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1631 for (
size_t ii = 0; ii < numActive; ++ii) {
1634 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1635 dtemp.assign(NumVectors,STS::zero());
1637 for (
size_t k = 0; k < NumEntries; ++k) {
1639 for (
size_t m = 0; m < NumVectors; ++m) {
1640 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1644 for (
size_t m = 0; m < NumVectors; ++m) {
1645 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1651 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1654 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1655 dtemp.assign(NumVectors,STS::zero());
1657 for (
size_t k = 0; k < NumEntries; ++k) {
1659 for (
size_t m = 0; m < NumVectors; ++m) {
1660 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1664 for (
size_t m = 0; m < NumVectors; ++m) {
1665 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1672 Tpetra::deep_copy (Y, *Y2);
1678 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1679 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1681 for (
int iter = 0; iter < NumSweeps_; ++iter) {
1684 if (Importer_.is_null ()) {
1686 Tpetra::deep_copy (*Y2, Y);
1688 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1692 for (
size_t ii = 0; ii < numActive; ++ii) {
1696 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
1698 for (
size_t m = 0; m < NumVectors; ++m) {
1700 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1701 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1703 for (
size_t k = 0; k < NumEntries; ++k) {
1705 dtemp += Values[k] * y2_local[col];
1707 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
1713 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1717 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
1719 for (
size_t m = 0; m < NumVectors; ++m) {
1721 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1722 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1724 for (
size_t k = 0; k < NumEntries; ++k) {
1726 dtemp += Values[k] * y2_local[col];
1728 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
1734 Tpetra::deep_copy (Y, *Y2);
1740 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1741 const double numVectors = as<double> (X.getNumVectors ());
1742 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1743 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1744 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1745 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1749 template<
class MatrixType>
1753 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1754 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1757 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
1758 if (localSmoothingIndices_.is_null ()) {
1759 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1760 NumSweeps_, ZeroStartingSolution_);
1763 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1764 DampingFactor_, direction,
1765 NumSweeps_, ZeroStartingSolution_);
1782 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1783 const double numVectors = as<double> (X.getNumVectors ());
1784 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1785 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1786 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1787 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1790 template<
class MatrixType>
1794 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1795 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1799 using Teuchos::rcpFromRef;
1800 typedef Tpetra::Experimental::BlockMultiVector<
scalar_type,
1812 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1813 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1815 bool performImport =
false;
1817 if (Importer_.is_null ()) {
1818 yBlockCol = Teuchos::rcpFromRef (yBlock);
1821 if (yBlockColumnPointMap_.is_null () ||
1822 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1823 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1824 yBlockColumnPointMap_ =
1825 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
1826 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
1828 yBlockCol = yBlockColumnPointMap_;
1829 performImport =
true;
1832 if (ZeroStartingSolution_) {
1833 yBlockCol->putScalar (STS::zero ());
1835 else if (performImport) {
1836 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1840 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
1842 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
1843 if (performImport && sweep > 0) {
1844 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1846 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_, blockDiagFactPivots_,
1847 DampingFactor_, direction);
1848 if (performImport) {
1849 RCP<const MV> yBlockColPointDomain =
1850 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1851 MV yBlockView = yBlock.getMultiVectorView ();
1852 Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
1858 template<
class MatrixType>
1861 std::ostringstream os;
1866 os <<
"\"Ifpack2::Relaxation\": {";
1868 os <<
"Initialized: " << (
isInitialized () ?
"true" :
"false") <<
", " 1869 <<
"Computed: " << (
isComputed () ?
"true" :
"false") <<
", ";
1875 if (PrecType_ == Ifpack2::Details::JACOBI) {
1877 }
else if (PrecType_ == Ifpack2::Details::GS) {
1878 os <<
"Gauss-Seidel";
1879 }
else if (PrecType_ == Ifpack2::Details::SGS) {
1880 os <<
"Symmetric Gauss-Seidel";
1885 os <<
", " <<
"sweeps: " << NumSweeps_ <<
", " 1886 <<
"damping factor: " << DampingFactor_ <<
", ";
1888 os <<
"use l1: " << DoL1Method_ <<
", " 1889 <<
"l1 eta: " << L1Eta_ <<
", ";
1892 if (A_.is_null ()) {
1893 os <<
"Matrix: null";
1896 os <<
"Global matrix dimensions: [" 1897 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]" 1898 <<
", Global nnz: " << A_->getGlobalNumEntries();
1906 template<
class MatrixType>
1910 const Teuchos::EVerbosityLevel verbLevel)
const 1912 using Teuchos::OSTab;
1913 using Teuchos::TypeNameTraits;
1914 using Teuchos::VERB_DEFAULT;
1915 using Teuchos::VERB_NONE;
1916 using Teuchos::VERB_LOW;
1917 using Teuchos::VERB_MEDIUM;
1918 using Teuchos::VERB_HIGH;
1919 using Teuchos::VERB_EXTREME;
1922 const Teuchos::EVerbosityLevel vl =
1923 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1925 const int myRank = this->
getComm ()->getRank ();
1933 if (vl != VERB_NONE && myRank == 0) {
1937 out <<
"\"Ifpack2::Relaxation\":" << endl;
1939 out <<
"MatrixType: \"" << TypeNameTraits<MatrixType>::name () <<
"\"" 1941 if (this->getObjectLabel () !=
"") {
1942 out <<
"Label: " << this->getObjectLabel () << endl;
1944 out <<
"Initialized: " << (
isInitialized () ?
"true" :
"false") << endl
1945 <<
"Computed: " << (
isComputed () ?
"true" :
"false") << endl
1946 <<
"Parameters: " << endl;
1949 out <<
"\"relaxation: type\": ";
1950 if (PrecType_ == Ifpack2::Details::JACOBI) {
1952 }
else if (PrecType_ == Ifpack2::Details::GS) {
1953 out <<
"Gauss-Seidel";
1954 }
else if (PrecType_ == Ifpack2::Details::SGS) {
1955 out <<
"Symmetric Gauss-Seidel";
1962 <<
"\"relaxation: sweeps\": " << NumSweeps_ << endl
1963 <<
"\"relaxation: damping factor\": " << DampingFactor_ << endl
1964 <<
"\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
1965 <<
"\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
1966 <<
"\"relaxation: backward mode\": " << DoBackwardGS_ << endl
1967 <<
"\"relaxation: use l1\": " << DoL1Method_ << endl
1968 <<
"\"relaxation: l1 eta\": " << L1Eta_ << endl;
1970 out <<
"Computed quantities:" << endl;
1973 out <<
"Global number of rows: " << A_->getGlobalNumRows () << endl
1974 <<
"Global number of columns: " << A_->getGlobalNumCols () << endl;
1977 out <<
"Properties of input diagonal entries:" << endl;
1980 out <<
"Magnitude of minimum-magnitude diagonal entry: " 1981 << globalMinMagDiagEntryMag_ << endl
1982 <<
"Magnitude of maximum-magnitude diagonal entry: " 1983 << globalMaxMagDiagEntryMag_ << endl
1984 <<
"Number of diagonal entries with small magnitude: " 1985 << globalNumSmallDiagEntries_ << endl
1986 <<
"Number of zero diagonal entries: " 1987 << globalNumZeroDiagEntries_ << endl
1988 <<
"Number of diagonal entries with negative real part: " 1989 << globalNumNegDiagEntries_ << endl
1990 <<
"Abs 2-norm diff between computed and actual inverse " 1991 <<
"diagonal: " << globalDiagNormDiff_ << endl;
1995 out <<
"Saved diagonal offsets: " 1996 << (savedDiagOffsets_ ?
"true" :
"false") << endl;
1998 out <<
"Call counts and total times (in seconds): " << endl;
2001 out <<
"initialize: " << endl;
2004 out <<
"Call count: " << NumInitialize_ << endl;
2005 out <<
"Total time: " << InitializeTime_ << endl;
2007 out <<
"compute: " << endl;
2010 out <<
"Call count: " << NumCompute_ << endl;
2011 out <<
"Total time: " << ComputeTime_ << endl;
2013 out <<
"apply: " << endl;
2016 out <<
"Call count: " << NumApply_ << endl;
2017 out <<
"Total time: " << ApplyTime_ << endl;
2025 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \ 2026 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >; 2028 #endif // IFPACK2_RELAXATION_DEF_HPP bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:376
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:418
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:424
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:1909
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:706
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:333
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:394
Ifpack2 implementation details.
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:406
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:246
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:353
void setParameters(const Teuchos::ParameterList ¶ms)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:323
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:1859
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:382
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_Relaxation_decl.hpp:412
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:366
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:243
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:400
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:523
Definition: Ifpack2_Details_Amesos2Wrapper_decl.hpp:63
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:344
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:240
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:237
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:208
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:388
bool isInitialized() const
Returns true if the preconditioner has been successfully initialized.
Definition: Ifpack2_Relaxation_decl.hpp:397
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:222
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:412
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:544
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:432
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:213
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:249