52 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP 53 #define AMESOS2_SUPERLUDIST_DEF_HPP 55 #include <Teuchos_Tuple.hpp> 56 #include <Teuchos_StandardParameterEntryValidators.hpp> 57 #include <Teuchos_DefaultMpiComm.hpp> 67 template <
class Matrix,
class Vector>
69 Teuchos::RCP<Vector> X,
70 Teuchos::RCP<const Vector> B)
82 using Teuchos::MpiComm;
83 using Teuchos::outArg;
84 using Teuchos::ParameterList;
85 using Teuchos::parameterList;
88 using Teuchos::rcp_dynamic_cast;
89 using Teuchos::REDUCE_SUM;
90 using Teuchos::reduceAll;
91 typedef global_ordinal_type GO;
92 typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
98 RCP<const Comm<int> > comm = this->
getComm ();
99 const int myRank = comm->getRank ();
100 const int numProcs = comm->getSize ();
102 SLUD::int_t nprow, npcol;
109 RCP<const Comm<int> > matComm = this->
matrixA_->getComm ();
110 TEUCHOS_TEST_FOR_EXCEPTION(
111 matComm.is_null (), std::logic_error,
"Amesos2::Superlustdist " 112 "constructor: The matrix's communicator is null!");
113 RCP<const MpiComm<int> > matMpiComm =
114 rcp_dynamic_cast<
const MpiComm<int> > (matComm);
119 TEUCHOS_TEST_FOR_EXCEPTION(
120 matMpiComm.is_null (), std::logic_error,
"Amesos2::Superlustdist " 121 "constructor: The matrix's communicator is not an MpiComm!");
122 TEUCHOS_TEST_FOR_EXCEPTION(
123 matMpiComm->getRawMpiComm ().is_null (), std::logic_error,
"Amesos2::" 124 "Superlustdist constructor: The matrix's communicator claims to be a " 125 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns " 126 "Teuchos::null! This means that the underlying MPI_Comm doesn't even " 127 "exist, which likely implies that the Teuchos::MpiComm was constructed " 128 "incorrectly. It means something different than if the MPI_Comm were " 130 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
131 data_.mat_comm = rawMpiComm;
137 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
146 RCP<ParameterList> default_params =
151 data_.options.Fact = SLUD::DOFACT;
152 data_.equed = SLUD::NOEQUIL;
153 data_.options.SolveInitialized = SLUD::NO;
154 data_.options.RefineInitialized = SLUD::NO;
155 data_.rowequ =
false;
156 data_.colequ =
false;
164 data_.symb_comm = MPI_COMM_NULL;
169 data_.domains = (int) ( pow(2.0, floor(log10((
double)nprow*npcol)/log10(2.0))) );
171 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
172 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
184 int myProcParticipates = 0;
185 if (myRank < nprow * npcol) {
187 myProcParticipates = 1;
192 int numParticipatingProcs = 0;
193 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
194 outArg (numParticipatingProcs));
195 TEUCHOS_TEST_FOR_EXCEPTION(
197 std::logic_error,
"Amesos2::Superludist constructor: The matrix has " 198 << this->
globalNumRows_ <<
" > 0 global row(s), but no processes in the " 199 "communicator want to participate in its factorization! nprow = " 200 << nprow <<
" and npcol = " << npcol <<
".");
203 size_t myNumRows = 0;
206 const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
207 GNR /
static_cast<GO
> (numParticipatingProcs);
209 GNR - quotient *
static_cast<GO
> (numParticipatingProcs);
210 const GO lclNumRows = (
static_cast<GO
> (myRank) < remainder) ?
211 (quotient +
static_cast<GO
> (1)) : quotient;
212 myNumRows =
static_cast<size_t> (lclNumRows);
216 const GO indexBase = this->
matrixA_->getRowMap ()->getIndexBase ();
218 rcp (
new map_type (this->
globalNumRows_, myNumRows, indexBase, comm));
224 data_.A.Store = NULL;
226 SLUD::PStatInit(&(data_.stat));
229 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
230 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
234 template <
class Matrix,
class Vector>
248 free( data_.fstVtxSep );
253 if( data_.A.Store != NULL ){
254 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
259 function_map::Destroy_LU(this->
globalNumRows_, &(data_.grid), &(data_.lu));
261 function_map::LUstructFree(&(data_.lu));
269 if ( data_.pslu_freeable.xlsub != NULL ){
270 free( data_.pslu_freeable.xlsub );
271 free( data_.pslu_freeable.lsub );
273 if ( data_.pslu_freeable.xusub != NULL ){
274 free( data_.pslu_freeable.xusub );
275 free( data_.pslu_freeable.usub );
277 if ( data_.pslu_freeable.supno_loc != NULL ){
278 free( data_.pslu_freeable.supno_loc );
279 free( data_.pslu_freeable.xsup_beg_loc );
280 free( data_.pslu_freeable.xsup_end_loc );
282 free( data_.pslu_freeable.globToLoc );
285 SLUD::PStatFree( &(data_.stat) ) ;
290 if ( data_.options.SolveInitialized == SLUD::YES )
291 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
293 SLUD::superlu_gridexit(&(data_.grid));
297 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
300 template<
class Matrix,
class Vector>
309 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->
globalNumRows_);
310 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
322 free( data_.fstVtxSep );
324 #ifdef HAVE_AMESOS2_TIMERS 325 Teuchos::TimeMonitor preOrderTime( this->
timers_.preOrderTime_ );
329 info = SLUD::get_perm_c_parmetis( &(data_.A),
330 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
331 data_.grid.nprow * data_.grid.npcol, data_.domains,
332 &(data_.sizes), &(data_.fstVtxSep),
333 &(data_.grid), &(data_.symb_comm) );
335 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
337 "SuperLU_DIST pre-ordering ran out of memory after allocating " 338 << info <<
" bytes of memory" );
350 template <
class Matrix,
class Vector>
358 #ifdef HAVE_AMESOS2_TIMERS 359 Teuchos::TimeMonitor symFactTime( this->
timers_.symFactTime_ );
363 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
364 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
365 data_.perm_r.getRawPtr(), data_.sizes,
366 data_.fstVtxSep, &(data_.pslu_freeable),
367 &(data_.grid.comm), &(data_.symb_comm),
370 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
372 "SuperLU_DIST symbolic factorization ran out of memory after" 373 " allocating " << info <<
" bytes of memory" );
375 same_symbolic_ =
false;
376 same_solve_struct_ =
false;
382 template <
class Matrix,
class Vector>
401 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
402 for(
size_t i = 0; i < nnz_loc; ++i )
colind_[i] = data_.perm_c[
colind_[i]];
405 if( same_symbolic_ ){
410 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
412 &(data_.A), &(data_.scale_perm),
413 &(data_.glu_freeable), &(data_.lu),
416 function_map::dist_psymbtonum(SLUD::DOFACT,
418 &(data_.A), &(data_.scale_perm),
419 &(data_.pslu_freeable), &(data_.lu),
424 double anorm = function_map::plangs((
char *)
"I", &(data_.A), &(data_.grid));
428 #ifdef HAVE_AMESOS2_TIMERS 429 Teuchos::TimeMonitor numFactTimer(this->
timers_.numFactTime_);
432 function_map::gstrf(&(data_.options), this->globalNumRows_,
433 this->globalNumCols_, anorm, &(data_.lu),
434 &(data_.grid), &(data_.stat), &info);
438 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
440 "L and U factors have been computed but U(" 441 << info <<
"," << info <<
") is exactly zero " 442 "(i.e. U is singular)");
449 data_.options.Fact = SLUD::FACTORED;
450 same_symbolic_ =
true;
456 template <
class Matrix,
class Vector>
466 const global_size_type nrhs = X->getGlobalNumVectors();
467 const global_ordinal_type first_global_row_b =
superlu_rowmap_->getMinGlobalIndex();
470 bvals_.resize(nrhs * local_len_rhs);
471 xvals_.resize(nrhs * local_len_rhs);
477 #ifdef HAVE_AMESOS2_TIMERS 478 Teuchos::TimeMonitor convTimer(this->
timers_.vecConvTime_);
488 #ifdef HAVE_AMESOS2_TIMERS 489 Teuchos::TimeMonitor redistTimer(this->
timers_.vecRedistTime_);
495 copy_helper::do_get(B,
519 if( !same_solve_struct_ ){
520 if( data_.options.SolveInitialized == SLUD::YES ){
521 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
523 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
524 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
525 &(data_.grid), &(data_.solve_struct));
529 same_solve_struct_ =
true;
534 #ifdef HAVE_AMESOS2_TIMERS 535 Teuchos::TimeMonitor solveTimer(this->
timers_.solveTime_);
538 function_map::gstrs(as<SLUD::int_t>(this->
globalNumRows_), &(data_.lu),
539 &(data_.scale_perm), &(data_.grid),
bvals_.getRawPtr(),
540 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
541 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
542 &(data_.solve_struct), &(data_.stat), &ierr);
545 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
547 "Argument " << -ierr <<
" to gstrs had an illegal value" );
562 #ifdef HAVE_AMESOS2_TIMERS 563 Teuchos::TimeMonitor redistTimer(this->
timers_.vecRedistTime_);
565 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
566 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
567 as<SLUD::int_t>(local_len_rhs),
568 data_.solve_struct.row_to_proc,
569 data_.solve_struct.inv_perm_c,
579 #ifdef HAVE_AMESOS2_TIMERS 580 Teuchos::TimeMonitor redistTimer(this->
timers_.vecRedistTime_);
584 put_helper::do_put(X,
594 template <
class Matrix,
class Vector>
603 template <
class Matrix,
class Vector>
609 using Teuchos::getIntegralValue;
610 using Teuchos::ParameterEntryValidator;
614 if( parameterList->isParameter(
"npcol") || parameterList->isParameter(
"nprow") ){
615 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter(
"nprow") &&
616 parameterList->isParameter(
"npcol")),
617 std::invalid_argument,
618 "nprow and npcol must be set together" );
620 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>(
"nprow");
621 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>(
"npcol");
623 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->
getComm()->getSize(),
624 std::invalid_argument,
625 "nprow and npcol combination invalid" );
627 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
629 SLUD::superlu_gridexit(&(data_.grid));
631 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
635 TEUCHOS_TEST_FOR_EXCEPTION( this->
control_.useTranspose_,
636 std::invalid_argument,
637 "SuperLU_DIST does not support solving the tranpose system" );
639 data_.options.Trans = SLUD::NOTRANS;
644 data_.options.Equil = SLUD::NO;
646 if( parameterList->isParameter(
"ColPerm") ){
647 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
648 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
650 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList,
"ColPerm");
657 data_.options.RowPerm = SLUD::NOROWPERM;
666 data_.options.IterRefine = SLUD::NOREFINE;
668 bool replace_tiny = parameterList->get<
bool>(
"ReplaceTinyPivot",
true);
669 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
673 template <
class Matrix,
class Vector>
674 Teuchos::RCP<const Teuchos::ParameterList>
678 using Teuchos::tuple;
679 using Teuchos::ParameterList;
680 using Teuchos::EnhancedNumberValidator;
681 using Teuchos::setStringToIntegralParameter;
682 using Teuchos::stringToIntegralParameterEntryValidator;
684 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
686 if( is_null(valid_params) ){
687 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
689 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
690 = Teuchos::rcp(
new EnhancedNumberValidator<SLUD::int_t>() );
691 col_row_validator->setMin(1);
693 pl->set(
"npcol", data_.grid.npcol,
694 "Number of columns in the processor grid. " 695 "Must be set with nprow", col_row_validator);
696 pl->set(
"nprow", data_.grid.nprow,
697 "Number of rows in the SuperLU_DIST processor grid. " 698 "Must be set together with npcol", col_row_validator);
701 setStringToIntegralParameter<SLUD::trans_t>(
"Trans",
"NOTRANS",
702 "Solve for the transpose system or not",
703 tuple<string>(
"NOTRANS"),
704 tuple<string>(
"Do not solve with transpose"),
705 tuple<SLUD::trans_t>(SLUD::NOTRANS),
721 pl->set(
"ReplaceTinyPivot",
true,
722 "Specifies whether to replace tiny diagonals during LU factorization");
724 setStringToIntegralParameter<SLUD::colperm_t>(
"ColPerm",
"PARMETIS",
725 "Specifies how to permute the columns of the " 726 "matrix for sparsity preservation",
727 tuple<string>(
"NATURAL",
"PARMETIS"),
728 tuple<string>(
"Natural ordering",
729 "ParMETIS ordering on A^T + A"),
730 tuple<SLUD::colperm_t>(SLUD::NATURAL,
741 template <
class Matrix,
class Vector>
745 SLUD::int_t& npcol)
const {
746 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
747 std::invalid_argument,
748 "Number of MPI processes must be at least 1" );
749 SLUD::int_t c, r = 1;
750 while( r*r <= nprocs ) r++;
753 while( (r--)*c != nprocs ){
759 if( r > 1 || nprocs < 9){
766 template <
class Matrix,
class Vector>
770 using Teuchos::Array;
771 using Teuchos::ArrayView;
772 using Teuchos::ptrInArg;
777 #ifdef HAVE_AMESOS2_TIMERS 778 Teuchos::TimeMonitor convTimer(this->
timers_.mtxConvTime_);
782 if( data_.A.Store != NULL ){
783 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
784 data_.A.Store = NULL;
787 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
790 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
791 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
792 l_rows = as<int_t>(redist_mat->getLocalNumRows());
793 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
803 #ifdef HAVE_AMESOS2_TIMERS 804 Teuchos::TimeMonitor mtxRedistTimer( this->
timers_.mtxRedistTime_ );
809 slu_type, int_t, int_t >::do_get(redist_mat.ptr(),
816 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
818 "Did not get the expected number of non-zero vals");
821 SLUD::Dtype_t dtype = type_map::dtype;
824 function_map::create_CompRowLoc_Matrix(&(data_.A),
826 l_nnz, l_rows, fst_global_row,
831 dtype, SLUD::SLU_GE);
838 template<
class Matrix,
class Vector>
844 #endif // AMESOS2_SUPERLUDIST_DEF_HPP int getNumNumericFact() const
Returns the number of numeric factorizations performed by the owning solver.
Definition: Amesos2_Status.hpp:102
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:90
int getNumPreOrder() const
Returns the number of pre-orderings performed by the owning solver.
Definition: Amesos2_Status.hpp:94
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Teuchos::Array< int > colind_
Stores the column indices of the nonzero entries.
Definition: Amesos2_Superludist_decl.hpp:311
Similar to get_ccs_helper, but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:591
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:479
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:68
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superludist_def.hpp:605
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:476
Teuchos::Array< slu_type > nzvals_
Stores the values of the nonzero entries for SuperLU_DIST.
Definition: Amesos2_Superludist_decl.hpp:309
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:243
Definition: Amesos2_TypeDecl.hpp:142
Utility functions for Amesos2.
Control control_
Parameters for solving.
Definition: Amesos2_SolverCore_decl.hpp:495
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:675
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a const parameter list of all of the valid parameters that this->setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:307
Provides definition of SuperLU_DIST types as well as conversions and type traits. ...
Definition: Amesos2_AbstractConcreteMatrixAdapter.hpp:48
bool symbolicFactorizationDone() const
If true, then symbolic factorization has been performed.
Definition: Amesos2_Status.hpp:114
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:302
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns a pointer to the Teuchos::Comm communicator associated with this operator.
Definition: Amesos2_SolverCore_decl.hpp:363
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
Teuchos::Array< slu_type > bvals_
1D store for B values
Definition: Amesos2_Superludist_decl.hpp:315
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:596
bool numericFactorizationDone() const
If true, then numeric factorization has been performed.
Definition: Amesos2_Status.hpp:118
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:327
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:235
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:275
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:352
Teuchos::Array< int > rowptr_
Stores the location in colind_ and nzvals_ that starts row j.
Definition: Amesos2_Superludist_decl.hpp:313
Timers timers_
Various timing statistics.
Definition: Amesos2_SolverCore_decl.hpp:498
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:743
bool in_grid_
true if this processor is in SuperLU_DIST's 2D process grid
Definition: Amesos2_Superludist_decl.hpp:320
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:296
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:458
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:175
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:768
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:455
Teuchos::Array< slu_type > xvals_
1D store for X values
Definition: Amesos2_Superludist_decl.hpp:317
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:384
Status status_
Holds status information about a solver.
Definition: Amesos2_SolverCore_decl.hpp:492