IFPACK  Development
Ifpack_SORa.cpp
1 /*
2 //@HEADER
3 // ***********************************************************************
4 //
5 // Ifpack: Object-Oriented Algebraic Preconditioner Package
6 // Copyright (2002) Sandia Corporation
7 //
8 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9 // license for use of this work by or on behalf of the U.S. Government.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //@HEADER
42 */
43 
44 #include "Ifpack_SORa.h"
45 #include "Ifpack.h"
46 #include "Ifpack_Utils.h"
47 #include "Teuchos_ParameterList.hpp"
48 #include "Teuchos_RCP.hpp"
49 #include "Epetra_Import.h"
50 #include "Epetra_Export.h"
51 #include "Epetra_IntVector.h"
52 
53 #ifdef HAVE_IFPACK_EPETRAEXT
54 #include "EpetraExt_MatrixMatrix.h"
55 #include "EpetraExt_RowMatrixOut.h"
56 #include "EpetraExt_MultiVectorOut.h"
57 #include "EpetraExt_Transpose_RowMatrix.h"
58 
59 
60 #define ABS(x) ((x)>=0?(x):-(x))
61 #define MIN(x,y) ((x)<(y)?(x):(y))
62 #define MAX(x,y) ((x)>(y)?(x):(y))
63 
64 // Useful functions horked from ML
65 int* FindLocalDiricheltRowsFromOnesAndZeros(const Epetra_CrsMatrix & Matrix, int &numBCRows);
66 void Apply_BCsToMatrixRowsAndColumns(const int *dirichletRows, int numBCRows,const Epetra_IntVector &dirichletColumns,const Epetra_CrsMatrix & Matrix);
67 Epetra_IntVector * FindLocalDirichletColumnsFromRows(const int *dirichletRows, int numBCRows,const Epetra_CrsMatrix & Matrix);
68 
69 Ifpack_SORa::Ifpack_SORa(Epetra_RowMatrix* A):
70  IsInitialized_(false),
71  IsComputed_(false),
72  Label_(),
73  Alpha_(1.5),
74  Gamma_(1.0),
75  NumSweeps_(1),
76  IsParallel_(false),
77  HaveOAZBoundaries_(false),
78  UseInterprocDamping_(false),
79  UseGlobalDamping_(false),
80  LambdaMax_(-1.0),
81  Time_(A->Comm())
82 {
83  Epetra_CrsMatrix *Acrs=dynamic_cast<Epetra_CrsMatrix*>(A);
84  if (Acrs) {
85  Acrs_= Teuchos::rcp (Acrs, false);
86  }
87  A_ = Teuchos::rcp (A, false);
88 }
89 
90 void Ifpack_SORa::Destroy(){
91 }
92 
93 
94 
95 int Ifpack_SORa::Initialize(){
96  Alpha_ = List_.get("sora: alpha", Alpha_);
97  Gamma_ = List_.get("sora: gamma",Gamma_);
98  NumSweeps_ = List_.get("sora: sweeps",NumSweeps_);
99  HaveOAZBoundaries_= List_.get("sora: oaz boundaries", HaveOAZBoundaries_);
100  UseInterprocDamping_ = List_.get("sora: use interproc damping",UseInterprocDamping_);
101  UseGlobalDamping_ = List_.get("sora: use global damping",UseGlobalDamping_);
102 
103  if (A_->Comm().NumProc() != 1) IsParallel_ = true;
104  else {
105  IsParallel_ = false;
106  // Don't use interproc damping, for obvious reasons
107  UseInterprocDamping_=false;
108  }
109 
110  // Counters
111  IsInitialized_=true;
112  NumInitialize_++;
113  return 0;
114 }
115 
116 int Ifpack_SORa::SetParameters(Teuchos::ParameterList& parameterlist){
117  List_=parameterlist;
118  return 0;
119 }
120 
121 
122 template<typename int_type>
123 int Ifpack_SORa::TCompute(){
124  using std::cout;
125  using std::endl;
126 
127  Epetra_Map *RowMap=const_cast<Epetra_Map*>(&A_->RowMatrixRowMap());
128  Epetra_Vector Adiag(*RowMap);
129  Epetra_CrsMatrix *Askew2=0, *Aherm2=0,*W=0;
130  int *rowptr_s,*colind_s,*rowptr_h,*colind_h;
131  double *vals_s,*vals_h;
132  bool RowMatrixMode=(Acrs_==Teuchos::null);
133 
134  // Label
135  sprintf(Label_, "IFPACK SORa (alpha=%5.2f gamma=%5.2f)",Alpha_,Gamma_);
136 
137  if(RowMatrixMode){
138  if(!A_->Comm().MyPID()) cout<<"SORa: RowMatrix mode enabled"<<endl;
139  // RowMatrix mode, build Acrs_ the hard way.
140  Epetra_RowMatrix *Arow=&*A_;
141  Epetra_Map *ColMap=const_cast<Epetra_Map*>(&A_->RowMatrixColMap());
142 
143  int Nmax=A_->MaxNumEntries();
144  int length;
145  std::vector<double> values(Nmax);
146  Epetra_CrsMatrix *Acrs=new Epetra_CrsMatrix(Copy,*RowMap,Nmax);
147 
148  std::vector<int> indices_local(Nmax);
149 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
150  if(RowMap->GlobalIndicesInt()) {
151  for(int i=0;i<Arow->NumMyRows();i++) {
152  Arow->ExtractMyRowCopy(i,Nmax,length,&values[0],&indices_local[0]);
153  for(int j=0;j<length;j++)
154  indices_local[j]= ColMap->GID(indices_local[j]);
155  Acrs->InsertGlobalValues(RowMap->GID(i),length,&values[0],&indices_local[0]);
156  }
157  }
158  else
159 #endif
160 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
161  if(RowMap->GlobalIndicesLongLong()) {
162  std::vector<int_type> indices_global(Nmax);
163  for(int i=0;i<Arow->NumMyRows();i++) {
164  Arow->ExtractMyRowCopy(i,Nmax,length,&values[0],&indices_local[0]);
165  for(int j=0;j<length;j++)
166  indices_global[j]= (int_type) ColMap->GID64(indices_local[j]);
167  Acrs->InsertGlobalValues((int_type) RowMap->GID64(i),length,&values[0],&indices_global[0]);
168  }
169  }
170  else
171 #endif
172  throw "Ifpack_SORa::TCompute: GlobalIndices type unknown";
173 
174  Acrs->FillComplete(A_->OperatorDomainMap(),A_->OperatorRangeMap());
175  Acrs_ = Teuchos::rcp (Acrs, true);
176  }
177 
178  // Create Askew2
179  // Note: Here I let EpetraExt do the thinking for me. Since it gets all the maps correct for the E + F^T stencil.
180  // There are probably more efficient ways to do this but this method has the bonus of allowing code reuse.
181  IFPACK_CHK_ERR(EpetraExt::MatrixMatrix::Add(*Acrs_,false,1,*Acrs_,true,-1,Askew2));
182  Askew2->FillComplete();
183 
184  // Create Aherm2
185  IFPACK_CHK_ERR(EpetraExt::MatrixMatrix::Add(*Acrs_,false,1,*Acrs_,true,1,Aherm2));
186  Aherm2->FillComplete();
187 
188  int nnz2=Askew2->NumMyNonzeros();
189  int N=Askew2->NumMyRows();
190 
191 
192  // Grab pointers
193  IFPACK_CHK_ERR(Askew2->ExtractCrsDataPointers(rowptr_s,colind_s,vals_s));
194  IFPACK_CHK_ERR(Aherm2->ExtractCrsDataPointers(rowptr_h,colind_h,vals_h));
195 
196  // Sanity checking: Make sure the sparsity patterns are the same
197 #define SANITY_CHECK
198 #ifdef SANITY_CHECK
199  for(int i=0;i<N;i++)
200  if(rowptr_s[i]!=rowptr_h[i]) IFPACK_CHK_ERR(-2);
201  for(int i=0;i<nnz2;i++)
202  if(colind_s[i]!=colind_h[i]) IFPACK_CHK_ERR(-3);
203 #endif
204 
205  // Dirichlet Detection & Nuking of Aherm2 and Askew2
206  // Note: Relies on Aherm2/Askew2 having identical sparsity patterns (see sanity check above)
207  if(HaveOAZBoundaries_){
208  int numBCRows;
209  int* dirRows=FindLocalDiricheltRowsFromOnesAndZeros(*Acrs_,numBCRows);
210  Epetra_IntVector* dirCols=FindLocalDirichletColumnsFromRows(dirRows,numBCRows,*Aherm2);
211  Apply_BCsToMatrixRowsAndColumns(dirRows,numBCRows,*dirCols,*Aherm2);
212  Apply_BCsToMatrixRowsAndColumns(dirRows,numBCRows,*dirCols,*Askew2);
213  delete [] dirRows;
214  delete dirCols;
215  }
216 
217  // Grab diagonal of A
218  A_->ExtractDiagonalCopy(Adiag);
219 
220  // Allocate the diagonal for W
221  Epetra_Vector *Wdiag = new Epetra_Vector(*RowMap);
222 
223  // Build the W matrix (lower triangle only)
224  // Note: Relies on EpetraExt giving me identical sparsity patterns for both Askew2 and Aherm2 (see sanity check above)
225  int maxentries=Askew2->MaxNumEntries();
226  int_type* gids=new int_type [maxentries];
227  double* newvals=new double[maxentries];
228  W=new Epetra_CrsMatrix(Copy,*RowMap,0);
229  for(int i=0;i<N;i++){
230  // Build the - (1+alpha)/2 E - (1-alpha)/2 F part of the W matrix
231  int_type rowgid = (int_type) Acrs_->GRID64(i);
232  double c_data=0.0;
233  double ipdamp=0.0;
234  int idx=0;
235 
236  for(int j=rowptr_s[i];j<rowptr_s[i+1];j++){
237  int_type colgid = (int_type) Askew2->GCID64(colind_s[j]);
238  c_data+=fabs(vals_s[j]);
239  if(rowgid>colgid){
240  // Rely on the fact that off-diagonal entries are always numbered last, dropping the entry entirely.
241  if(colind_s[j] < N) {
242  gids[idx]=colgid;
243  newvals[idx]=vals_h[j]/2 + Alpha_ * vals_s[j]/2;
244  idx++;
245  }
246  else{
247  ipdamp+=fabs(vals_h[j]/2 + Alpha_ * vals_s[j]/2);
248  }
249  }
250  }
251  if(idx>0)
252  IFPACK_CHK_ERR(W->InsertGlobalValues(rowgid,idx,newvals,gids));
253 
254  // Do the diagonal
255  double w_val= c_data*Alpha_*Gamma_/4 + Adiag[Acrs_->LRID(rowgid)];
256  if(UseInterprocDamping_) w_val+=ipdamp;
257 
258  W->InsertGlobalValues(rowgid,1,&w_val,&rowgid);
259  IFPACK_CHK_ERR(Wdiag->ReplaceGlobalValues(1,&w_val,&rowgid));
260  }
261  W->FillComplete(A_->OperatorDomainMap(),A_->OperatorRangeMap());
262  W_= Teuchos::rcp(W);
263  Wdiag_= Teuchos::rcp(Wdiag);
264 
265  // Mark as computed
266  IsComputed_=true;
267 
268  // Global damping, if wanted
269  if(UseGlobalDamping_) {
270  PowerMethod(10,LambdaMax_);
271  if(!A_->Comm().MyPID()) printf("SORa: Global damping parameter = %6.4e (lmax=%6.4e)\n",GetOmega(),LambdaMax_);
272  }
273 
274  // Cleanup
275  delete [] gids;
276  delete [] newvals;
277  delete Aherm2;
278  delete Askew2;
279  if(RowMatrixMode) {
280  Acrs_=Teuchos::null;
281  }
282 
283  // Counters
284  NumCompute_++;
285  ComputeTime_ += Time_.ElapsedTime();
286  return 0;
287 }
288 
289 int Ifpack_SORa::Compute(){
290  if(!IsInitialized_) Initialize();
291  int ret_val = 0;
292 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
293  if(A_->RowMatrixRowMap().GlobalIndicesInt()) {
294  ret_val = TCompute<int>();
295  }
296  else
297 #endif
298 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
299  if(A_->RowMatrixRowMap().GlobalIndicesLongLong()) {
300  ret_val = TCompute<long long>();
301  }
302  else
303 #endif
304  throw "Ifpack_SORa::Compute: GlobalIndices type unknown";
305 
306  return ret_val;
307 }
308 
309 
310 int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{
311  if(!IsComputed_) return -1;
312  Time_.ResetStartTime();
313  bool initial_guess_is_zero=false;
314  const int lclNumRows = W_->NumMyRows();
315  const int NumVectors = X.NumVectors();
316  Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors);
317 
318  double omega=GetOmega();
319 
320  // need to create an auxiliary vector, Xcopy
321  Teuchos::RCP<const Epetra_MultiVector> Xcopy;
322  if (X.Pointers()[0] == Y.Pointers()[0]){
323  Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
324  // Since the user didn't give us anything better, our initial guess is zero.
325  Y.Scale(0.0);
326  initial_guess_is_zero=true;
327  }
328  else
329  Xcopy = Teuchos::rcp( &X, false );
330 
331  Teuchos::RCP< Epetra_MultiVector > T2;
332  // Note: Assuming that the matrix has an importer. Ifpack_PointRelaxation doesn't do this, but given that
333  // I have a CrsMatrix, I'm probably OK.
334  // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine
335  // if things are on or off processor.
336  // Note: T2 must be zero'd out
337  if (IsParallel_ && W_->Importer()) T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true));
338  else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true));
339 
340  // Pointer grabs
341  int* rowptr,*colind;
342  double *values;
343  double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr;
344  T2->ExtractView(&t2_ptr);
345  Y.ExtractView(&y_ptr);
346  Temp.ExtractView(&t_ptr);
347  Xcopy->ExtractView(&x_ptr);
348  Wdiag_->ExtractView(&d_ptr);
349  IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values));
350 
351 
352  for(int i=0; i<NumSweeps_; i++){
353  // Calculate b-Ax
354  if(!initial_guess_is_zero || i > 0) {
355  A_->Apply(Y,Temp);
356  Temp.Update(1.0,*Xcopy,-1.0);
357  }
358  else
359  Temp.Update(1.0,*Xcopy,0.0);
360 
361  // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated
362  // in this sweep before they are used, so we don't need to reset T2 to zero here.
363 
364  // Do backsolve & update
365  // x = x + W^{-1} (b - A x)
366  for(int j=0; j<lclNumRows; j++){
367  double diag=d_ptr[j];
368  for (int m=0 ; m<NumVectors; m++) {
369  double dtmp=0.0;
370  // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute.
371  t2_ptr[m][j]=0.0;
372  for(int k=rowptr[j];k<rowptr[j+1];k++){
373  dtmp+= values[k]*t2_ptr[m][colind[k]];
374  }
375  // Yes, we need to update both of these.
376  t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag;
377  y_ptr[m][j] += omega*t2_ptr[m][j];
378  }
379  }
380  }
381 
382  // Counter update
383  NumApplyInverse_++;
384  ApplyInverseTime_ += Time_.ElapsedTime();
385  return 0;
386 }
387 
388 
389 std::ostream& Ifpack_SORa::Print(std::ostream& os) const{
390  using std::endl;
391 
392  os<<"Ifpack_SORa"<<endl;
393  os<<" alpha = "<<Alpha_<<endl;
394  os<<" gamma = "<<Gamma_<<endl;
395  os<<endl;
396  return os;
397 }
398 
399 
400 double Ifpack_SORa::Condest(const Ifpack_CondestType CT,
401  const int MaxIters,
402  const double Tol,
403  Epetra_RowMatrix* Matrix_in){
404  return -1.0;
405 }
406 
407 
408 
409 
410 
411 // ============================================================================
412 inline int* FindLocalDiricheltRowsFromOnesAndZeros(const Epetra_CrsMatrix & Matrix, int &numBCRows){
413  int *dirichletRows = new int[Matrix.NumMyRows()];
414  numBCRows = 0;
415  for (int i=0; i<Matrix.NumMyRows(); i++) {
416  int numEntries, *cols;
417  double *vals;
418  int ierr = Matrix.ExtractMyRowView(i,numEntries,vals,cols);
419  if (ierr == 0) {
420  int nz=0;
421  for (int j=0; j<numEntries; j++) if (vals[j]!=0) nz++;
422  if (nz==1) dirichletRows[numBCRows++] = i;
423  // EXPERIMENTAL: Treat Special Inflow Boundaries as Dirichlet Boundaries
424  if(nz==2) dirichletRows[numBCRows++] = i;
425  }/*end if*/
426  }/*end for*/
427  return dirichletRows;
428 }/*end FindLocalDiricheltRowsFromOnesAndZeros*/
429 
430 
431 // ======================================================================
433 inline Epetra_IntVector * FindLocalDirichletColumnsFromRows(const int *dirichletRows, int numBCRows,const Epetra_CrsMatrix & Matrix){
434  // Zero the columns corresponding to the Dirichlet rows. Completely ignore the matrix maps.
435 
436  // Build rows
437  Epetra_IntVector ZeroRows(Matrix.RowMap());
438  Epetra_IntVector *ZeroCols=new Epetra_IntVector(Matrix.ColMap());
439  ZeroRows.PutValue(0);
440  ZeroCols->PutValue(0);
441 
442  // Easy Case: We're all on one processor
443  if(Matrix.RowMap().SameAs(Matrix.ColMap())){
444  for (int i=0; i < numBCRows; i++)
445  (*ZeroCols)[dirichletRows[i]]=1;
446  return ZeroCols;
447  }
448 
449  // Flag the rows which are zero locally
450  for (int i=0; i < numBCRows; i++)
451  ZeroRows[dirichletRows[i]]=1;
452 
453  // Boundary exchange to move the data
454  if(Matrix.RowMap().SameAs(Matrix.DomainMap())){
455  // Use A's Importer if we have one
456  ZeroCols->Import(ZeroRows,*Matrix.Importer(),Insert);
457  }
458  else{
459  // Use custom importer if we don't
460  Epetra_Import Importer(Matrix.ColMap(),Matrix.RowMap());
461  ZeroCols->Import(ZeroRows,Importer,Insert);
462  }
463  return ZeroCols;
464 }
465 
466 
467 // ======================================================================
468 inline void Apply_BCsToMatrixRowsAndColumns(const int *dirichletRows, int numBCRows,const Epetra_IntVector &dirichletColumns,const Epetra_CrsMatrix & Matrix){
469  /* This function zeros out rows & columns of Matrix.
470  Comments: The graph of Matrix is unchanged.
471  */
472  // Nuke the rows
473  for(int i=0;i<numBCRows;i++){
474  int numEntries, *cols;
475  double *vals;
476  Matrix.ExtractMyRowView(dirichletRows[i],numEntries,vals,cols);
477  for (int j=0; j<numEntries; j++) vals[j]=0.0;
478  }/*end for*/
479 
480  // Nuke the columns
481  for (int i=0; i < Matrix.NumMyRows(); i++) {
482  int numEntries;
483  double *vals;
484  int *cols;
485  Matrix.ExtractMyRowView(i,numEntries,vals,cols);
486  for (int j=0; j < numEntries; j++) {
487  if (dirichletColumns[ cols[j] ] > 0) vals[j] = 0.0;
488  }/*end for*/
489  }/*end for*/
490 }/* end Apply_BCsToMatrixColumns */
491 
492 
493 
494 
495 
496 int Ifpack_SORa::
497 PowerMethod(const int MaximumIterations, double& lambda_max)
498 {
499  // this is a simple power method
500  lambda_max = 0.0;
501  double RQ_top, RQ_bottom, norm;
502  Epetra_Vector x(A_->OperatorDomainMap());
503  Epetra_Vector y(A_->OperatorRangeMap());
504  Epetra_Vector z(A_->OperatorRangeMap());
505  x.Random();
506  x.Norm2(&norm);
507  if (norm == 0.0) IFPACK_CHK_ERR(-1);
508 
509  x.Scale(1.0 / norm);
510 
511  // Only do 1 sweep per PM step, turn off global damping
512  int NumSweepsBackup=NumSweeps_;
513  bool UseGlobalDampingBackup=UseGlobalDamping_;
514  NumSweeps_=1;UseGlobalDamping_=false;
515 
516  for (int iter = 0; iter < MaximumIterations; ++iter)
517  {
518  y.PutScalar(0.0);
519  A_->Apply(x, z);
520  ApplyInverse(z,y);
521  y.Update(1.0,x,-1.0);
522 
523  // Compute the Rayleigh Quotient
524  y.Dot(x, &RQ_top);
525  x.Dot(x, &RQ_bottom);
526  lambda_max = RQ_top / RQ_bottom;
527  y.Norm2(&norm);
528  if (norm == 0.0) IFPACK_CHK_ERR(-1);
529  x.Update(1.0 / norm, y, 0.0);
530 
531  }
532 
533  // Enable if we want to prevent over-relaxation
534  // lambda_max=MAX(lambda_max,1.0);
535 
536  // Reset parameters
537  NumSweeps_=NumSweepsBackup;
538  UseGlobalDamping_=UseGlobalDampingBackup;
539 
540  return(0);
541 }
542 
543 #endif