Actual source code: dgmres.c

  2: /*
  3:     This file implements the deflated GMRES
  4:     References:  Erhel, Burrage and Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.


  7:  */

 9:  #include dgmresimpl.h
 10: #define GMRES_DELTA_DIRECTIONS 10
 11: #define GMRES_DEFAULT_MAXK     30
 12: #define PetscTruth PetscBool
 13: static PetscErrorCode    DGMRESGetNewVectors (KSP,PetscInt);
 14: static PetscErrorCode    DGMRESUpdateHessenberg (KSP,PetscInt,PetscTruth,PetscReal*);
 15: static PetscErrorCode    BuildDGmresSoln (PetscScalar*,Vec,Vec,KSP,PetscInt);

 17: /* Functions from gmres */

 37: PetscErrorCode  KSPDGMRESSetEigen (KSP ksp,PetscInt nb_eig) {

 41:     PetscTryMethod ( (ksp),"KSPDGMRESSetEigen_C", (KSP,PetscInt), (ksp,nb_eig));
 42:     CHKERRQ (ierr);
 43:     PetscFunctionReturn (0);
 44: }
 47: PetscErrorCode  KSPDGMRESSetMaxEigen (KSP ksp,PetscInt max_neig) {

 51:     PetscTryMethod ( (ksp),"KSPDGMRESSetMaxEigen_C", (KSP,PetscInt), (ksp,max_neig));
 52:     CHKERRQ (ierr);
 53:     PetscFunctionReturn (0);
 54: }
 57: PetscErrorCode  KSPDGMRESForce (KSP ksp,PetscInt force) {

 61:         PetscTryMethod ( (ksp),"KSPDGMRESForce_C", (KSP,PetscInt), (ksp,force));
 62:         CHKERRQ (ierr);
 63:         PetscFunctionReturn (0);
 64: }
 67: PetscErrorCode  KSPDGMRESSetRatio (KSP ksp,PetscReal ratio) {

 71:         PetscTryMethod ( (ksp),"KSPDGMRESSetRatio_C", (KSP,PetscReal), (ksp,ratio));
 72:         CHKERRQ (ierr);
 73:         PetscFunctionReturn (0);
 74: }
 77: PetscErrorCode  KSPDGMRESComputeSchurForm (KSP ksp,PetscInt *neig) {

 81:     PetscTryMethod ( (ksp),"KSPDGMRESComputeSchurForm_C", (KSP, PetscInt*), (ksp, neig));
 82:     CHKERRQ (ierr);
 83:     PetscFunctionReturn (0);
 84: }
 87: PetscErrorCode  KSPDGMRESComputeDeflationData (KSP ksp) {

 91:     PetscTryMethod ( (ksp),"KSPDGMRESComputeDeflationData_C", (KSP), (ksp));
 92:     CHKERRQ (ierr);
 93:     PetscFunctionReturn (0);
 94: }
 97: PetscErrorCode  KSPDGMRESApplyDeflation (KSP ksp, Vec x, Vec y) {

101:     PetscTryMethod ( (ksp),"KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
102:     CHKERRQ (ierr);
103:     PetscFunctionReturn (0);
104: }

108: PetscErrorCode  KSPDGMRESImproveEig (KSP ksp, PetscInt neig) {

112:     PetscTryMethod ( (ksp), "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
113:     CHKERRQ (ierr);
114:     PetscFunctionReturn (0);
115: }
118: PetscErrorCode    KSPSetUp_DGMRES (KSP ksp) {
119:     PetscErrorCode    ierr;
120:     KSP_DGMRES        *dgmres   = (KSP_DGMRES *) ksp->data;
121:     PetscInt                    neig            = dgmres->neig+EIG_OFFSET;
122:     PetscInt                    max_k           = dgmres->max_k+1;


126:     KSPSetUp_GMRES (ksp);
127:     CHKERRQ (ierr);
128:     if (dgmres->neig==0)
129:         PetscFunctionReturn (0);

131:     /* Allocate workspace for the Schur vectors*/
132:     ierr=PetscMalloc ( (neig) *max_k*sizeof (PetscReal), &SR);
133:     CHKERRQ (ierr);
134:     UU = PETSC_NULL;
135:     XX = PETSC_NULL;
136:     MX = PETSC_NULL;
137:     AUU = PETSC_NULL;
138:     XMX = PETSC_NULL;
139:     XMU = PETSC_NULL;
140:     UMX = PETSC_NULL;
141:     AUAU = PETSC_NULL;
142:     TT = PETSC_NULL;
143:     TTF = PETSC_NULL;
144:     INVP = PETSC_NULL;
145:     X1 = PETSC_NULL;
146:     X2 = PETSC_NULL;
147:     MU = PETSC_NULL;
148:     PetscFunctionReturn (0);
149: }

151: /*
152:     Run GMRES, possibly with restart.  Return residual history if requested.
153:     input parameters:

155: .       gmres  - structure containing parameters and work areas

157:     output parameters:
158: .        nres    - residuals (from preconditioned system) at each step.
159:                   If restarting, consider passing nres+it.  If null,
160:                   ignored
161: .        itcount - number of iterations used.  nres[0] to nres[itcount]
162:                   are defined.  If null, ignored.

164:     Notes:
165:     On entry, the value in vector VEC_VV(0) should be the initial residual
166:     (this allows shortcuts where the initial preconditioned residual is 0).
167:  */
170: PetscErrorCode DGMREScycle (PetscInt *itcount,KSP ksp) {
171:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) (ksp->data);
172:     PetscReal      res_norm,res,hapbnd,tt;
174:     PetscInt       it = 0, max_k = dgmres->max_k;
175:     PetscTruth     hapend = PETSC_FALSE;
176:     PetscReal           res_old;
177:     PetscInt            test;

180:     VecNormalize (VEC_VV (0),&res_norm);
181:     CHKERRQ (ierr);
182:     res     = res_norm;
183:     *GRS (0) = res_norm;

185:     /* check for the convergence */
186:     PetscObjectTakeAccess (ksp);
187:     CHKERRQ (ierr);
188:     ksp->rnorm = res;
189:     PetscObjectGrantAccess (ksp);
190:     CHKERRQ (ierr);
191:     dgmres->it = (it - 1);
192:     KSPLogResidualHistory (ksp,res);
193:     KSPMonitor (ksp,ksp->its,res);
194:     if (!res) {
195:         if (itcount) *itcount = 0;
196:         ksp->reason = KSP_CONVERGED_ATOL;
197:         PetscInfo (ksp,"Converged due to zero residual norm on entry\n");
198:         CHKERRQ (ierr);
199:         PetscFunctionReturn (0);
200:     }
201:     /* record the residual norm to test if deflation is needed */
202:     res_old = res;

204:     (*ksp->converged) (ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
205:     CHKERRQ (ierr);
206:     while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
207:         if (it) {
208:             KSPLogResidualHistory (ksp,res);
209:             KSPMonitor (ksp,ksp->its,res);
210:         }
211:         dgmres->it = (it - 1);
212:         if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
213:             DGMRESGetNewVectors (ksp,it+1);
214:             CHKERRQ (ierr);
215:         }
216:         if (dgmres->r > 0) {
217:             if (ksp->pc_side == PC_LEFT) {
218:                 /* Apply the first preconditioner */
219:                 KSP_PCApplyBAorAB (ksp,VEC_VV (it), VEC_TEMP,VEC_TEMP_MATOP);
220:                 CHKERRQ (ierr);
221:                 /* Then apply Deflation as a preconditioner */
222:                 ierr=KSPDGMRESApplyDeflation (ksp, VEC_TEMP, VEC_VV (1+it));
223:                 CHKERRQ (ierr);
224:             } else if (ksp->pc_side == PC_RIGHT) {
225:                 ierr=KSPDGMRESApplyDeflation (ksp, VEC_VV (it), VEC_TEMP);
226:                 CHKERRQ (ierr);
227:                 ierr=KSP_PCApplyBAorAB (ksp, VEC_TEMP, VEC_VV (1+it), VEC_TEMP_MATOP);
228:                 CHKERRQ (ierr);
229:             }
230:         } else {
231:             KSP_PCApplyBAorAB (ksp,VEC_VV (it),VEC_VV (1+it),VEC_TEMP_MATOP);
232:             CHKERRQ (ierr);
233:         }
234:         dgmres->matvecs += 1;
235:         /* update hessenberg matrix and do Gram-Schmidt */
236:         (*dgmres->orthog) (ksp,it);
237:         CHKERRQ (ierr);

239:         /* vv(i+1) . vv(i+1) */
240:         VecNormalize (VEC_VV (it+1),&tt);
241:         CHKERRQ (ierr);
242:         /* save the magnitude */
243:         *HH (it+1,it)    = tt;
244:         *HES (it+1,it)   = tt;

246:         /* check for the happy breakdown */
247:         hapbnd  = PetscAbsScalar (tt / *GRS (it));
248:         if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
249:         if (tt < hapbnd) {
250:             PetscInfo2 (ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);
251:             CHKERRQ (ierr);
252:             hapend = PETSC_TRUE;
253:         }
254:         DGMRESUpdateHessenberg (ksp,it,hapend,&res);
255:         CHKERRQ (ierr);

257:         it++;
258:         dgmres->it  = (it-1);    /* For converged */
259:         ksp->its++;
260:         ksp->rnorm = res;
261:         if (ksp->reason) break;

263:         (*ksp->converged) (ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
264:         CHKERRQ (ierr);

266:         /* Catch error in happy breakdown and signal convergence and break from loop */
267:         if (hapend) {
268:             if (!ksp->reason) {
269:               SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_PLIB,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
270:             }
271:             break;
272:         }
273:     }

275:     /* Monitor if we know that we will not return for a restart */
276:     if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
277:         KSPLogResidualHistory (ksp,res);
278:         KSPMonitor (ksp,ksp->its,res);
279:     }

281:     if (itcount) *itcount    = it;


284:     /*
285:       Down here we have to solve for the "best" coefficients of the Krylov
286:       columns, add the solution values together, and possibly unwind the
287:       preconditioning from the solution
288:      */
289:     /* Form the solution (or the solution so far) */
290:     BuildDGmresSoln (GRS (0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
291:     CHKERRQ (ierr);

293:     /* Compute data for the deflation to be used during the next restart */
294:     if (!ksp->reason && ksp->its < ksp->max_it) {
295:         test = max_k *log (ksp->rtol/res) /log (res/res_old);
296:         /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
297:         if ( (test > dgmres->smv* (ksp->max_it-ksp->its)) || dgmres->force) {
298:             ierr= KSPDGMRESComputeDeflationData (ksp);
299:             CHKERRQ (ierr);
300:         }
301:     }

303:     PetscFunctionReturn (0);
304: }

308: PetscErrorCode KSPSolve_DGMRES (KSP ksp) {
310:     PetscInt       its,itcount;
311:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
312:     PetscTruth     guess_zero = ksp->guess_zero;

315:     if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");

317:     PetscObjectTakeAccess (ksp);
318:     CHKERRQ (ierr);
319:     ksp->its = 0;
320:     dgmres->matvecs = 0;
321:     PetscObjectGrantAccess (ksp);
322:     CHKERRQ (ierr);

324:     itcount     = 0;
325:     ksp->reason = KSP_CONVERGED_ITERATING;
326:     while (!ksp->reason) {
327:         KSPInitialResidual (ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV (0),ksp->vec_rhs);
328:         CHKERRQ (ierr);
329:         if (ksp->pc_side == PC_LEFT) {
330:             dgmres->matvecs += 1;
331:             if (dgmres->r > 0) {
332:                 KSPDGMRESApplyDeflation (ksp, VEC_VV (0), VEC_TEMP);
333:                 CHKERRQ (ierr);
334:                 ierr=VecCopy (VEC_TEMP, VEC_VV (0));
335:                 CHKERRQ (ierr);
336:             }
337:         }

339:         DGMREScycle (&its,ksp);
340:         CHKERRQ (ierr);
341:         itcount += its;
342:         if (itcount >= ksp->max_it) {
343:             if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
344:             break;
345:         }
346:         ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
347:     }
348:     ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
349:     PetscFunctionReturn (0);
350: }


355: PetscErrorCode KSPDestroy_DGMRES (KSP ksp) {
357:     KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
358:     PetscInt            neig1 = dgmres->neig+EIG_OFFSET;
359:     PetscInt            max_neig = dgmres->max_neig;

362:     if (dgmres->r) {
363:         ierr=VecDestroyVecs (max_neig, &UU);
364:         CHKERRQ (ierr);
365:         ierr=VecDestroyVecs (max_neig, &MU);
366:         CHKERRQ (ierr);
367:         ierr=VecDestroyVecs (neig1, &XX);
368:         CHKERRQ (ierr);
369:         ierr=VecDestroyVecs (neig1, &MX);
370:         CHKERRQ (ierr);

372:         ierr=PetscFree (TT);        CHKERRQ (ierr);
373:         ierr=PetscFree (TTF);        CHKERRQ (ierr);
374:         ierr=PetscFree (INVP);        CHKERRQ (ierr);

376:         ierr=PetscFree (XMX);        CHKERRQ (ierr);
377:         ierr=PetscFree (UMX);        CHKERRQ (ierr);
378:         ierr=PetscFree (XMU);        CHKERRQ (ierr);
379:         ierr=PetscFree (X1);        CHKERRQ (ierr);
380:         ierr=PetscFree (X2);        CHKERRQ (ierr);
381:         ierr=PetscFree (dgmres->work);        CHKERRQ (ierr);
382:         ierr=PetscFree (dgmres->iwork);        CHKERRQ (ierr);
383:         ierr=PetscFree (ORTH);        CHKERRQ (ierr);

385:         PetscFree (AUAU);        CHKERRQ (ierr);
386:         PetscFree (AUU);        CHKERRQ (ierr);
387:         PetscFree (SR2);        CHKERRQ (ierr);
388:     }
389:     ierr=PetscFree (SR);        CHKERRQ (ierr);
390:     KSPDestroy_GMRES (ksp);
391:     CHKERRQ (ierr);
392:     PetscFunctionReturn (0);
393: }
394: /*
395:     BuildDGmresSoln - create the solution from the starting vector and the
396:     current iterates.

398:     Input parameters:
399:         nrs - work area of size it + 1.
400:         vs  - index of initial guess
401:         vdest - index of result.  Note that vs may == vdest (replace
402:                 guess with the solution).

404:      This is an internal routine that knows about the GMRES internals.
405:  */
408: static PetscErrorCode BuildDGmresSoln (PetscScalar* nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it) {
409:     PetscScalar    tt;
411:     PetscInt       ii,k,j;
412:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) (ksp->data);

415:     /* Solve for solution vector that minimizes the residual */

417:     /* If it is < 0, no gmres steps have been performed */
418:     if (it < 0) {
419:         VecCopy (vs,vdest);
420:         CHKERRQ (ierr);     /* VecCopy() is smart, exists immediately if vguess == vdest */
421:         PetscFunctionReturn (0);
422:     }
423:     if (*HH (it,it) == 0.0) SETERRQ2 (((PetscObject)ksp)->comm, PETSC_ERR_CONV_FAILED,"Likley your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar (*GRS (it)));
424:     if (*HH (it,it) != 0.0) {
425:         nrs[it] = *GRS (it) / *HH (it,it);
426:     } else {
427:         nrs[it] = 0.0;
428:     }
429:     for (ii=1; ii<=it; ii++) {
430:         k   = it - ii;
431:         tt  = *GRS (k);
432:         for (j=k+1; j<=it; j++) tt  = tt - *HH (k,j) * nrs[j];
433:         if (*HH (k,k) == 0.0) SETERRQ2 (((PetscObject)ksp)->comm, PETSC_ERR_CONV_FAILED,"Likley your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
434:         nrs[k]   = tt / *HH (k,k);
435:     }

437:     /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
438:     VecSet (VEC_TEMP,0.0);
439:     CHKERRQ (ierr);
440:     VecMAXPY (VEC_TEMP,it+1,nrs,&VEC_VV (0));
441:     CHKERRQ (ierr);

443:     /* Apply deflation */
444:     if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
445:         ierr=KSPDGMRESApplyDeflation (ksp, VEC_TEMP, VEC_TEMP_MATOP);
446:         CHKERRQ (ierr);
447:         ierr=VecCopy (VEC_TEMP_MATOP, VEC_TEMP);
448:         CHKERRQ (ierr);
449:     }
450:     KSPUnwindPreconditioner (ksp,VEC_TEMP,VEC_TEMP_MATOP);
451:     CHKERRQ (ierr);


454:     /* add solution to previous solution */
455:     if (vdest != vs) {
456:         VecCopy (vs,vdest);
457:         CHKERRQ (ierr);
458:     }
459:     VecAXPY (vdest,1.0,VEC_TEMP);
460:     CHKERRQ (ierr);
461:     PetscFunctionReturn (0);
462: }
463: /*
464:    Do the scalar work for the orthogonalization.  Return new residual norm.
465:  */
468: static PetscErrorCode DGMRESUpdateHessenberg (KSP ksp,PetscInt it,PetscTruth hapend,PetscReal *res) {
469:     PetscScalar *hh,*cc,*ss,tt;
470:     PetscInt    j;
471:     KSP_DGMRES   *dgmres = (KSP_DGMRES *) (ksp->data);

474:     hh  = HH (0,it);
475:     cc  = CC (0);
476:     ss  = SS (0);

478:     /* Apply all the previously computed plane rotations to the new column
479:       of the Hessenberg matrix */
480:     for (j=1; j<=it; j++) {
481:         tt  = *hh;
482: #if defined(PETSC_USE_COMPLEX)
483:         *hh = PetscConj (*cc) * tt + *ss * * (hh+1);
484: #else
485:         *hh = *cc * tt + *ss * * (hh+1);
486: #endif
487:         hh++;
488:         *hh = *cc++ * *hh - (*ss++ * tt);
489:     }

491:     /*
492:       compute the new plane rotation, and apply it to:
493:       1) the right-hand-side of the Hessenberg system
494:       2) the new column of the Hessenberg matrix
495:       thus obtaining the updated value of the residual
496:     */
497:     if (!hapend) {
498: #if defined(PETSC_USE_COMPLEX)
499:         tt        = PetscSqrtScalar (PetscConj (*hh) * *hh + PetscConj (* (hh+1)) * * (hh+1));
500: #else
501:         tt        = PetscSqrtScalar (*hh * *hh + * (hh+1) * * (hh+1));
502: #endif
503:         if (tt == 0.0) {
504:             ksp->reason = KSP_DIVERGED_NULL;
505:             PetscFunctionReturn (0);
506:         }
507:         *cc       = *hh / tt;
508:         *ss       = * (hh+1) / tt;
509:         *GRS (it+1) = - (*ss * *GRS (it));
510: #if defined(PETSC_USE_COMPLEX)
511:         *GRS (it)   = PetscConj (*cc) * *GRS (it);
512:         *hh       = PetscConj (*cc) * *hh + *ss * * (hh+1);
513: #else
514:         *GRS (it)   = *cc * *GRS (it);
515:         *hh       = *cc * *hh + *ss * * (hh+1);
516: #endif
517:         *res      = PetscAbsScalar (*GRS (it+1));
518:     } else {
519:         /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
520:                 another rotation matrix (so RH doesn't change).  The new residual is
521:                 always the new sine term times the residual from last time (GRS(it)),
522:                 but now the new sine rotation would be zero...so the residual should
523:                 be zero...so we will multiply "zero" by the last residual.  This might
524:                 not be exactly what we want to do here -could just return "zero". */

526:         *res = 0.0;
527:     }
528:     PetscFunctionReturn (0);
529: }
530: /*
531:    This routine allocates more work vectors, starting from VEC_VV(it).
532:  */
535: static PetscErrorCode DGMRESGetNewVectors (KSP ksp,PetscInt it) {
536:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
538:     PetscInt       nwork = dgmres->nwork_alloc,k,nalloc;

541:     nalloc = PetscMin (ksp->max_it,dgmres->delta_allocate);
542:     /* Adjust the number to allocate to make sure that we don't exceed the
543:       number of available slots */
544:     if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
545:         nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
546:     }
547:     if (!nalloc) PetscFunctionReturn (0);

549:     dgmres->vv_allocated += nalloc;
550:     KSPGetVecs (ksp,nalloc,&dgmres->user_work[nwork],0,PETSC_NULL);
551:     CHKERRQ (ierr);
552:     PetscLogObjectParents (ksp,nalloc,dgmres->user_work[nwork]);
553:     CHKERRQ (ierr);
554:     dgmres->mwork_alloc[nwork] = nalloc;
555:     for (k=0; k<nalloc; k++) {
556:         dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
557:     }
558:     dgmres->nwork_alloc++;
559:     PetscFunctionReturn (0);
560: }

564: PetscErrorCode KSPBuildSolution_DGMRES (KSP ksp,Vec  ptr,Vec *result) {
565:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;

569:     if (!ptr) {
570:         if (!dgmres->sol_temp) {
571:             VecDuplicate (ksp->vec_sol,&dgmres->sol_temp);
572:             CHKERRQ (ierr);
573:             PetscLogObjectParent (ksp,dgmres->sol_temp);
574:             CHKERRQ (ierr);
575:         }
576:         ptr = dgmres->sol_temp;
577:     }
578:     if (!dgmres->nrs) {
579:         /* allocate the work area */
580:         PetscMalloc (dgmres->max_k*sizeof (PetscScalar),&dgmres->nrs);
581:         CHKERRQ (ierr);
582:         PetscLogObjectMemory (ksp,dgmres->max_k*sizeof (PetscScalar));
583:         CHKERRQ (ierr);
584:     }

586:     BuildDGmresSoln (dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
587:     CHKERRQ (ierr);
588:     if (result) *result = ptr;
589:     PetscFunctionReturn (0);
590: }


595: PetscErrorCode KSPView_DGMRES (KSP ksp,PetscViewer viewer) {
596:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
598:     PetscTruth     iascii;

601:     KSPView_GMRES (ksp,viewer);
602:     CHKERRQ (ierr);
603:     PetscTypeCompare ( (PetscObject) viewer,PETSCVIEWERASCII,&iascii);
604:     CHKERRQ (ierr);
605:     if (iascii) {
606:         if (dgmres->force)
607:             PetscViewerASCIIPrintf (viewer, "   DGMRES: Adaptive strategy is used: FALSE\n");
608:         else
609:             PetscViewerASCIIPrintf (viewer, "   DGMRES: Adaptive strategy is used: TRUE\n");
610:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Frequency of extracted eigenvalues = %D\n", dgmres->neig);
611:         CHKERRQ (ierr);
612:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
613:         CHKERRQ (ierr);
614:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
615:         CHKERRQ (ierr);
616:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: relaxation parameter for the adaptive strategy(smv)  = %g\n", dgmres->smv);
617:         CHKERRQ (ierr);
618:         ierr=PetscViewerASCIIPrintf (viewer, "  DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
619:         CHKERRQ (ierr);
620:     } else {
621:       SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_SUP,"Viewer type %s not supported for KSP DGMRES", ( (PetscObject) viewer)->type_name);
622:     }

624:     PetscFunctionReturn (0);
625: }

627: /* New DGMRES functions */

632: PetscErrorCode  KSPDGMRESSetEigen_DGMRES (KSP ksp,PetscInt neig) {
633:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
635:     if (neig< 0 && neig >dgmres->max_k)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
636:     dgmres->neig=neig;
637:     PetscFunctionReturn (0);
638: }

644: PetscErrorCode  KSPDGMRESSetMaxEigen_DGMRES (KSP ksp,PetscInt max_neig) {
645:     KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
647:     if (max_neig < 0 && max_neig >dgmres->max_k)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
648:     dgmres->max_neig=max_neig;
649:     PetscFunctionReturn (0);
650: }


657: PetscErrorCode  KSPDGMRESSetRatio_DGMRES (KSP ksp,PetscReal ratio) {
658:         KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
660:         if (ratio <= 0)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
661:         dgmres->smv=ratio;
662:         PetscFunctionReturn (0);
663: }

669: PetscErrorCode  KSPDGMRESForce_DGMRES (KSP ksp,PetscInt force) {
670:         KSP_DGMRES      *dgmres = (KSP_DGMRES *) ksp->data;
672:         if (force != 0 && force != 1)  SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"Value must be 0 or 1");
673:         dgmres->force = 1;
674:         PetscFunctionReturn (0);
675: }



683: PetscErrorCode KSPSetFromOptions_DGMRES (KSP ksp) {
685:     PetscInt       neig;
686:     PetscInt         max_neig;
687:     KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
688:     PetscTruth     flg;
689:     PetscReal           smv;
690:     PetscInt            input;

693:     KSPSetFromOptions_GMRES (ksp);
694:     CHKERRQ (ierr);
695:     PetscOptionsHead ("KSP DGMRES Options");
696:     CHKERRQ (ierr);
697:     PetscOptionsInt ("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
698:     CHKERRQ (ierr);
699:     if (flg) {
700:         KSPDGMRESSetEigen (ksp, neig);
701:         CHKERRQ (ierr);
702:     }
703:     PetscOptionsInt ("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
704:     CHKERRQ (ierr);
705:     if (flg) {
706:         KSPDGMRESSetMaxEigen (ksp, max_neig);
707:         CHKERRQ (ierr);
708:     }
709:     ierr=PetscOptionsReal ("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &smv, &flg);
710:     CHKERRQ (ierr);
711:     if (flg) dgmres->smv = smv;
712:     PetscOptionsInt ("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &input, &flg);
713:     CHKERRQ (ierr);
714:     if (flg) dgmres->improve = input;
715:     PetscOptionsInt ("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &input, &flg);
716:     CHKERRQ (ierr);
717:     if (flg) dgmres->force = input;
718:     PetscOptionsTail();
719:     CHKERRQ (ierr);
720:     PetscFunctionReturn (0);
721: }

726: PetscErrorCode  KSPDGMRESComputeDeflationData_DGMRES (KSP ksp)
727: {
728:     KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
730:     PetscInt       i,j, k;
731:     PetscInt       r=dgmres->r;
732:     PetscBLASInt   nr, bmax;
733:     PetscInt       neig; /* number of eigenvalues to extract at each restart */
734:     PetscInt       neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
735:     PetscInt       max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
736:     PetscInt           N = dgmres->max_k+1;
737:     PetscInt       n = dgmres->it+1;
738:     PetscReal           alpha;
739: #if !defined(PETSC_MISSING_LAPACK_GETRF) 
740:     PetscBLASInt info;
741: #endif

744:     ierr=PetscLogEventBegin (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
745:     CHKERRQ (ierr);
746:     if (dgmres->neig == 0)
747:         PetscFunctionReturn (0);
748:     if (max_neig < (r+neig1) && !dgmres->improve) {
749:         ierr=PetscLogEventEnd (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
750:         CHKERRQ (ierr);
751:         PetscFunctionReturn (0);
752:     }

754:     ierr= KSPDGMRESComputeSchurForm (ksp, &neig);
755:     CHKERRQ (ierr);

757:     /* Form the extended Schur vectors X=VV*Sr */
758:     if (!XX) {
759:         ierr=VecDuplicateVecs (VEC_VV (0), neig1, &XX);
760:         CHKERRQ (ierr);
761:     }
762:     for (j = 0; j<neig; j++) {
763:         VecZeroEntries (XX[j]);
764:         CHKERRQ (ierr);
765:         VecMAXPY (XX[j], n, &SR[j*N], &VEC_VV (0));
766:         CHKERRQ (ierr);
767:     }

769:     /* Orthogonalize X against U */
770:     if (!ORTH) {
771:         ierr=PetscMalloc (max_neig*sizeof (PetscReal), &ORTH);
772:         CHKERRQ (ierr);
773:     }
774:     if (r > 0) {
775:         /* modified Gram-Schmidt */
776:         for (j = 0; j<neig; j++) {
777:             for (i=0; i<r; i++) {
778:                 /* First, compute U'*X[j] */
779:                 VecDot (XX[j], UU[i], &alpha);
780:                 CHKERRQ (ierr);
781:                 /* Then, compute X(j)=X(j)-U*U'*X(j) */
782:                 VecAXPY (XX[j], -alpha, UU[i]);
783:                 CHKERRQ (ierr);
784:             }
785:         }
786:     }
787:     /* Compute MX = M^{-1}*A*X */
788:     if (!MX) {
789:         ierr=VecDuplicateVecs (VEC_VV (0), neig1, &MX);
790:         CHKERRQ (ierr);
791:     }
792:     for (j = 0; j<neig; j++) {
793:         KSP_PCApplyBAorAB (ksp, XX[j], MX[j], VEC_TEMP_MATOP);
794:         CHKERRQ (ierr);
795:     }
796:     dgmres->matvecs += neig;

798:     if ( (r+neig1) > max_neig && dgmres->improve) {    /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
799:         KSPDGMRESImproveEig (ksp, neig);
800:         CHKERRQ (ierr);
801:         PetscFunctionReturn (0);   /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
802:     }

804:     /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
805:     if (!XMX) {
806:         ierr=PetscMalloc (neig1*neig1*sizeof (PetscReal), &XMX);
807:         CHKERRQ (ierr);
808:     }
809:     for (j = 0; j < neig; j++) {
810:         ierr=VecMDot (MX[j], neig, XX, & (XMX[j*neig1]));
811:         CHKERRQ (ierr);
812:     }

814:     if (r > 0) {
815:         /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
816:         if (!UMX) {
817:             PetscMalloc (max_neig*neig1*sizeof (PetscReal), &UMX);
818:             CHKERRQ (ierr);
819:         }
820:         for (j = 0; j < neig; j++) {
821:             VecMDot (MX[j], r, UU, & (UMX[j*max_neig]));
822:             CHKERRQ (ierr);
823:         }
824:         /* Compute XMU = X'*M^{-1}*A*U -- size (neig, r) */
825:         if (!XMU) {
826:             PetscMalloc (max_neig*neig1*sizeof (PetscReal), &XMU);
827:             CHKERRQ (ierr);
828:         }
829:         for (j = 0; j<r; j++) {
830:             ierr=VecMDot (MU[j], neig, XX, & (XMU[j*neig1]));
831:             CHKERRQ (ierr);
832:         }

834:     }

836:     /* Form the new matrix T = [T UMX; XMU XMX]; */
837:     if (!TT) {
838:         PetscMalloc (max_neig*max_neig*sizeof (PetscReal), &TT);
839:         CHKERRQ (ierr);
840:     }
841:     if (r > 0) {
842:         /* Add XMU to T */
843:         for (j = 0; j < r; j++) {
844:             PetscMemcpy (& (TT[max_neig*j+r]), & (XMU[neig1*j]), neig*sizeof (PetscReal));
845:             CHKERRQ (ierr);
846:         }
847:         /* Add [UMX; XMX] to T */
848:         for (j = 0; j < neig; j++) {
849:             k = r+j;
850:             PetscMemcpy (& (TT[max_neig*k]), & (UMX[max_neig*j]), r*sizeof (PetscReal));
851:             CHKERRQ (ierr);
852:             ierr=PetscMemcpy (& (TT[max_neig*k + r]), & (XMX[neig1*j]), neig*sizeof (PetscReal));
853:             CHKERRQ (ierr);
854:         }
855:     } else { /* Add XMX to T */
856:         for (j = 0; j < neig; j++) {
857:             ierr=PetscMemcpy (& (TT[max_neig*j]), & (XMX[neig1*j]), neig*sizeof (PetscReal));
858:             CHKERRQ (ierr);
859:         }
860:     }

862:     dgmres->r += neig;
863:     r=dgmres->r;
864:     nr = PetscBLASIntCast (r);
865:     /*LU Factorize T with Lapack xgetrf routine */

867:     bmax = PetscBLASIntCast (max_neig);
868:     if (!TTF) {
869:         PetscMalloc (bmax*bmax*sizeof (PetscReal), &TTF);
870:         CHKERRQ (ierr);
871:     }
872:     ierr=PetscMemcpy (TTF, TT, bmax*r*sizeof (PetscReal));
873:     CHKERRQ (ierr);
874:     if (!INVP) {
875:         ierr=PetscMalloc (bmax*sizeof (PetscBLASInt), &INVP);
876:         CHKERRQ (ierr);
877:     }
878: #if defined(PETSC_MISSING_LAPACK_GETRF) 
879:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
880: #else
881:     LAPACKgetrf_ (&nr, &nr, TTF, &bmax, INVP, &info);
882:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d", (int) info);
883: #endif

885:     /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
886:     if (!UU) {
887:         ierr=VecDuplicateVecs (VEC_VV (0), max_neig, &UU);
888:         CHKERRQ (ierr);
889:         ierr=VecDuplicateVecs (VEC_VV (0), max_neig, &MU);
890:         CHKERRQ (ierr);
891:     }
892:     for (j=0; j<neig; j++) {
893:         ierr=VecCopy (XX[j], UU[r-neig+j]);
894:         CHKERRQ (ierr);
895:         ierr=VecCopy (MX[j], MU[r-neig+j]);
896:         CHKERRQ (ierr);
897:     }
898:     ierr=PetscLogEventEnd (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
899:     CHKERRQ (ierr);
900:     PetscFunctionReturn (0);
901: }

907: PetscErrorCode  KSPDGMRESComputeSchurForm_DGMRES (KSP ksp, PetscInt *neig) {
908:     KSP_DGMRES             *dgmres = (KSP_DGMRES*) ksp->data;
909:     PetscErrorCode         ierr;
910:     PetscInt               N = dgmres->max_k + 1, n=dgmres->it+1;
911:     PetscBLASInt        bn, bN;
912:     PetscReal            *A;
913:     PetscBLASInt         ihi;
914:     PetscBLASInt         ldA;                 /* leading dimension of A */
915:     PetscBLASInt        ldQ;                /* leading dimension of Q */
916:     PetscReal                *Q;                 /*  orthogonal matrix of  (left) schur vectors */
917:     PetscReal                 *work;                /* working vector */
918:     PetscBLASInt         lwork;                /* size of the working vector */
919:     PetscInt                 *perm;                /* Permutation vector to sort eigenvalues */
920:     PetscInt                i, j;
921:     PetscBLASInt        NbrEig;         /* Number of eigenvalues really extracted */
922:     PetscReal                *wr, *wi, *modul;         /* Real and imaginary part and modul of the eigenvalues of A*/
923:     PetscBLASInt *select;
924:     PetscBLASInt *iwork;
925:     PetscBLASInt liwork;
926: #if !defined(PETSC_MISSING_LAPACK_HSEQR) 
927:     PetscBLASInt         ilo=1;
928:     PetscBLASInt        info;
929:     PetscReal                 CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
930:     PetscReal                 CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
931: #endif

934:     bn=PetscBLASIntCast (n);
935:     bN=PetscBLASIntCast (N);
936:     ihi = ldQ = bn;
937:     ldA=bN;
938:     lwork = PetscBLASIntCast (5*N);

940: #if defined(PETSC_USE_COMPLEX)
941:     SETERRQ (((PetscObject)ksp)->comm, -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
942:     PetscFunctionReturn (-1);
943: #endif

945:     PetscMalloc (ldA*ldA*sizeof (PetscReal), &A);    CHKERRQ (ierr);
946:     PetscMalloc (ldQ*n*sizeof (PetscReal), &Q);    CHKERRQ (ierr);
947:     PetscMalloc (lwork*sizeof (PetscReal), &work);    CHKERRQ (ierr);
948:     PetscMalloc (n*sizeof (PetscReal), &wr);    CHKERRQ (ierr);
949:     PetscMalloc (n*sizeof (PetscReal), &wi);    CHKERRQ (ierr);
950:     PetscMalloc (n*sizeof (PetscReal),&modul);    CHKERRQ (ierr);
951:     PetscMalloc (n*sizeof (PetscInt),&perm);    CHKERRQ (ierr);
952:     /* copy the Hessenberg matrix to work space */
953:     PetscMemcpy (A, dgmres->hes_origin, ldA*ldA*sizeof (PetscReal));    CHKERRQ (ierr);
954:     /* Compute eigenvalues with the Schur form */
955: #if defined(PETSC_MISSING_LAPACK_HSEQR) 
956:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
957: #else
958:     LAPACKhseqr_ ("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info);
959:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d", (int) info);
960: #endif
961:     PetscFree(work);

963:     /* sort the eigenvalues */
964:     for (i=0; i<n; i++)  {
965:         modul[i]=sqrt (wr[i]*wr[i]+wi[i]*wi[i]);
966:     }
967:     for (i=0; i<n; i++) {
968:         perm[i] = i;
969:     }
970:     PetscSortRealWithPermutation (n, modul, perm);CHKERRQ (ierr);
971:     /* save the complex modulus of the largest eigenvalue in magnitude */
972:     if (dgmres->lambdaN < modul[perm[n-1]]) {
973:         dgmres->lambdaN=modul[perm[n-1]];
974:     }
975:         /* count the number of extracted eigenvalues (with complex conjugates) */
976:         NbrEig = 0;
977:         while (NbrEig < dgmres->neig)
978:         {
979:           if (wi[perm[NbrEig]] != 0)
980:                 NbrEig += 2;
981:           else
982:                 NbrEig += 1;
983:         }
984:         /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

986:         PetscMalloc(n * sizeof(PetscBLASInt), &select);
987:         PetscMemzero(select, n * sizeof(PetscBLASInt));

989:         if (dgmres->GreatestEig == PETSC_FALSE)
990:         {
991:                 for (j = 0; j < NbrEig; j++)
992:                         select[perm[j]] = 1;
993:         }
994:         else
995:         {
996:                 for (j = 0; j < NbrEig; j++)
997:                         select[perm[n-j-1]] = 1;
998:         }
999:         /* call Lapack dtrsen */
1000:         lwork  =  PetscMax(1, 4 * NbrEig * (bn-NbrEig));
1001:         liwork = PetscMax(1, 2 * NbrEig * (bn-NbrEig));
1002:         PetscMalloc(lwork * sizeof(PetscScalar), &work);
1003:         PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1004: #if defined(PETSC_MISSING_LAPACK_TRSEN) 
1005:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
1006: #else
1007:         LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info);
1008:         if (info == 1) SETERRQ(((PetscObject)ksp)->comm, PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1009: #endif

1011:         /* Extract the Schur vectors */
1012:         for (j = 0; j < NbrEig; j++)
1013:         {
1014:                 PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
1015:         }
1016:     *neig=NbrEig;
1017:         ierr=PetscFree (A);    CHKERRQ (ierr);
1018:         ierr=PetscFree (Q);    CHKERRQ (ierr);
1019:         ierr=PetscFree (wr);    CHKERRQ (ierr);
1020:         ierr=PetscFree (wi);    CHKERRQ (ierr);
1021:         ierr=PetscFree (work);    CHKERRQ (ierr);
1022:     ierr=PetscFree (modul);    CHKERRQ (ierr);
1023:     ierr=PetscFree (perm);    CHKERRQ (ierr);
1024:         PetscFree(work);
1025:         PetscFree(iwork);
1026:     PetscFunctionReturn (0);
1027: }


1034: PetscErrorCode  KSPDGMRESApplyDeflation_DGMRES (KSP ksp, Vec x, Vec y) {
1035:     KSP_DGMRES             *dgmres = (KSP_DGMRES*) ksp->data;
1036:     PetscInt        i, r = dgmres->r;
1037:     PetscErrorCode         ierr;
1038:     PetscBLASInt        nrhs = 1;
1039:     PetscReal        alpha = 1.0;
1040:     PetscInt        max_neig = dgmres->max_neig;
1041:     PetscBLASInt        br = PetscBLASIntCast (r);
1042:     PetscBLASInt        bmax = PetscBLASIntCast (max_neig);
1043:     PetscInt        lambda = dgmres->lambdaN;
1044: #if !defined(PETSC_MISSING_LAPACK_GETRS) 
1045:     PetscReal        berr, ferr;
1046:     PetscBLASInt info;
1047: #endif

1050:     ierr=PetscLogEventBegin (KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);    CHKERRQ (ierr);
1051:     if (!r) {
1052:         ierr=VecCopy (x,y);        CHKERRQ (ierr);
1053:         PetscFunctionReturn (0);
1054:     }
1055:     /* Compute U'*x */
1056:     if (!X1) {
1057:         PetscMalloc (bmax*sizeof (PetscReal), &X1);        CHKERRQ (ierr);
1058:         PetscMalloc (bmax*sizeof (PetscReal), &X2);        CHKERRQ (ierr);
1059:     }
1060:     VecMDot (x, r, UU, X1);    CHKERRQ (ierr);

1062:     /* Solve T*X1=X2 for X1*/
1063:     PetscMemcpy (X2, X1, br*sizeof (PetscReal));    CHKERRQ (ierr);
1064: #if defined(PETSC_MISSING_LAPACK_GETRS) 
1065:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
1066: #else
1067:     LAPACKgetrs_ ("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info);
1068:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
1069: #endif
1070:     /* Iterative refinement -- is it really necessary ?? */
1071:     if (!WORK) {
1072:         ierr=PetscMalloc (3*bmax*sizeof (PetscReal), &WORK);        CHKERRQ (ierr);
1073:         ierr=PetscMalloc (bmax*sizeof (PetscInt), &IWORK);        CHKERRQ (ierr);
1074:     }
1075: #if defined(PETSC_MISSING_LAPACK_GERFS) 
1076:     SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
1077: #else
1078:     LAPACKgerfs_ ("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,
1079:                   X1, &bmax, &ferr, &berr, WORK, IWORK, &info);
1080:     if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
1081: #endif

1083:     for (i = 0; i < r; i++) {
1084:         X2[i] =  X1[i]/lambda - X2[i];
1085:     }

1087:     /* Compute X2=U*X2 */
1088:     ierr=VecZeroEntries (y);
1089:     CHKERRQ (ierr);
1090:     ierr=VecMAXPY (y, r, X2, UU);
1091:     CHKERRQ (ierr);

1093:     ierr=VecAXPY (y, alpha, x);
1094:     CHKERRQ (ierr);

1096:     ierr=PetscLogEventEnd (KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1097:     CHKERRQ (ierr);
1098:     PetscFunctionReturn (0);
1099: }

1105: PetscErrorCode  KSPDGMRESImproveEig_DGMRES (KSP ksp, PetscInt neig)
1106: {
1107:         KSP_DGMRES                *dgmres = (KSP_DGMRES*) ksp->data;
1108:         PetscInt                j,r_old, r = dgmres->r;
1109:         PetscBLASInt                i = 0;
1110:         PetscInt                neig1 = dgmres->neig + EIG_OFFSET;
1111:         PetscInt                bmax = dgmres->max_neig;
1112:         PetscInt                aug = r + neig; /* actual size of the augmented invariant basis */
1113:         PetscInt                aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
1114:         PetscBLASInt         ldA;                 /* leading dimension of AUAU and AUU*/
1115:         PetscBLASInt        N;                         /* size of AUAU */
1116:         PetscReal                *Q;                 /*  orthogonal matrix of  (left) schur vectors */
1117:         PetscReal                *Z;                 /*  orthogonal matrix of  (right) schur vectors */
1118:         PetscReal                 *work;                /* working vector */
1119:         PetscBLASInt         lwork;                /* size of the working vector */
1120:         PetscBLASInt         info;                /* Output info from Lapack routines */
1121:         PetscInt         *perm;                /* Permutation vector to sort eigenvalues */
1122:         PetscReal                *wr, *wi, *beta, *modul;         /* Real and imaginary part and modul of the eigenvalues of A*/
1123:         PetscInt                ierr;
1124:         PetscBLASInt NbrEig = 0,nr,bm;
1125:         PetscBLASInt *select;
1126:         PetscBLASInt liwork, *iwork;
1127: #if !defined(PETSC_MISSING_LAPACK_TGSEN)
1128:         PetscReal Dif[2];
1129:         PetscBLASInt ijob = 2;
1130:         PetscBLASInt wantQ = 1, wantZ = 1;
1131: #endif

1134:         /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1135:         if (!AUU) {
1136:                 ierr=PetscMalloc (aug1*aug1*sizeof (PetscReal), &AUU);
1137:                 CHKERRQ (ierr);
1138:                 ierr=PetscMalloc (aug1*aug1*sizeof (PetscReal), &AUAU);
1139:                 CHKERRQ (ierr);
1140:         }
1141:     /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1142:         * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1143:         /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1144:         for (j=0; j < r; j++) {
1145:                 VecMDot (UU[j], r, MU, &AUU[j*aug1]);        CHKERRQ (ierr);
1146:         }
1147:         /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1148:         for (j = 0; j < neig; j++) {
1149:                 VecMDot (XX[j], r, MU, &AUU[ (r+j) *aug1]);        CHKERRQ (ierr);
1150:         }
1151:         /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1152:         for (j = 0; j < r; j++) {
1153:                 VecMDot (UU[j], neig, MX, &AUU[j*aug1+r]);        CHKERRQ (ierr);
1154:         }
1155:         /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
1156:         for (j = 0; j < neig; j++) {
1157:                 VecMDot (XX[j], neig, MX, &AUU[ (r+j) *aug1 + r]);        CHKERRQ (ierr);
1158:         }

1160:         /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1161:         /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1162:         for (j=0; j < r; j++) {
1163:                 VecMDot (MU[j], r, MU, &AUAU[j*aug1]);        CHKERRQ (ierr);
1164:         }
1165:         /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1166:         for (j = 0; j < neig; j++) {
1167:                 VecMDot (MX[j], r, MU, &AUAU[ (r+j) *aug1]);        CHKERRQ (ierr);
1168:         }
1169:         /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1170:         for (j = 0; j < r; j++) {
1171:                 VecMDot (MU[j], neig, MX, &AUAU[j*aug1+r]);        CHKERRQ (ierr);
1172:         }
1173:         /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
1174:         for (j = 0; j < neig; j++) {
1175:                 VecMDot (MX[j], neig, MX, &AUAU[ (r+j) *aug1 + r]);        CHKERRQ (ierr);
1176:         }

1178:         /* Computation of the eigenvectors */
1179:         ldA = PetscBLASIntCast (aug1);
1180:         N = PetscBLASIntCast (aug);
1181:         lwork = 8 * N + 20; /* sizeof the working space */
1182:         PetscMalloc (N*sizeof (PetscReal), &wr);    CHKERRQ (ierr);
1183:         PetscMalloc (N*sizeof (PetscReal), &wi);    CHKERRQ (ierr);
1184:         PetscMalloc (N*sizeof (PetscReal), &beta);    CHKERRQ (ierr);
1185:         PetscMalloc (N*sizeof (PetscReal), &modul);    CHKERRQ (ierr);
1186:         PetscMalloc (N*sizeof (PetscInt), &perm);    CHKERRQ (ierr);
1187:         PetscMalloc (N*N*sizeof (PetscReal), &Q);    CHKERRQ (ierr);
1188:         PetscMalloc (N*N*sizeof (PetscReal), &Z);    CHKERRQ (ierr);
1189:         PetscMalloc (lwork*sizeof (PetscReal), &work);    CHKERRQ (ierr);
1190: #if defined(PETSC_MISSING_LAPACK_GGES) 
1191:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1192: #else
1193:         LAPACKgges_ ("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info);
1194:         if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1195: #endif
1196:         for (i=0; i<N; i++) {
1197:                 if (beta[i] !=0.0) {
1198:                         wr[i] /=beta[i];
1199:                         wi[i] /=beta[i];
1200:                 }
1201:         }
1202:         /* sort the eigenvalues */
1203:         for (i=0; i<N; i++)  {
1204:                 modul[i]=sqrt (wr[i]*wr[i]+wi[i]*wi[i]);
1205:         }
1206:         for (i=0; i<N; i++) {
1207:                 perm[i] = i;
1208:         }
1209:         PetscSortRealWithPermutation (N, modul, perm);    CHKERRQ (ierr);
1210:         /* Save the norm of the largest eigenvalue */
1211:         if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1212:         /* Allocate space to extract the first r schur vectors   */
1213:         if (!SR2) {
1214:                 ierr=PetscMalloc (aug1*bmax*sizeof (PetscReal), &SR2);        CHKERRQ (ierr);
1215:         }
1216:         /* count the number of extracted eigenvalues ( complex conjugates count as 2) */
1217:         while (NbrEig < bmax) {
1218:                 if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1219:                 else NbrEig += 2;
1220:         }
1221:         if (NbrEig > bmax) NbrEig = bmax - 1;
1222:         r_old=r; /* previous size of r */
1223:         dgmres->r = r = NbrEig;
1224: 
1225:         /* Select the eigenvalues to reorder */
1226:         PetscMalloc(N * sizeof(PetscBLASInt), &select);
1227:         PetscMemzero(select, N * sizeof(PetscBLASInt));
1228:         if (dgmres->GreatestEig == PETSC_FALSE)
1229:         {
1230:                 for (j = 0; j < NbrEig; j++)
1231:                         select[perm[j]] = 1;
1232:         }
1233:         else
1234:         {
1235:                 for (j = 0; j < NbrEig; j++)
1236:                         select[perm[N-j-1]] = 1;
1237:         }
1238:         /* Reorder and extract the new <r> schur vectors */
1239:         lwork  = PetscMax(4 * N + 16,  2 * NbrEig * (N - NbrEig) );
1240:         liwork = PetscMax(N + 6,  2 * NbrEig * (N - NbrEig) );
1241:         PetscFree(work);
1242:         PetscMalloc(lwork * sizeof(PetscReal), &work);
1243:         PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1244: #if defined(PETSC_MISSING_LAPACK_TGSEN )
1245:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1246: #else
1247:         LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, & (Dif[0]), work, &lwork, iwork, &liwork, &info);
1248:         if (info == 1) SETERRQ(((PetscObject)ksp)->comm, -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1249: #endif
1250: 
1251: 
1252:         for (j=0; j<r; j++) {
1253:                 PetscMemcpy (&SR2[j*aug1], & (Z[j*N]), N*sizeof (PetscReal));        CHKERRQ (ierr);
1254:         }

1256:     /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
1257:         -- save it temporarily in MU */
1258:         for (j = 0; j < r; j++) {
1259:                 VecZeroEntries (MU[j]);        CHKERRQ (ierr);
1260:                 VecMAXPY (MU[j], r_old, &SR2[j*aug1], UU);        CHKERRQ (ierr);
1261:                 VecMAXPY (MU[j], neig, &SR2[j*aug1+r_old], XX);        CHKERRQ (ierr);
1262:         }
1263:         /* Form T = U'*MU*U */
1264:         for (j = 0; j < r; j++) {
1265:                 ierr=VecCopy (MU[j], UU[j]);        CHKERRQ (ierr);
1266:                 KSP_PCApplyBAorAB (ksp, UU[j], MU[j], VEC_TEMP_MATOP);        CHKERRQ (ierr);
1267:         }
1268:         dgmres->matvecs += r;
1269:         for (j = 0; j < r; j++) {
1270:                 VecMDot (MU[j], r, UU, &TT[j*bmax]);        CHKERRQ (ierr);
1271:         }
1272:         /* Factorize T */
1273:         ierr=PetscMemcpy (TTF, TT, bmax*r*sizeof (PetscReal));    CHKERRQ (ierr);
1274:         nr = PetscBLASIntCast (r);
1275:         bm = PetscBLASIntCast (bmax);
1276: #if defined(PETSC_MISSING_LAPACK_GETRF) 
1277:         SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1278: #else
1279:         LAPACKgetrf_ (&nr, &nr, TTF, &bm, INVP, &info);
1280:         if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d", (int) info);
1281: #endif
1282:         /* Free Memory */
1283:         PetscFree (wr);    PetscFree (wi);    PetscFree (beta);
1284:         PetscFree (modul);    PetscFree (perm);
1285:         PetscFree (Q);    PetscFree (Z);
1286:         PetscFree (work); PetscFree(iwork);

1288:         PetscFunctionReturn (0);
1289: }

1292: /* end new DGMRES functions */

1294: /*MC
1295:      KSPDGMRES - Implements the deflated GMRES as defined in [1,2]; In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the stagnation occurs.
1296:         GMRES Options Database Keys:
1297: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1298: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1299: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1300:                              vectors are allocated as needed)
1301: .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1302: .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1303: .   -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1304:                                    stability of the classical Gram-Schmidt  orthogonalization.
1305: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
1306:         DGMRES Options Database Keys:
1307:         -ksp_dgmres_eigen <neig> - Number of smallest eigenvalues to extract at each restart
1308:         -ksp_dgmres_max_eigen <max_neig> - Maximum number of eigenvalues that can be extracted during the iterative process
1309:         -ksp_dgmres_force <0, 1> - Use the deflation at each restart; switch off the adaptive strategy.

1311:    Level: beginner

1313:    Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported

1315:    References:
1316:    [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1317:    [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.

1319:  Contributed by: Desire NUENTSA WAKAM,INRIA

1321: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1322:            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1323:            KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1324:            KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()

1326: M*/

1331: PetscErrorCode  KSPCreate_DGMRES (KSP ksp) {
1332:     KSP_DGMRES      *dgmres;

1336:     PetscNewLog (ksp,KSP_DGMRES,&dgmres); CHKERRQ (ierr);
1337:     ksp->data                              = (void*) dgmres;

1339:     KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
1340:     KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);

1342:     ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1343:     ksp->ops->setup                        = KSPSetUp_DGMRES;
1344:     ksp->ops->solve                        = KSPSolve_DGMRES;
1345:     ksp->ops->destroy                      = KSPDestroy_DGMRES;
1346:     ksp->ops->view                         = KSPView_DGMRES;
1347:     ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1348:     ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1349:     ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1351:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetPreAllocateVectors_C",
1352:             "KSPGMRESSetPreAllocateVectors_GMRES",
1353:             KSPGMRESSetPreAllocateVectors_GMRES);    CHKERRQ (ierr);
1354:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetOrthogonalization_C",
1355:             "KSPGMRESSetOrthogonalization_GMRES",
1356:             KSPGMRESSetOrthogonalization_GMRES);    CHKERRQ (ierr);
1357:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetRestart_C",
1358:             "KSPGMRESSetRestart_GMRES",
1359:             KSPGMRESSetRestart_GMRES);    CHKERRQ (ierr);
1360:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetHapTol_C",
1361:             "KSPGMRESSetHapTol_GMRES",
1362:             KSPGMRESSetHapTol_GMRES);    CHKERRQ (ierr);
1363:     PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetCGSRefinementType_C",
1364:             "KSPGMRESSetCGSRefinementType_GMRES",
1365:             KSPGMRESSetCGSRefinementType_GMRES);    CHKERRQ (ierr);
1366:     /* -- New functions defined in DGMRES -- */
1367:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetEigen_C",
1368:             "KSPDGMRESSetEigen_DGMRES",
1369:             KSPDGMRESSetEigen_DGMRES);    CHKERRQ (ierr);
1370:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetMaxEigen_C",
1371:             "KSPDGMRESSetMaxEigen_DGMRES",
1372:             KSPDGMRESSetMaxEigen_DGMRES);        CHKERRQ (ierr);
1373:         ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetRatio_C",
1374:                         "KSPDGMRESSetRatio_DGMRES",
1375:                          KSPDGMRESSetRatio_DGMRES);        CHKERRQ (ierr);
1376:         ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESForce_C",
1377:                          "KSPDGMRESForce_DGMRES",
1378:                          KSPDGMRESForce_DGMRES);        CHKERRQ (ierr);
1379:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESComputeSchurForm_C",
1380:             "KSPDGMRESComputeSchurForm_DGMRES",
1381:             KSPDGMRESComputeSchurForm_DGMRES);    CHKERRQ (ierr);
1382:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESComputeDeflationData_C",
1383:             "KSPDGMRESComputeDeflationData_DGMRES",
1384:             KSPDGMRESComputeDeflationData_DGMRES);    CHKERRQ (ierr);
1385:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESApplyDeflation_C",
1386:             "KSPDGMRESApplyDeflation_DGMRES",
1387:             KSPDGMRESApplyDeflation_DGMRES);    CHKERRQ (ierr);
1388:     ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESImproveEig_C", "KSPDGMRESImproveEig_DGMRES", KSPDGMRESImproveEig_DGMRES);    CHKERRQ (ierr);

1390:     ierr=PetscLogEventRegister ("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1391:     ierr=PetscLogEventRegister ("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);

1393:     dgmres->haptol              = 1.0e-30;
1394:     dgmres->q_preallocate       = 0;
1395:     dgmres->delta_allocate      = GMRES_DELTA_DIRECTIONS;
1396:     dgmres->orthog              = KSPGMRESClassicalGramSchmidtOrthogonalization;
1397:     dgmres->nrs                 = 0;
1398:     dgmres->sol_temp            = 0;
1399:     dgmres->max_k               = GMRES_DEFAULT_MAXK;
1400:     dgmres->Rsvd                = 0;
1401:     dgmres->cgstype             = KSP_GMRES_CGS_REFINE_NEVER;
1402:     dgmres->orthogwork          = 0;

1404:     /* Default values for the deflation */
1405:     dgmres->r                                        = 0;
1406:     dgmres->neig                                = DGMRES_DEFAULT_EIG;
1407:     dgmres->max_neig                        = DGMRES_DEFAULT_MAXEIG-1;
1408:     dgmres->lambdaN                                = 0.0;
1409:     dgmres->smv                                        = SMV;
1410:     dgmres->force                                = 0;
1411:     dgmres->matvecs                         = 0;
1412:     dgmres->improve                                = 0;
1413:         dgmres->GreatestEig                        = PETSC_FALSE; /* experimental */
1414:     PetscFunctionReturn (0);
1415: }