Actual source code: dgmres.c
2: /*
3: This file implements the deflated GMRES
4: References: Erhel, Burrage and Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
7: */
9: #include dgmresimpl.h
10: #define GMRES_DELTA_DIRECTIONS 10
11: #define GMRES_DEFAULT_MAXK 30
12: #define PetscTruth PetscBool
13: static PetscErrorCode DGMRESGetNewVectors (KSP,PetscInt);
14: static PetscErrorCode DGMRESUpdateHessenberg (KSP,PetscInt,PetscTruth,PetscReal*);
15: static PetscErrorCode BuildDGmresSoln (PetscScalar*,Vec,Vec,KSP,PetscInt);
17: /* Functions from gmres */
37: PetscErrorCode KSPDGMRESSetEigen (KSP ksp,PetscInt nb_eig) {
41: PetscTryMethod ( (ksp),"KSPDGMRESSetEigen_C", (KSP,PetscInt), (ksp,nb_eig));
42: CHKERRQ (ierr);
43: PetscFunctionReturn (0);
44: }
47: PetscErrorCode KSPDGMRESSetMaxEigen (KSP ksp,PetscInt max_neig) {
51: PetscTryMethod ( (ksp),"KSPDGMRESSetMaxEigen_C", (KSP,PetscInt), (ksp,max_neig));
52: CHKERRQ (ierr);
53: PetscFunctionReturn (0);
54: }
57: PetscErrorCode KSPDGMRESForce (KSP ksp,PetscInt force) {
61: PetscTryMethod ( (ksp),"KSPDGMRESForce_C", (KSP,PetscInt), (ksp,force));
62: CHKERRQ (ierr);
63: PetscFunctionReturn (0);
64: }
67: PetscErrorCode KSPDGMRESSetRatio (KSP ksp,PetscReal ratio) {
71: PetscTryMethod ( (ksp),"KSPDGMRESSetRatio_C", (KSP,PetscReal), (ksp,ratio));
72: CHKERRQ (ierr);
73: PetscFunctionReturn (0);
74: }
77: PetscErrorCode KSPDGMRESComputeSchurForm (KSP ksp,PetscInt *neig) {
81: PetscTryMethod ( (ksp),"KSPDGMRESComputeSchurForm_C", (KSP, PetscInt*), (ksp, neig));
82: CHKERRQ (ierr);
83: PetscFunctionReturn (0);
84: }
87: PetscErrorCode KSPDGMRESComputeDeflationData (KSP ksp) {
91: PetscTryMethod ( (ksp),"KSPDGMRESComputeDeflationData_C", (KSP), (ksp));
92: CHKERRQ (ierr);
93: PetscFunctionReturn (0);
94: }
97: PetscErrorCode KSPDGMRESApplyDeflation (KSP ksp, Vec x, Vec y) {
101: PetscTryMethod ( (ksp),"KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
102: CHKERRQ (ierr);
103: PetscFunctionReturn (0);
104: }
108: PetscErrorCode KSPDGMRESImproveEig (KSP ksp, PetscInt neig) {
112: PetscTryMethod ( (ksp), "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
113: CHKERRQ (ierr);
114: PetscFunctionReturn (0);
115: }
118: PetscErrorCode KSPSetUp_DGMRES (KSP ksp) {
119: PetscErrorCode ierr;
120: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
121: PetscInt neig = dgmres->neig+EIG_OFFSET;
122: PetscInt max_k = dgmres->max_k+1;
126: KSPSetUp_GMRES (ksp);
127: CHKERRQ (ierr);
128: if (dgmres->neig==0)
129: PetscFunctionReturn (0);
131: /* Allocate workspace for the Schur vectors*/
132: ierr=PetscMalloc ( (neig) *max_k*sizeof (PetscReal), &SR);
133: CHKERRQ (ierr);
134: UU = PETSC_NULL;
135: XX = PETSC_NULL;
136: MX = PETSC_NULL;
137: AUU = PETSC_NULL;
138: XMX = PETSC_NULL;
139: XMU = PETSC_NULL;
140: UMX = PETSC_NULL;
141: AUAU = PETSC_NULL;
142: TT = PETSC_NULL;
143: TTF = PETSC_NULL;
144: INVP = PETSC_NULL;
145: X1 = PETSC_NULL;
146: X2 = PETSC_NULL;
147: MU = PETSC_NULL;
148: PetscFunctionReturn (0);
149: }
151: /*
152: Run GMRES, possibly with restart. Return residual history if requested.
153: input parameters:
155: . gmres - structure containing parameters and work areas
157: output parameters:
158: . nres - residuals (from preconditioned system) at each step.
159: If restarting, consider passing nres+it. If null,
160: ignored
161: . itcount - number of iterations used. nres[0] to nres[itcount]
162: are defined. If null, ignored.
164: Notes:
165: On entry, the value in vector VEC_VV(0) should be the initial residual
166: (this allows shortcuts where the initial preconditioned residual is 0).
167: */
170: PetscErrorCode DGMREScycle (PetscInt *itcount,KSP ksp) {
171: KSP_DGMRES *dgmres = (KSP_DGMRES *) (ksp->data);
172: PetscReal res_norm,res,hapbnd,tt;
174: PetscInt it = 0, max_k = dgmres->max_k;
175: PetscTruth hapend = PETSC_FALSE;
176: PetscReal res_old;
177: PetscInt test;
180: VecNormalize (VEC_VV (0),&res_norm);
181: CHKERRQ (ierr);
182: res = res_norm;
183: *GRS (0) = res_norm;
185: /* check for the convergence */
186: PetscObjectTakeAccess (ksp);
187: CHKERRQ (ierr);
188: ksp->rnorm = res;
189: PetscObjectGrantAccess (ksp);
190: CHKERRQ (ierr);
191: dgmres->it = (it - 1);
192: KSPLogResidualHistory (ksp,res);
193: KSPMonitor (ksp,ksp->its,res);
194: if (!res) {
195: if (itcount) *itcount = 0;
196: ksp->reason = KSP_CONVERGED_ATOL;
197: PetscInfo (ksp,"Converged due to zero residual norm on entry\n");
198: CHKERRQ (ierr);
199: PetscFunctionReturn (0);
200: }
201: /* record the residual norm to test if deflation is needed */
202: res_old = res;
204: (*ksp->converged) (ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
205: CHKERRQ (ierr);
206: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
207: if (it) {
208: KSPLogResidualHistory (ksp,res);
209: KSPMonitor (ksp,ksp->its,res);
210: }
211: dgmres->it = (it - 1);
212: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
213: DGMRESGetNewVectors (ksp,it+1);
214: CHKERRQ (ierr);
215: }
216: if (dgmres->r > 0) {
217: if (ksp->pc_side == PC_LEFT) {
218: /* Apply the first preconditioner */
219: KSP_PCApplyBAorAB (ksp,VEC_VV (it), VEC_TEMP,VEC_TEMP_MATOP);
220: CHKERRQ (ierr);
221: /* Then apply Deflation as a preconditioner */
222: ierr=KSPDGMRESApplyDeflation (ksp, VEC_TEMP, VEC_VV (1+it));
223: CHKERRQ (ierr);
224: } else if (ksp->pc_side == PC_RIGHT) {
225: ierr=KSPDGMRESApplyDeflation (ksp, VEC_VV (it), VEC_TEMP);
226: CHKERRQ (ierr);
227: ierr=KSP_PCApplyBAorAB (ksp, VEC_TEMP, VEC_VV (1+it), VEC_TEMP_MATOP);
228: CHKERRQ (ierr);
229: }
230: } else {
231: KSP_PCApplyBAorAB (ksp,VEC_VV (it),VEC_VV (1+it),VEC_TEMP_MATOP);
232: CHKERRQ (ierr);
233: }
234: dgmres->matvecs += 1;
235: /* update hessenberg matrix and do Gram-Schmidt */
236: (*dgmres->orthog) (ksp,it);
237: CHKERRQ (ierr);
239: /* vv(i+1) . vv(i+1) */
240: VecNormalize (VEC_VV (it+1),&tt);
241: CHKERRQ (ierr);
242: /* save the magnitude */
243: *HH (it+1,it) = tt;
244: *HES (it+1,it) = tt;
246: /* check for the happy breakdown */
247: hapbnd = PetscAbsScalar (tt / *GRS (it));
248: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
249: if (tt < hapbnd) {
250: PetscInfo2 (ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);
251: CHKERRQ (ierr);
252: hapend = PETSC_TRUE;
253: }
254: DGMRESUpdateHessenberg (ksp,it,hapend,&res);
255: CHKERRQ (ierr);
257: it++;
258: dgmres->it = (it-1); /* For converged */
259: ksp->its++;
260: ksp->rnorm = res;
261: if (ksp->reason) break;
263: (*ksp->converged) (ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
264: CHKERRQ (ierr);
266: /* Catch error in happy breakdown and signal convergence and break from loop */
267: if (hapend) {
268: if (!ksp->reason) {
269: SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_PLIB,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
270: }
271: break;
272: }
273: }
275: /* Monitor if we know that we will not return for a restart */
276: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
277: KSPLogResidualHistory (ksp,res);
278: KSPMonitor (ksp,ksp->its,res);
279: }
281: if (itcount) *itcount = it;
284: /*
285: Down here we have to solve for the "best" coefficients of the Krylov
286: columns, add the solution values together, and possibly unwind the
287: preconditioning from the solution
288: */
289: /* Form the solution (or the solution so far) */
290: BuildDGmresSoln (GRS (0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
291: CHKERRQ (ierr);
293: /* Compute data for the deflation to be used during the next restart */
294: if (!ksp->reason && ksp->its < ksp->max_it) {
295: test = max_k *log (ksp->rtol/res) /log (res/res_old);
296: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
297: if ( (test > dgmres->smv* (ksp->max_it-ksp->its)) || dgmres->force) {
298: ierr= KSPDGMRESComputeDeflationData (ksp);
299: CHKERRQ (ierr);
300: }
301: }
303: PetscFunctionReturn (0);
304: }
308: PetscErrorCode KSPSolve_DGMRES (KSP ksp) {
310: PetscInt its,itcount;
311: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
312: PetscTruth guess_zero = ksp->guess_zero;
315: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
317: PetscObjectTakeAccess (ksp);
318: CHKERRQ (ierr);
319: ksp->its = 0;
320: dgmres->matvecs = 0;
321: PetscObjectGrantAccess (ksp);
322: CHKERRQ (ierr);
324: itcount = 0;
325: ksp->reason = KSP_CONVERGED_ITERATING;
326: while (!ksp->reason) {
327: KSPInitialResidual (ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV (0),ksp->vec_rhs);
328: CHKERRQ (ierr);
329: if (ksp->pc_side == PC_LEFT) {
330: dgmres->matvecs += 1;
331: if (dgmres->r > 0) {
332: KSPDGMRESApplyDeflation (ksp, VEC_VV (0), VEC_TEMP);
333: CHKERRQ (ierr);
334: ierr=VecCopy (VEC_TEMP, VEC_VV (0));
335: CHKERRQ (ierr);
336: }
337: }
339: DGMREScycle (&its,ksp);
340: CHKERRQ (ierr);
341: itcount += its;
342: if (itcount >= ksp->max_it) {
343: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
344: break;
345: }
346: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
347: }
348: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
349: PetscFunctionReturn (0);
350: }
355: PetscErrorCode KSPDestroy_DGMRES (KSP ksp) {
357: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
358: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
359: PetscInt max_neig = dgmres->max_neig;
362: if (dgmres->r) {
363: ierr=VecDestroyVecs (max_neig, &UU);
364: CHKERRQ (ierr);
365: ierr=VecDestroyVecs (max_neig, &MU);
366: CHKERRQ (ierr);
367: ierr=VecDestroyVecs (neig1, &XX);
368: CHKERRQ (ierr);
369: ierr=VecDestroyVecs (neig1, &MX);
370: CHKERRQ (ierr);
372: ierr=PetscFree (TT); CHKERRQ (ierr);
373: ierr=PetscFree (TTF); CHKERRQ (ierr);
374: ierr=PetscFree (INVP); CHKERRQ (ierr);
376: ierr=PetscFree (XMX); CHKERRQ (ierr);
377: ierr=PetscFree (UMX); CHKERRQ (ierr);
378: ierr=PetscFree (XMU); CHKERRQ (ierr);
379: ierr=PetscFree (X1); CHKERRQ (ierr);
380: ierr=PetscFree (X2); CHKERRQ (ierr);
381: ierr=PetscFree (dgmres->work); CHKERRQ (ierr);
382: ierr=PetscFree (dgmres->iwork); CHKERRQ (ierr);
383: ierr=PetscFree (ORTH); CHKERRQ (ierr);
385: PetscFree (AUAU); CHKERRQ (ierr);
386: PetscFree (AUU); CHKERRQ (ierr);
387: PetscFree (SR2); CHKERRQ (ierr);
388: }
389: ierr=PetscFree (SR); CHKERRQ (ierr);
390: KSPDestroy_GMRES (ksp);
391: CHKERRQ (ierr);
392: PetscFunctionReturn (0);
393: }
394: /*
395: BuildDGmresSoln - create the solution from the starting vector and the
396: current iterates.
398: Input parameters:
399: nrs - work area of size it + 1.
400: vs - index of initial guess
401: vdest - index of result. Note that vs may == vdest (replace
402: guess with the solution).
404: This is an internal routine that knows about the GMRES internals.
405: */
408: static PetscErrorCode BuildDGmresSoln (PetscScalar* nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it) {
409: PetscScalar tt;
411: PetscInt ii,k,j;
412: KSP_DGMRES *dgmres = (KSP_DGMRES *) (ksp->data);
415: /* Solve for solution vector that minimizes the residual */
417: /* If it is < 0, no gmres steps have been performed */
418: if (it < 0) {
419: VecCopy (vs,vdest);
420: CHKERRQ (ierr); /* VecCopy() is smart, exists immediately if vguess == vdest */
421: PetscFunctionReturn (0);
422: }
423: if (*HH (it,it) == 0.0) SETERRQ2 (((PetscObject)ksp)->comm, PETSC_ERR_CONV_FAILED,"Likley your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar (*GRS (it)));
424: if (*HH (it,it) != 0.0) {
425: nrs[it] = *GRS (it) / *HH (it,it);
426: } else {
427: nrs[it] = 0.0;
428: }
429: for (ii=1; ii<=it; ii++) {
430: k = it - ii;
431: tt = *GRS (k);
432: for (j=k+1; j<=it; j++) tt = tt - *HH (k,j) * nrs[j];
433: if (*HH (k,k) == 0.0) SETERRQ2 (((PetscObject)ksp)->comm, PETSC_ERR_CONV_FAILED,"Likley your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
434: nrs[k] = tt / *HH (k,k);
435: }
437: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
438: VecSet (VEC_TEMP,0.0);
439: CHKERRQ (ierr);
440: VecMAXPY (VEC_TEMP,it+1,nrs,&VEC_VV (0));
441: CHKERRQ (ierr);
443: /* Apply deflation */
444: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
445: ierr=KSPDGMRESApplyDeflation (ksp, VEC_TEMP, VEC_TEMP_MATOP);
446: CHKERRQ (ierr);
447: ierr=VecCopy (VEC_TEMP_MATOP, VEC_TEMP);
448: CHKERRQ (ierr);
449: }
450: KSPUnwindPreconditioner (ksp,VEC_TEMP,VEC_TEMP_MATOP);
451: CHKERRQ (ierr);
454: /* add solution to previous solution */
455: if (vdest != vs) {
456: VecCopy (vs,vdest);
457: CHKERRQ (ierr);
458: }
459: VecAXPY (vdest,1.0,VEC_TEMP);
460: CHKERRQ (ierr);
461: PetscFunctionReturn (0);
462: }
463: /*
464: Do the scalar work for the orthogonalization. Return new residual norm.
465: */
468: static PetscErrorCode DGMRESUpdateHessenberg (KSP ksp,PetscInt it,PetscTruth hapend,PetscReal *res) {
469: PetscScalar *hh,*cc,*ss,tt;
470: PetscInt j;
471: KSP_DGMRES *dgmres = (KSP_DGMRES *) (ksp->data);
474: hh = HH (0,it);
475: cc = CC (0);
476: ss = SS (0);
478: /* Apply all the previously computed plane rotations to the new column
479: of the Hessenberg matrix */
480: for (j=1; j<=it; j++) {
481: tt = *hh;
482: #if defined(PETSC_USE_COMPLEX)
483: *hh = PetscConj (*cc) * tt + *ss * * (hh+1);
484: #else
485: *hh = *cc * tt + *ss * * (hh+1);
486: #endif
487: hh++;
488: *hh = *cc++ * *hh - (*ss++ * tt);
489: }
491: /*
492: compute the new plane rotation, and apply it to:
493: 1) the right-hand-side of the Hessenberg system
494: 2) the new column of the Hessenberg matrix
495: thus obtaining the updated value of the residual
496: */
497: if (!hapend) {
498: #if defined(PETSC_USE_COMPLEX)
499: tt = PetscSqrtScalar (PetscConj (*hh) * *hh + PetscConj (* (hh+1)) * * (hh+1));
500: #else
501: tt = PetscSqrtScalar (*hh * *hh + * (hh+1) * * (hh+1));
502: #endif
503: if (tt == 0.0) {
504: ksp->reason = KSP_DIVERGED_NULL;
505: PetscFunctionReturn (0);
506: }
507: *cc = *hh / tt;
508: *ss = * (hh+1) / tt;
509: *GRS (it+1) = - (*ss * *GRS (it));
510: #if defined(PETSC_USE_COMPLEX)
511: *GRS (it) = PetscConj (*cc) * *GRS (it);
512: *hh = PetscConj (*cc) * *hh + *ss * * (hh+1);
513: #else
514: *GRS (it) = *cc * *GRS (it);
515: *hh = *cc * *hh + *ss * * (hh+1);
516: #endif
517: *res = PetscAbsScalar (*GRS (it+1));
518: } else {
519: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
520: another rotation matrix (so RH doesn't change). The new residual is
521: always the new sine term times the residual from last time (GRS(it)),
522: but now the new sine rotation would be zero...so the residual should
523: be zero...so we will multiply "zero" by the last residual. This might
524: not be exactly what we want to do here -could just return "zero". */
526: *res = 0.0;
527: }
528: PetscFunctionReturn (0);
529: }
530: /*
531: This routine allocates more work vectors, starting from VEC_VV(it).
532: */
535: static PetscErrorCode DGMRESGetNewVectors (KSP ksp,PetscInt it) {
536: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
538: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
541: nalloc = PetscMin (ksp->max_it,dgmres->delta_allocate);
542: /* Adjust the number to allocate to make sure that we don't exceed the
543: number of available slots */
544: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
545: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
546: }
547: if (!nalloc) PetscFunctionReturn (0);
549: dgmres->vv_allocated += nalloc;
550: KSPGetVecs (ksp,nalloc,&dgmres->user_work[nwork],0,PETSC_NULL);
551: CHKERRQ (ierr);
552: PetscLogObjectParents (ksp,nalloc,dgmres->user_work[nwork]);
553: CHKERRQ (ierr);
554: dgmres->mwork_alloc[nwork] = nalloc;
555: for (k=0; k<nalloc; k++) {
556: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
557: }
558: dgmres->nwork_alloc++;
559: PetscFunctionReturn (0);
560: }
564: PetscErrorCode KSPBuildSolution_DGMRES (KSP ksp,Vec ptr,Vec *result) {
565: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
569: if (!ptr) {
570: if (!dgmres->sol_temp) {
571: VecDuplicate (ksp->vec_sol,&dgmres->sol_temp);
572: CHKERRQ (ierr);
573: PetscLogObjectParent (ksp,dgmres->sol_temp);
574: CHKERRQ (ierr);
575: }
576: ptr = dgmres->sol_temp;
577: }
578: if (!dgmres->nrs) {
579: /* allocate the work area */
580: PetscMalloc (dgmres->max_k*sizeof (PetscScalar),&dgmres->nrs);
581: CHKERRQ (ierr);
582: PetscLogObjectMemory (ksp,dgmres->max_k*sizeof (PetscScalar));
583: CHKERRQ (ierr);
584: }
586: BuildDGmresSoln (dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
587: CHKERRQ (ierr);
588: if (result) *result = ptr;
589: PetscFunctionReturn (0);
590: }
595: PetscErrorCode KSPView_DGMRES (KSP ksp,PetscViewer viewer) {
596: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
598: PetscTruth iascii;
601: KSPView_GMRES (ksp,viewer);
602: CHKERRQ (ierr);
603: PetscTypeCompare ( (PetscObject) viewer,PETSCVIEWERASCII,&iascii);
604: CHKERRQ (ierr);
605: if (iascii) {
606: if (dgmres->force)
607: PetscViewerASCIIPrintf (viewer, " DGMRES: Adaptive strategy is used: FALSE\n");
608: else
609: PetscViewerASCIIPrintf (viewer, " DGMRES: Adaptive strategy is used: TRUE\n");
610: ierr=PetscViewerASCIIPrintf (viewer, " DGMRES: Frequency of extracted eigenvalues = %D\n", dgmres->neig);
611: CHKERRQ (ierr);
612: ierr=PetscViewerASCIIPrintf (viewer, " DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
613: CHKERRQ (ierr);
614: ierr=PetscViewerASCIIPrintf (viewer, " DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
615: CHKERRQ (ierr);
616: ierr=PetscViewerASCIIPrintf (viewer, " DGMRES: relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
617: CHKERRQ (ierr);
618: ierr=PetscViewerASCIIPrintf (viewer, " DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
619: CHKERRQ (ierr);
620: } else {
621: SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_SUP,"Viewer type %s not supported for KSP DGMRES", ( (PetscObject) viewer)->type_name);
622: }
624: PetscFunctionReturn (0);
625: }
627: /* New DGMRES functions */
632: PetscErrorCode KSPDGMRESSetEigen_DGMRES (KSP ksp,PetscInt neig) {
633: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
635: if (neig< 0 && neig >dgmres->max_k) SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
636: dgmres->neig=neig;
637: PetscFunctionReturn (0);
638: }
644: PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES (KSP ksp,PetscInt max_neig) {
645: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
647: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
648: dgmres->max_neig=max_neig;
649: PetscFunctionReturn (0);
650: }
657: PetscErrorCode KSPDGMRESSetRatio_DGMRES (KSP ksp,PetscReal ratio) {
658: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
660: if (ratio <= 0) SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
661: dgmres->smv=ratio;
662: PetscFunctionReturn (0);
663: }
669: PetscErrorCode KSPDGMRESForce_DGMRES (KSP ksp,PetscInt force) {
670: KSP_DGMRES *dgmres = (KSP_DGMRES *) ksp->data;
672: if (force != 0 && force != 1) SETERRQ (((PetscObject)ksp)->comm, PETSC_ERR_ARG_OUTOFRANGE,"Value must be 0 or 1");
673: dgmres->force = 1;
674: PetscFunctionReturn (0);
675: }
683: PetscErrorCode KSPSetFromOptions_DGMRES (KSP ksp) {
685: PetscInt neig;
686: PetscInt max_neig;
687: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
688: PetscTruth flg;
689: PetscReal smv;
690: PetscInt input;
693: KSPSetFromOptions_GMRES (ksp);
694: CHKERRQ (ierr);
695: PetscOptionsHead ("KSP DGMRES Options");
696: CHKERRQ (ierr);
697: PetscOptionsInt ("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
698: CHKERRQ (ierr);
699: if (flg) {
700: KSPDGMRESSetEigen (ksp, neig);
701: CHKERRQ (ierr);
702: }
703: PetscOptionsInt ("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
704: CHKERRQ (ierr);
705: if (flg) {
706: KSPDGMRESSetMaxEigen (ksp, max_neig);
707: CHKERRQ (ierr);
708: }
709: ierr=PetscOptionsReal ("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &smv, &flg);
710: CHKERRQ (ierr);
711: if (flg) dgmres->smv = smv;
712: PetscOptionsInt ("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &input, &flg);
713: CHKERRQ (ierr);
714: if (flg) dgmres->improve = input;
715: PetscOptionsInt ("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &input, &flg);
716: CHKERRQ (ierr);
717: if (flg) dgmres->force = input;
718: PetscOptionsTail();
719: CHKERRQ (ierr);
720: PetscFunctionReturn (0);
721: }
726: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES (KSP ksp)
727: {
728: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
730: PetscInt i,j, k;
731: PetscInt r=dgmres->r;
732: PetscBLASInt nr, bmax;
733: PetscInt neig; /* number of eigenvalues to extract at each restart */
734: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
735: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
736: PetscInt N = dgmres->max_k+1;
737: PetscInt n = dgmres->it+1;
738: PetscReal alpha;
739: #if !defined(PETSC_MISSING_LAPACK_GETRF)
740: PetscBLASInt info;
741: #endif
744: ierr=PetscLogEventBegin (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
745: CHKERRQ (ierr);
746: if (dgmres->neig == 0)
747: PetscFunctionReturn (0);
748: if (max_neig < (r+neig1) && !dgmres->improve) {
749: ierr=PetscLogEventEnd (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
750: CHKERRQ (ierr);
751: PetscFunctionReturn (0);
752: }
754: ierr= KSPDGMRESComputeSchurForm (ksp, &neig);
755: CHKERRQ (ierr);
757: /* Form the extended Schur vectors X=VV*Sr */
758: if (!XX) {
759: ierr=VecDuplicateVecs (VEC_VV (0), neig1, &XX);
760: CHKERRQ (ierr);
761: }
762: for (j = 0; j<neig; j++) {
763: VecZeroEntries (XX[j]);
764: CHKERRQ (ierr);
765: VecMAXPY (XX[j], n, &SR[j*N], &VEC_VV (0));
766: CHKERRQ (ierr);
767: }
769: /* Orthogonalize X against U */
770: if (!ORTH) {
771: ierr=PetscMalloc (max_neig*sizeof (PetscReal), &ORTH);
772: CHKERRQ (ierr);
773: }
774: if (r > 0) {
775: /* modified Gram-Schmidt */
776: for (j = 0; j<neig; j++) {
777: for (i=0; i<r; i++) {
778: /* First, compute U'*X[j] */
779: VecDot (XX[j], UU[i], &alpha);
780: CHKERRQ (ierr);
781: /* Then, compute X(j)=X(j)-U*U'*X(j) */
782: VecAXPY (XX[j], -alpha, UU[i]);
783: CHKERRQ (ierr);
784: }
785: }
786: }
787: /* Compute MX = M^{-1}*A*X */
788: if (!MX) {
789: ierr=VecDuplicateVecs (VEC_VV (0), neig1, &MX);
790: CHKERRQ (ierr);
791: }
792: for (j = 0; j<neig; j++) {
793: KSP_PCApplyBAorAB (ksp, XX[j], MX[j], VEC_TEMP_MATOP);
794: CHKERRQ (ierr);
795: }
796: dgmres->matvecs += neig;
798: if ( (r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
799: KSPDGMRESImproveEig (ksp, neig);
800: CHKERRQ (ierr);
801: PetscFunctionReturn (0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
802: }
804: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
805: if (!XMX) {
806: ierr=PetscMalloc (neig1*neig1*sizeof (PetscReal), &XMX);
807: CHKERRQ (ierr);
808: }
809: for (j = 0; j < neig; j++) {
810: ierr=VecMDot (MX[j], neig, XX, & (XMX[j*neig1]));
811: CHKERRQ (ierr);
812: }
814: if (r > 0) {
815: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
816: if (!UMX) {
817: PetscMalloc (max_neig*neig1*sizeof (PetscReal), &UMX);
818: CHKERRQ (ierr);
819: }
820: for (j = 0; j < neig; j++) {
821: VecMDot (MX[j], r, UU, & (UMX[j*max_neig]));
822: CHKERRQ (ierr);
823: }
824: /* Compute XMU = X'*M^{-1}*A*U -- size (neig, r) */
825: if (!XMU) {
826: PetscMalloc (max_neig*neig1*sizeof (PetscReal), &XMU);
827: CHKERRQ (ierr);
828: }
829: for (j = 0; j<r; j++) {
830: ierr=VecMDot (MU[j], neig, XX, & (XMU[j*neig1]));
831: CHKERRQ (ierr);
832: }
834: }
836: /* Form the new matrix T = [T UMX; XMU XMX]; */
837: if (!TT) {
838: PetscMalloc (max_neig*max_neig*sizeof (PetscReal), &TT);
839: CHKERRQ (ierr);
840: }
841: if (r > 0) {
842: /* Add XMU to T */
843: for (j = 0; j < r; j++) {
844: PetscMemcpy (& (TT[max_neig*j+r]), & (XMU[neig1*j]), neig*sizeof (PetscReal));
845: CHKERRQ (ierr);
846: }
847: /* Add [UMX; XMX] to T */
848: for (j = 0; j < neig; j++) {
849: k = r+j;
850: PetscMemcpy (& (TT[max_neig*k]), & (UMX[max_neig*j]), r*sizeof (PetscReal));
851: CHKERRQ (ierr);
852: ierr=PetscMemcpy (& (TT[max_neig*k + r]), & (XMX[neig1*j]), neig*sizeof (PetscReal));
853: CHKERRQ (ierr);
854: }
855: } else { /* Add XMX to T */
856: for (j = 0; j < neig; j++) {
857: ierr=PetscMemcpy (& (TT[max_neig*j]), & (XMX[neig1*j]), neig*sizeof (PetscReal));
858: CHKERRQ (ierr);
859: }
860: }
862: dgmres->r += neig;
863: r=dgmres->r;
864: nr = PetscBLASIntCast (r);
865: /*LU Factorize T with Lapack xgetrf routine */
867: bmax = PetscBLASIntCast (max_neig);
868: if (!TTF) {
869: PetscMalloc (bmax*bmax*sizeof (PetscReal), &TTF);
870: CHKERRQ (ierr);
871: }
872: ierr=PetscMemcpy (TTF, TT, bmax*r*sizeof (PetscReal));
873: CHKERRQ (ierr);
874: if (!INVP) {
875: ierr=PetscMalloc (bmax*sizeof (PetscBLASInt), &INVP);
876: CHKERRQ (ierr);
877: }
878: #if defined(PETSC_MISSING_LAPACK_GETRF)
879: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
880: #else
881: LAPACKgetrf_ (&nr, &nr, TTF, &bmax, INVP, &info);
882: if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d", (int) info);
883: #endif
885: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
886: if (!UU) {
887: ierr=VecDuplicateVecs (VEC_VV (0), max_neig, &UU);
888: CHKERRQ (ierr);
889: ierr=VecDuplicateVecs (VEC_VV (0), max_neig, &MU);
890: CHKERRQ (ierr);
891: }
892: for (j=0; j<neig; j++) {
893: ierr=VecCopy (XX[j], UU[r-neig+j]);
894: CHKERRQ (ierr);
895: ierr=VecCopy (MX[j], MU[r-neig+j]);
896: CHKERRQ (ierr);
897: }
898: ierr=PetscLogEventEnd (KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
899: CHKERRQ (ierr);
900: PetscFunctionReturn (0);
901: }
907: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES (KSP ksp, PetscInt *neig) {
908: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
909: PetscErrorCode ierr;
910: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
911: PetscBLASInt bn, bN;
912: PetscReal *A;
913: PetscBLASInt ihi;
914: PetscBLASInt ldA; /* leading dimension of A */
915: PetscBLASInt ldQ; /* leading dimension of Q */
916: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
917: PetscReal *work; /* working vector */
918: PetscBLASInt lwork; /* size of the working vector */
919: PetscInt *perm; /* Permutation vector to sort eigenvalues */
920: PetscInt i, j;
921: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
922: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
923: PetscBLASInt *select;
924: PetscBLASInt *iwork;
925: PetscBLASInt liwork;
926: #if !defined(PETSC_MISSING_LAPACK_HSEQR)
927: PetscBLASInt ilo=1;
928: PetscBLASInt info;
929: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
930: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
931: #endif
934: bn=PetscBLASIntCast (n);
935: bN=PetscBLASIntCast (N);
936: ihi = ldQ = bn;
937: ldA=bN;
938: lwork = PetscBLASIntCast (5*N);
940: #if defined(PETSC_USE_COMPLEX)
941: SETERRQ (((PetscObject)ksp)->comm, -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
942: PetscFunctionReturn (-1);
943: #endif
945: PetscMalloc (ldA*ldA*sizeof (PetscReal), &A); CHKERRQ (ierr);
946: PetscMalloc (ldQ*n*sizeof (PetscReal), &Q); CHKERRQ (ierr);
947: PetscMalloc (lwork*sizeof (PetscReal), &work); CHKERRQ (ierr);
948: PetscMalloc (n*sizeof (PetscReal), &wr); CHKERRQ (ierr);
949: PetscMalloc (n*sizeof (PetscReal), &wi); CHKERRQ (ierr);
950: PetscMalloc (n*sizeof (PetscReal),&modul); CHKERRQ (ierr);
951: PetscMalloc (n*sizeof (PetscInt),&perm); CHKERRQ (ierr);
952: /* copy the Hessenberg matrix to work space */
953: PetscMemcpy (A, dgmres->hes_origin, ldA*ldA*sizeof (PetscReal)); CHKERRQ (ierr);
954: /* Compute eigenvalues with the Schur form */
955: #if defined(PETSC_MISSING_LAPACK_HSEQR)
956: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
957: #else
958: LAPACKhseqr_ ("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info);
959: if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d", (int) info);
960: #endif
961: PetscFree(work);
963: /* sort the eigenvalues */
964: for (i=0; i<n; i++) {
965: modul[i]=sqrt (wr[i]*wr[i]+wi[i]*wi[i]);
966: }
967: for (i=0; i<n; i++) {
968: perm[i] = i;
969: }
970: PetscSortRealWithPermutation (n, modul, perm);CHKERRQ (ierr);
971: /* save the complex modulus of the largest eigenvalue in magnitude */
972: if (dgmres->lambdaN < modul[perm[n-1]]) {
973: dgmres->lambdaN=modul[perm[n-1]];
974: }
975: /* count the number of extracted eigenvalues (with complex conjugates) */
976: NbrEig = 0;
977: while (NbrEig < dgmres->neig)
978: {
979: if (wi[perm[NbrEig]] != 0)
980: NbrEig += 2;
981: else
982: NbrEig += 1;
983: }
984: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
986: PetscMalloc(n * sizeof(PetscBLASInt), &select);
987: PetscMemzero(select, n * sizeof(PetscBLASInt));
989: if (dgmres->GreatestEig == PETSC_FALSE)
990: {
991: for (j = 0; j < NbrEig; j++)
992: select[perm[j]] = 1;
993: }
994: else
995: {
996: for (j = 0; j < NbrEig; j++)
997: select[perm[n-j-1]] = 1;
998: }
999: /* call Lapack dtrsen */
1000: lwork = PetscMax(1, 4 * NbrEig * (bn-NbrEig));
1001: liwork = PetscMax(1, 2 * NbrEig * (bn-NbrEig));
1002: PetscMalloc(lwork * sizeof(PetscScalar), &work);
1003: PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1004: #if defined(PETSC_MISSING_LAPACK_TRSEN)
1005: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
1006: #else
1007: LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info);
1008: if (info == 1) SETERRQ(((PetscObject)ksp)->comm, PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1009: #endif
1011: /* Extract the Schur vectors */
1012: for (j = 0; j < NbrEig; j++)
1013: {
1014: PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
1015: }
1016: *neig=NbrEig;
1017: ierr=PetscFree (A); CHKERRQ (ierr);
1018: ierr=PetscFree (Q); CHKERRQ (ierr);
1019: ierr=PetscFree (wr); CHKERRQ (ierr);
1020: ierr=PetscFree (wi); CHKERRQ (ierr);
1021: ierr=PetscFree (work); CHKERRQ (ierr);
1022: ierr=PetscFree (modul); CHKERRQ (ierr);
1023: ierr=PetscFree (perm); CHKERRQ (ierr);
1024: PetscFree(work);
1025: PetscFree(iwork);
1026: PetscFunctionReturn (0);
1027: }
1034: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES (KSP ksp, Vec x, Vec y) {
1035: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
1036: PetscInt i, r = dgmres->r;
1037: PetscErrorCode ierr;
1038: PetscBLASInt nrhs = 1;
1039: PetscReal alpha = 1.0;
1040: PetscInt max_neig = dgmres->max_neig;
1041: PetscBLASInt br = PetscBLASIntCast (r);
1042: PetscBLASInt bmax = PetscBLASIntCast (max_neig);
1043: PetscInt lambda = dgmres->lambdaN;
1044: #if !defined(PETSC_MISSING_LAPACK_GETRS)
1045: PetscReal berr, ferr;
1046: PetscBLASInt info;
1047: #endif
1050: ierr=PetscLogEventBegin (KSP_DGMRESApplyDeflation, ksp, 0, 0, 0); CHKERRQ (ierr);
1051: if (!r) {
1052: ierr=VecCopy (x,y); CHKERRQ (ierr);
1053: PetscFunctionReturn (0);
1054: }
1055: /* Compute U'*x */
1056: if (!X1) {
1057: PetscMalloc (bmax*sizeof (PetscReal), &X1); CHKERRQ (ierr);
1058: PetscMalloc (bmax*sizeof (PetscReal), &X2); CHKERRQ (ierr);
1059: }
1060: VecMDot (x, r, UU, X1); CHKERRQ (ierr);
1062: /* Solve T*X1=X2 for X1*/
1063: PetscMemcpy (X2, X1, br*sizeof (PetscReal)); CHKERRQ (ierr);
1064: #if defined(PETSC_MISSING_LAPACK_GETRS)
1065: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
1066: #else
1067: LAPACKgetrs_ ("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info);
1068: if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
1069: #endif
1070: /* Iterative refinement -- is it really necessary ?? */
1071: if (!WORK) {
1072: ierr=PetscMalloc (3*bmax*sizeof (PetscReal), &WORK); CHKERRQ (ierr);
1073: ierr=PetscMalloc (bmax*sizeof (PetscInt), &IWORK); CHKERRQ (ierr);
1074: }
1075: #if defined(PETSC_MISSING_LAPACK_GERFS)
1076: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
1077: #else
1078: LAPACKgerfs_ ("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,
1079: X1, &bmax, &ferr, &berr, WORK, IWORK, &info);
1080: if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
1081: #endif
1083: for (i = 0; i < r; i++) {
1084: X2[i] = X1[i]/lambda - X2[i];
1085: }
1087: /* Compute X2=U*X2 */
1088: ierr=VecZeroEntries (y);
1089: CHKERRQ (ierr);
1090: ierr=VecMAXPY (y, r, X2, UU);
1091: CHKERRQ (ierr);
1093: ierr=VecAXPY (y, alpha, x);
1094: CHKERRQ (ierr);
1096: ierr=PetscLogEventEnd (KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1097: CHKERRQ (ierr);
1098: PetscFunctionReturn (0);
1099: }
1105: PetscErrorCode KSPDGMRESImproveEig_DGMRES (KSP ksp, PetscInt neig)
1106: {
1107: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
1108: PetscInt j,r_old, r = dgmres->r;
1109: PetscBLASInt i = 0;
1110: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
1111: PetscInt bmax = dgmres->max_neig;
1112: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
1113: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
1114: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
1115: PetscBLASInt N; /* size of AUAU */
1116: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
1117: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
1118: PetscReal *work; /* working vector */
1119: PetscBLASInt lwork; /* size of the working vector */
1120: PetscBLASInt info; /* Output info from Lapack routines */
1121: PetscInt *perm; /* Permutation vector to sort eigenvalues */
1122: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
1123: PetscInt ierr;
1124: PetscBLASInt NbrEig = 0,nr,bm;
1125: PetscBLASInt *select;
1126: PetscBLASInt liwork, *iwork;
1127: #if !defined(PETSC_MISSING_LAPACK_TGSEN)
1128: PetscReal Dif[2];
1129: PetscBLASInt ijob = 2;
1130: PetscBLASInt wantQ = 1, wantZ = 1;
1131: #endif
1134: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1135: if (!AUU) {
1136: ierr=PetscMalloc (aug1*aug1*sizeof (PetscReal), &AUU);
1137: CHKERRQ (ierr);
1138: ierr=PetscMalloc (aug1*aug1*sizeof (PetscReal), &AUAU);
1139: CHKERRQ (ierr);
1140: }
1141: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1142: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1143: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1144: for (j=0; j < r; j++) {
1145: VecMDot (UU[j], r, MU, &AUU[j*aug1]); CHKERRQ (ierr);
1146: }
1147: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1148: for (j = 0; j < neig; j++) {
1149: VecMDot (XX[j], r, MU, &AUU[ (r+j) *aug1]); CHKERRQ (ierr);
1150: }
1151: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1152: for (j = 0; j < r; j++) {
1153: VecMDot (UU[j], neig, MX, &AUU[j*aug1+r]); CHKERRQ (ierr);
1154: }
1155: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
1156: for (j = 0; j < neig; j++) {
1157: VecMDot (XX[j], neig, MX, &AUU[ (r+j) *aug1 + r]); CHKERRQ (ierr);
1158: }
1160: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1161: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1162: for (j=0; j < r; j++) {
1163: VecMDot (MU[j], r, MU, &AUAU[j*aug1]); CHKERRQ (ierr);
1164: }
1165: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1166: for (j = 0; j < neig; j++) {
1167: VecMDot (MX[j], r, MU, &AUAU[ (r+j) *aug1]); CHKERRQ (ierr);
1168: }
1169: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1170: for (j = 0; j < r; j++) {
1171: VecMDot (MU[j], neig, MX, &AUAU[j*aug1+r]); CHKERRQ (ierr);
1172: }
1173: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
1174: for (j = 0; j < neig; j++) {
1175: VecMDot (MX[j], neig, MX, &AUAU[ (r+j) *aug1 + r]); CHKERRQ (ierr);
1176: }
1178: /* Computation of the eigenvectors */
1179: ldA = PetscBLASIntCast (aug1);
1180: N = PetscBLASIntCast (aug);
1181: lwork = 8 * N + 20; /* sizeof the working space */
1182: PetscMalloc (N*sizeof (PetscReal), &wr); CHKERRQ (ierr);
1183: PetscMalloc (N*sizeof (PetscReal), &wi); CHKERRQ (ierr);
1184: PetscMalloc (N*sizeof (PetscReal), &beta); CHKERRQ (ierr);
1185: PetscMalloc (N*sizeof (PetscReal), &modul); CHKERRQ (ierr);
1186: PetscMalloc (N*sizeof (PetscInt), &perm); CHKERRQ (ierr);
1187: PetscMalloc (N*N*sizeof (PetscReal), &Q); CHKERRQ (ierr);
1188: PetscMalloc (N*N*sizeof (PetscReal), &Z); CHKERRQ (ierr);
1189: PetscMalloc (lwork*sizeof (PetscReal), &work); CHKERRQ (ierr);
1190: #if defined(PETSC_MISSING_LAPACK_GGES)
1191: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1192: #else
1193: LAPACKgges_ ("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info);
1194: if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1195: #endif
1196: for (i=0; i<N; i++) {
1197: if (beta[i] !=0.0) {
1198: wr[i] /=beta[i];
1199: wi[i] /=beta[i];
1200: }
1201: }
1202: /* sort the eigenvalues */
1203: for (i=0; i<N; i++) {
1204: modul[i]=sqrt (wr[i]*wr[i]+wi[i]*wi[i]);
1205: }
1206: for (i=0; i<N; i++) {
1207: perm[i] = i;
1208: }
1209: PetscSortRealWithPermutation (N, modul, perm); CHKERRQ (ierr);
1210: /* Save the norm of the largest eigenvalue */
1211: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1212: /* Allocate space to extract the first r schur vectors */
1213: if (!SR2) {
1214: ierr=PetscMalloc (aug1*bmax*sizeof (PetscReal), &SR2); CHKERRQ (ierr);
1215: }
1216: /* count the number of extracted eigenvalues ( complex conjugates count as 2) */
1217: while (NbrEig < bmax) {
1218: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1219: else NbrEig += 2;
1220: }
1221: if (NbrEig > bmax) NbrEig = bmax - 1;
1222: r_old=r; /* previous size of r */
1223: dgmres->r = r = NbrEig;
1224:
1225: /* Select the eigenvalues to reorder */
1226: PetscMalloc(N * sizeof(PetscBLASInt), &select);
1227: PetscMemzero(select, N * sizeof(PetscBLASInt));
1228: if (dgmres->GreatestEig == PETSC_FALSE)
1229: {
1230: for (j = 0; j < NbrEig; j++)
1231: select[perm[j]] = 1;
1232: }
1233: else
1234: {
1235: for (j = 0; j < NbrEig; j++)
1236: select[perm[N-j-1]] = 1;
1237: }
1238: /* Reorder and extract the new <r> schur vectors */
1239: lwork = PetscMax(4 * N + 16, 2 * NbrEig * (N - NbrEig) );
1240: liwork = PetscMax(N + 6, 2 * NbrEig * (N - NbrEig) );
1241: PetscFree(work);
1242: PetscMalloc(lwork * sizeof(PetscReal), &work);
1243: PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1244: #if defined(PETSC_MISSING_LAPACK_TGSEN )
1245: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1246: #else
1247: LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, & (Dif[0]), work, &lwork, iwork, &liwork, &info);
1248: if (info == 1) SETERRQ(((PetscObject)ksp)->comm, -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1249: #endif
1250:
1251:
1252: for (j=0; j<r; j++) {
1253: PetscMemcpy (&SR2[j*aug1], & (Z[j*N]), N*sizeof (PetscReal)); CHKERRQ (ierr);
1254: }
1256: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1257: -- save it temporarily in MU */
1258: for (j = 0; j < r; j++) {
1259: VecZeroEntries (MU[j]); CHKERRQ (ierr);
1260: VecMAXPY (MU[j], r_old, &SR2[j*aug1], UU); CHKERRQ (ierr);
1261: VecMAXPY (MU[j], neig, &SR2[j*aug1+r_old], XX); CHKERRQ (ierr);
1262: }
1263: /* Form T = U'*MU*U */
1264: for (j = 0; j < r; j++) {
1265: ierr=VecCopy (MU[j], UU[j]); CHKERRQ (ierr);
1266: KSP_PCApplyBAorAB (ksp, UU[j], MU[j], VEC_TEMP_MATOP); CHKERRQ (ierr);
1267: }
1268: dgmres->matvecs += r;
1269: for (j = 0; j < r; j++) {
1270: VecMDot (MU[j], r, UU, &TT[j*bmax]); CHKERRQ (ierr);
1271: }
1272: /* Factorize T */
1273: ierr=PetscMemcpy (TTF, TT, bmax*r*sizeof (PetscReal)); CHKERRQ (ierr);
1274: nr = PetscBLASIntCast (r);
1275: bm = PetscBLASIntCast (bmax);
1276: #if defined(PETSC_MISSING_LAPACK_GETRF)
1277: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1278: #else
1279: LAPACKgetrf_ (&nr, &nr, TTF, &bm, INVP, &info);
1280: if (info) SETERRQ1 (((PetscObject)ksp)->comm, PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d", (int) info);
1281: #endif
1282: /* Free Memory */
1283: PetscFree (wr); PetscFree (wi); PetscFree (beta);
1284: PetscFree (modul); PetscFree (perm);
1285: PetscFree (Q); PetscFree (Z);
1286: PetscFree (work); PetscFree(iwork);
1288: PetscFunctionReturn (0);
1289: }
1292: /* end new DGMRES functions */
1294: /*MC
1295: KSPDGMRES - Implements the deflated GMRES as defined in [1,2]; In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the stagnation occurs.
1296: GMRES Options Database Keys:
1297: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1298: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1299: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1300: vectors are allocated as needed)
1301: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1302: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1303: . -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1304: stability of the classical Gram-Schmidt orthogonalization.
1305: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1306: DGMRES Options Database Keys:
1307: -ksp_dgmres_eigen <neig> - Number of smallest eigenvalues to extract at each restart
1308: -ksp_dgmres_max_eigen <max_neig> - Maximum number of eigenvalues that can be extracted during the iterative process
1309: -ksp_dgmres_force <0, 1> - Use the deflation at each restart; switch off the adaptive strategy.
1311: Level: beginner
1313: Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1315: References:
1316: [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1317: [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.
1319: Contributed by: Desire NUENTSA WAKAM,INRIA
1321: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1322: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1323: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1324: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1326: M*/
1331: PetscErrorCode KSPCreate_DGMRES (KSP ksp) {
1332: KSP_DGMRES *dgmres;
1336: PetscNewLog (ksp,KSP_DGMRES,&dgmres); CHKERRQ (ierr);
1337: ksp->data = (void*) dgmres;
1339: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
1340: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
1342: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1343: ksp->ops->setup = KSPSetUp_DGMRES;
1344: ksp->ops->solve = KSPSolve_DGMRES;
1345: ksp->ops->destroy = KSPDestroy_DGMRES;
1346: ksp->ops->view = KSPView_DGMRES;
1347: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1348: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1349: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1351: PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetPreAllocateVectors_C",
1352: "KSPGMRESSetPreAllocateVectors_GMRES",
1353: KSPGMRESSetPreAllocateVectors_GMRES); CHKERRQ (ierr);
1354: PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetOrthogonalization_C",
1355: "KSPGMRESSetOrthogonalization_GMRES",
1356: KSPGMRESSetOrthogonalization_GMRES); CHKERRQ (ierr);
1357: PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetRestart_C",
1358: "KSPGMRESSetRestart_GMRES",
1359: KSPGMRESSetRestart_GMRES); CHKERRQ (ierr);
1360: PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetHapTol_C",
1361: "KSPGMRESSetHapTol_GMRES",
1362: KSPGMRESSetHapTol_GMRES); CHKERRQ (ierr);
1363: PetscObjectComposeFunctionDynamic ( (PetscObject) ksp,"KSPGMRESSetCGSRefinementType_C",
1364: "KSPGMRESSetCGSRefinementType_GMRES",
1365: KSPGMRESSetCGSRefinementType_GMRES); CHKERRQ (ierr);
1366: /* -- New functions defined in DGMRES -- */
1367: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetEigen_C",
1368: "KSPDGMRESSetEigen_DGMRES",
1369: KSPDGMRESSetEigen_DGMRES); CHKERRQ (ierr);
1370: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetMaxEigen_C",
1371: "KSPDGMRESSetMaxEigen_DGMRES",
1372: KSPDGMRESSetMaxEigen_DGMRES); CHKERRQ (ierr);
1373: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESSetRatio_C",
1374: "KSPDGMRESSetRatio_DGMRES",
1375: KSPDGMRESSetRatio_DGMRES); CHKERRQ (ierr);
1376: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESForce_C",
1377: "KSPDGMRESForce_DGMRES",
1378: KSPDGMRESForce_DGMRES); CHKERRQ (ierr);
1379: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESComputeSchurForm_C",
1380: "KSPDGMRESComputeSchurForm_DGMRES",
1381: KSPDGMRESComputeSchurForm_DGMRES); CHKERRQ (ierr);
1382: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESComputeDeflationData_C",
1383: "KSPDGMRESComputeDeflationData_DGMRES",
1384: KSPDGMRESComputeDeflationData_DGMRES); CHKERRQ (ierr);
1385: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESApplyDeflation_C",
1386: "KSPDGMRESApplyDeflation_DGMRES",
1387: KSPDGMRESApplyDeflation_DGMRES); CHKERRQ (ierr);
1388: ierr=PetscObjectComposeFunctionDynamic ( (PetscObject) ksp, "KSPDGMRESImproveEig_C", "KSPDGMRESImproveEig_DGMRES", KSPDGMRESImproveEig_DGMRES); CHKERRQ (ierr);
1390: ierr=PetscLogEventRegister ("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1391: ierr=PetscLogEventRegister ("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1393: dgmres->haptol = 1.0e-30;
1394: dgmres->q_preallocate = 0;
1395: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1396: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1397: dgmres->nrs = 0;
1398: dgmres->sol_temp = 0;
1399: dgmres->max_k = GMRES_DEFAULT_MAXK;
1400: dgmres->Rsvd = 0;
1401: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1402: dgmres->orthogwork = 0;
1404: /* Default values for the deflation */
1405: dgmres->r = 0;
1406: dgmres->neig = DGMRES_DEFAULT_EIG;
1407: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1408: dgmres->lambdaN = 0.0;
1409: dgmres->smv = SMV;
1410: dgmres->force = 0;
1411: dgmres->matvecs = 0;
1412: dgmres->improve = 0;
1413: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1414: PetscFunctionReturn (0);
1415: }