Actual source code: bcgsl.c
2: /*
3: * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
4: * "Enhanced implementation of BiCGStab(L) for solving linear systems
5: * of equations". This uses tricky delayed updating ideas to prevent
6: * round-off buildup.
7: *
8: * This has not been completely cleaned up into PETSc style.
9: *
10: * All the BLAS and LAPACK calls below should be removed and replaced with
11: * loops and the macros for block solvers converted from LINPACK; there is no way
12: * calls to BLAS/LAPACK make sense for size 2, 3, 4, etc.
13: */
14: #include <private/kspimpl.h> /*I "petscksp.h" I*/
15: #include <../src/ksp/ksp/impls/bcgsl/bcgslimpl.h>
16: #include <petscblaslapack.h>
21: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
22: {
23: KSP_BCGSL *bcgsl = (KSP_BCGSL *) ksp->data;
24: PetscScalar alpha, beta, omega, sigma;
25: PetscScalar rho0, rho1;
26: PetscReal kappa0, kappaA, kappa1;
27: PetscReal ghat;
28: PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
29: PetscBool bUpdateX;
30: PetscInt maxit;
31: PetscInt h, i, j, k, vi, ell;
32: PetscBLASInt ldMZ,bierr;
37: /* set up temporary vectors */
38: vi = 0;
39: ell = bcgsl->ell;
40: bcgsl->vB = ksp->work[vi]; vi++;
41: bcgsl->vRt = ksp->work[vi]; vi++;
42: bcgsl->vTm = ksp->work[vi]; vi++;
43: bcgsl->vvR = ksp->work+vi; vi += ell+1;
44: bcgsl->vvU = ksp->work+vi; vi += ell+1;
45: bcgsl->vXr = ksp->work[vi]; vi++;
46: ldMZ = PetscBLASIntCast(ell+1);
48: /* Prime the iterative solver */
49: KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
50: VecNorm(VVR[0], NORM_2, &zeta0);
51: rnmax_computed = zeta0;
52: rnmax_true = zeta0;
54: (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
55: if (ksp->reason) {
56: PetscObjectTakeAccess(ksp);
57: ksp->its = 0;
58: ksp->rnorm = zeta0;
59: PetscObjectGrantAccess(ksp);
60: return(0);
61: }
63: VecSet(VVU[0],0.0);
64: alpha = 0.;
65: rho0 = omega = 1;
67: if (bcgsl->delta>0.0) {
68: VecCopy(VX, VXR);
69: VecSet(VX,0.0);
70: VecCopy(VVR[0], VB);
71: } else {
72: VecCopy(ksp->vec_rhs, VB);
73: }
75: /* Life goes on */
76: VecCopy(VVR[0], VRT);
77: zeta = zeta0;
79: KSPGetTolerances(ksp, PETSC_NULL, PETSC_NULL, PETSC_NULL, &maxit);
81: for (k=0; k<maxit; k += bcgsl->ell) {
82: ksp->its = k;
83: ksp->rnorm = zeta;
85: KSPLogResidualHistory(ksp, zeta);
86: KSPMonitor(ksp, ksp->its, zeta);
88: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
89: if (ksp->reason < 0) return(0);
90: else if (ksp->reason) break;
92: /* BiCG part */
93: rho0 = -omega*rho0;
94: nrm0 = zeta;
95: for (j=0; j<bcgsl->ell; j++) {
96: /* rho1 <- r_j' * r_tilde */
97: VecDot(VVR[j], VRT, &rho1);
98: if (rho1 == 0.0) {
99: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
100: return(0);
101: }
102: beta = alpha*(rho1/rho0);
103: rho0 = rho1;
104: for (i=0; i<=j; i++) {
105: /* u_i <- r_i - beta*u_i */
106: VecAYPX(VVU[i], -beta, VVR[i]);
107: }
108: /* u_{j+1} <- inv(K)*A*u_j */
109: KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
111: VecDot(VVU[j+1], VRT, &sigma);
112: if (sigma == 0.0) {
113: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
114: return(0);
115: }
116: alpha = rho1/sigma;
118: /* x <- x + alpha*u_0 */
119: VecAXPY(VX, alpha, VVU[0]);
121: for (i=0; i<=j; i++) {
122: /* r_i <- r_i - alpha*u_{i+1} */
123: VecAXPY(VVR[i], -alpha, VVU[i+1]);
124: }
126: /* r_{j+1} <- inv(K)*A*r_j */
127: KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
129: VecNorm(VVR[0], NORM_2, &nrm0);
130: if (bcgsl->delta>0.0) {
131: if (rnmax_computed<nrm0) rnmax_computed = nrm0;
132: if (rnmax_true<nrm0) rnmax_true = nrm0;
133: }
135: /* NEW: check for early exit */
136: (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
137: if (ksp->reason) {
138: PetscObjectTakeAccess(ksp);
139: ksp->its = k+j;
140: ksp->rnorm = nrm0;
141: PetscObjectGrantAccess(ksp);
142: if (ksp->reason < 0) return(0);
143: }
144: }
146: /* Polynomial part */
147: for(i = 0; i <= bcgsl->ell; ++i) {
148: VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
149: }
150: /* Symmetrize MZa */
151: for(i = 0; i <= bcgsl->ell; ++i) {
152: for(j = i+1; j <= bcgsl->ell; ++j) {
153: MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
154: }
155: }
156: /* Copy MZa to MZb */
157: PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));
159: if (!bcgsl->bConvex || bcgsl->ell==1) {
160: PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);
162: AY0c[0] = -1;
163: LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
164: if (ierr!=0) {
165: ksp->reason = KSP_DIVERGED_BREAKDOWN;
166: return(0);
167: }
168: PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
169: LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
170: } else {
171: PetscBLASInt ione = 1;
172: PetscScalar aone = 1.0, azero = 0.0;
173: PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);
175: LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
176: if (ierr!=0) {
177: ksp->reason = KSP_DIVERGED_BREAKDOWN;
178: return(0);
179: }
180: PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
181: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
182: AY0c[0] = -1;
183: AY0c[bcgsl->ell] = 0.;
185: PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
186: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);
188: AYlc[0] = 0.;
189: AYlc[bcgsl->ell] = -1;
191: BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);
193: kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);
195: /* round-off can cause negative kappa's */
196: if (kappa0<0) kappa0 = -kappa0;
197: kappa0 = sqrt(kappa0);
199: kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
201: BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);
203: kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
205: if (kappa1<0) kappa1 = -kappa1;
206: kappa1 = PetscSqrtReal(kappa1);
208: if (kappa0!=0.0 && kappa1!=0.0) {
209: if (kappaA<0.7*kappa0*kappa1) {
210: ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
211: } else {
212: ghat = kappaA/(kappa1*kappa1);
213: }
214: for (i=0; i<=bcgsl->ell; i++) {
215: AY0c[i] = AY0c[i] - ghat* AYlc[i];
216: }
217: }
218: }
220: omega = AY0c[bcgsl->ell];
221: for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
222: omega = AY0c[h];
223: }
224: if (omega==0.0) {
225: ksp->reason = KSP_DIVERGED_BREAKDOWN;
226: return(0);
227: }
230: VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
231: for (i=1; i<=bcgsl->ell; i++) {
232: AY0c[i] *= -1.0;
233: }
234: VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
235: VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
236: for (i=1; i<=bcgsl->ell; i++) {
237: AY0c[i] *= -1.0;
238: }
239: VecNorm(VVR[0], NORM_2, &zeta);
241: /* Accurate Update */
242: if (bcgsl->delta>0.0) {
243: if (rnmax_computed<zeta) rnmax_computed = zeta;
244: if (rnmax_true<zeta) rnmax_true = zeta;
246: bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
247: if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
248: /* r0 <- b-inv(K)*A*X */
249: KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
250: VecAYPX(VVR[0], -1.0, VB);
251: rnmax_true = zeta;
253: if (bUpdateX) {
254: VecAXPY(VXR,1.0,VX);
255: VecSet(VX,0.0);
256: VecCopy(VVR[0], VB);
257: rnmax_computed = zeta;
258: }
259: }
260: }
261: }
262: if (bcgsl->delta>0.0) {
263: VecAXPY(VX,1.0,VXR);
264: }
266: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
267: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
268: return(0);
269: }
273: /*@
274: KSPBCGSLSetXRes - Sets the parameter governing when
275: exact residuals will be used instead of computed residuals.
277: Logically Collective on KSP
279: Input Parameters:
280: + ksp - iterative context obtained from KSPCreate
281: - delta - computed residuals are used alone when delta is not positive
283: Options Database Keys:
285: . -ksp_bcgsl_xres delta
287: Level: intermediate
289: .keywords: KSP, BiCGStab(L), set, exact residuals
291: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
292: @*/
293: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
294: {
295: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
300: if (ksp->setupstage) {
301: if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
302: VecDestroyVecs(ksp->nwork,&ksp->work);
303: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
304: ksp->setupstage = KSP_SETUP_NEW;
305: }
306: }
307: bcgsl->delta = delta;
308: return(0);
309: }
313: /*@
314: KSPBCGSLSetPol - Sets the type of polynomial part will
315: be used in the BiCGSTab(L) solver.
317: Logically Collective on KSP
319: Input Parameters:
320: + ksp - iterative context obtained from KSPCreate
321: - uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
323: Options Database Keys:
325: + -ksp_bcgsl_cxpoly - use enhanced polynomial
326: . -ksp_bcgsl_mrpoly - use standard polynomial
328: Level: intermediate
330: .keywords: KSP, BiCGStab(L), set, polynomial
332: .seealso: @()
333: @*/
334: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
335: {
336: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
342: if (!ksp->setupstage) {
343: bcgsl->bConvex = uMROR;
344: } else if (bcgsl->bConvex != uMROR) {
345: /* free the data structures,
346: then create them again
347: */
348: VecDestroyVecs(ksp->nwork,&ksp->work);
349: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
350: bcgsl->bConvex = uMROR;
351: ksp->setupstage = KSP_SETUP_NEW;
352: }
353: return(0);
354: }
358: /*@
359: KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
361: Logically Collective on KSP
363: Input Parameters:
364: + ksp - iterative context obtained from KSPCreate
365: - ell - number of search directions
367: Options Database Keys:
369: . -ksp_bcgsl_ell ell
371: Level: intermediate
373: .keywords: KSP, BiCGStab(L), set, exact residuals,
375: .seealso: @()
376: @*/
377: PetscErrorCode KSPBCGSLSetEll(KSP ksp, PetscInt ell)
378: {
379: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
383: if (ell < 1) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
386: if (!ksp->setupstage) {
387: bcgsl->ell = ell;
388: } else if (bcgsl->ell != ell) {
389: /* free the data structures, then create them again */
390: VecDestroyVecs(ksp->nwork,&ksp->work);
391: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
392: bcgsl->ell = ell;
393: ksp->setupstage = KSP_SETUP_NEW;
394: }
395: return(0);
396: }
400: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
401: {
402: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
403: PetscErrorCode ierr;
404: PetscBool isascii, isstring;
407: PetscTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
408: PetscTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
410: if (isascii) {
411: PetscViewerASCIIPrintf(viewer, " BCGSL: Ell = %D\n", bcgsl->ell);
412: PetscViewerASCIIPrintf(viewer, " BCGSL: Delta = %lg\n", bcgsl->delta);
413: } else {
414: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
415: }
416: return(0);
417: }
421: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
422: {
423: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
425: PetscInt this_ell;
426: PetscReal delta;
427: PetscBool flga = PETSC_FALSE, flg;
430: /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
431: don't need to be called here.
432: */
433: PetscOptionsHead("KSP BiCGStab(L) Options");
435: /* Set number of search directions */
436: PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
437: if (flg) {
438: KSPBCGSLSetEll(ksp, this_ell);
439: }
441: /* Set polynomial type */
442: PetscOptionsBool("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga,&flga,PETSC_NULL);
443: if (flga) {
444: KSPBCGSLSetPol(ksp, PETSC_TRUE);
445: } else {
446: flg = PETSC_FALSE;
447: PetscOptionsBool("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg,&flg,PETSC_NULL);
448: KSPBCGSLSetPol(ksp, PETSC_FALSE);
449: }
451: /* Will computed residual be refreshed? */
452: PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
453: if (flg) {
454: KSPBCGSLSetXRes(ksp, delta);
455: }
456: PetscOptionsTail();
457: return(0);
458: }
462: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
463: {
464: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
465: PetscInt ell = bcgsl->ell,ldMZ = ell+1;
469: if (ksp->pc_side == PC_SYMMETRIC) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
470: else if (ksp->pc_side == PC_RIGHT) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
471: KSPDefaultGetWork(ksp, 6+2*ell);
472: PetscMalloc5(ldMZ,PetscScalar,&AY0c,ldMZ,PetscScalar,&AYlc,ldMZ,PetscScalar,&AYtc,ldMZ*ldMZ,PetscScalar,&MZa,ldMZ*ldMZ,PetscScalar,&MZb);
473: return(0);
474: }
478: PetscErrorCode KSPReset_BCGSL(KSP ksp)
479: {
480: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
483: VecDestroyVecs(ksp->nwork,&ksp->work);
484: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
485: return(0);
486: }
490: PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
491: {
495: KSPReset_BCGSL(ksp);
496: KSPDefaultDestroy(ksp);
497: return(0);
498: }
500: /*MC
501: KSPBCGSL - Implements a slight variant of the Enhanced
502: BiCGStab(L) algorithm in (3) and (2). The variation
503: concerns cases when either kappa0**2 or kappa1**2 is
504: negative due to round-off. Kappa0 has also been pulled
505: out of the denominator in the formula for ghat.
507: References:
508: 1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
509: approaches for the stable computation of hybrid BiCG
510: methods", Applied Numerical Mathematics: Transactions
511: f IMACS, 19(3), pp 235-54, 1996.
512: 2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
513: "BiCGStab(L) and other hybrid Bi-CG methods",
514: Numerical Algorithms, 7, pp 75-109, 1994.
515: 3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
516: for solving linear systems of equations", preprint
517: from www.citeseer.com.
519: Contributed by: Joel M. Malard, email jm.malard@pnl.gov
521: Options Database Keys:
522: + -ksp_bcgsl_ell <ell> Number of Krylov search directions
523: - -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
524: - -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals
526: Notes: Supports left preconditioning only
528: Level: beginner
530: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS, KSPSetPCSide()
532: M*/
536: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
537: {
539: KSP_BCGSL *bcgsl;
542: /* allocate BiCGStab(L) context */
543: PetscNewLog(ksp, KSP_BCGSL, &bcgsl);
544: ksp->data = (void*)bcgsl;
546: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
547: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
549: ksp->ops->setup = KSPSetUp_BCGSL;
550: ksp->ops->solve = KSPSolve_BCGSL;
551: ksp->ops->reset = KSPReset_BCGSL;
552: ksp->ops->destroy = KSPDestroy_BCGSL;
553: ksp->ops->buildsolution = KSPDefaultBuildSolution;
554: ksp->ops->buildresidual = KSPDefaultBuildResidual;
555: ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
556: ksp->ops->view = KSPView_BCGSL;
558: /* Let the user redefine the number of directions vectors */
559: bcgsl->ell = 2;
561: /*Choose between a single MR step or an averaged MR/OR */
562: bcgsl->bConvex = PETSC_FALSE;
564: /* Set the threshold for when exact residuals will be used */
565: bcgsl->delta = 0.0;
566: return(0);
567: }