Actual source code: bcgsl.c

  2: /*
  3:  * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
  4:  * "Enhanced implementation of BiCGStab(L) for solving linear systems
  5:  * of equations". This uses tricky delayed updating ideas to prevent
  6:  * round-off buildup.
  7:  *
  8:  * This has not been completely cleaned up into PETSc style.
  9:  *
 10:  * All the BLAS and LAPACK calls below should be removed and replaced with 
 11:  * loops and the macros for block solvers converted from LINPACK; there is no way
 12:  * calls to BLAS/LAPACK make sense for size 2, 3, 4, etc.
 13:  */
 14: #include <private/kspimpl.h>              /*I   "petscksp.h" I*/
 15: #include <../src/ksp/ksp/impls/bcgsl/bcgslimpl.h>
 16: #include <petscblaslapack.h>


 21: static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
 22: {
 23:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
 24:   PetscScalar    alpha, beta, omega, sigma;
 25:   PetscScalar    rho0, rho1;
 26:   PetscReal      kappa0, kappaA, kappa1;
 27:   PetscReal      ghat;
 28:   PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
 29:   PetscBool      bUpdateX;
 30:   PetscInt       maxit;
 31:   PetscInt       h, i, j, k, vi, ell;
 32:   PetscBLASInt   ldMZ,bierr;


 37:   /* set up temporary vectors */
 38:   vi = 0;
 39:   ell = bcgsl->ell;
 40:   bcgsl->vB    = ksp->work[vi]; vi++;
 41:   bcgsl->vRt   = ksp->work[vi]; vi++;
 42:   bcgsl->vTm   = ksp->work[vi]; vi++;
 43:   bcgsl->vvR   = ksp->work+vi; vi += ell+1;
 44:   bcgsl->vvU   = ksp->work+vi; vi += ell+1;
 45:   bcgsl->vXr   = ksp->work[vi]; vi++;
 46:   ldMZ = PetscBLASIntCast(ell+1);

 48:   /* Prime the iterative solver */
 49:   KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
 50:   VecNorm(VVR[0], NORM_2, &zeta0);
 51:   rnmax_computed = zeta0;
 52:   rnmax_true = zeta0;

 54:   (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
 55:   if (ksp->reason) {
 56:     PetscObjectTakeAccess(ksp);
 57:     ksp->its   = 0;
 58:     ksp->rnorm = zeta0;
 59:     PetscObjectGrantAccess(ksp);
 60:     return(0);
 61:   }

 63:   VecSet(VVU[0],0.0);
 64:   alpha = 0.;
 65:   rho0 = omega = 1;

 67:   if (bcgsl->delta>0.0) {
 68:     VecCopy(VX, VXR);
 69:     VecSet(VX,0.0);
 70:     VecCopy(VVR[0], VB);
 71:   } else {
 72:     VecCopy(ksp->vec_rhs, VB);
 73:   }

 75:   /* Life goes on */
 76:   VecCopy(VVR[0], VRT);
 77:   zeta = zeta0;

 79:   KSPGetTolerances(ksp, PETSC_NULL, PETSC_NULL, PETSC_NULL, &maxit);

 81:   for (k=0; k<maxit; k += bcgsl->ell) {
 82:     ksp->its   = k;
 83:     ksp->rnorm = zeta;

 85:     KSPLogResidualHistory(ksp, zeta);
 86:     KSPMonitor(ksp, ksp->its, zeta);

 88:     (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
 89:     if (ksp->reason < 0) return(0);
 90:     else if (ksp->reason) break;

 92:     /* BiCG part */
 93:     rho0 = -omega*rho0;
 94:     nrm0 = zeta;
 95:     for (j=0; j<bcgsl->ell; j++) {
 96:       /* rho1 <- r_j' * r_tilde */
 97:       VecDot(VVR[j], VRT, &rho1);
 98:       if (rho1 == 0.0) {
 99:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
100:         return(0);
101:       }
102:       beta = alpha*(rho1/rho0);
103:       rho0 = rho1;
104:       for (i=0; i<=j; i++) {
105:         /* u_i <- r_i - beta*u_i */
106:         VecAYPX(VVU[i], -beta, VVR[i]);
107:       }
108:       /* u_{j+1} <- inv(K)*A*u_j */
109:       KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);

111:       VecDot(VVU[j+1], VRT, &sigma);
112:       if (sigma == 0.0) {
113:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
114:         return(0);
115:       }
116:       alpha = rho1/sigma;

118:       /* x <- x + alpha*u_0 */
119:       VecAXPY(VX, alpha, VVU[0]);

121:       for (i=0; i<=j; i++) {
122:         /* r_i <- r_i - alpha*u_{i+1} */
123:         VecAXPY(VVR[i], -alpha, VVU[i+1]);
124:       }

126:       /* r_{j+1} <- inv(K)*A*r_j */
127:       KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);

129:       VecNorm(VVR[0], NORM_2, &nrm0);
130:       if (bcgsl->delta>0.0) {
131:         if (rnmax_computed<nrm0) rnmax_computed = nrm0;
132:         if (rnmax_true<nrm0) rnmax_true = nrm0;
133:       }

135:       /* NEW: check for early exit */
136:       (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
137:       if (ksp->reason) {
138:         PetscObjectTakeAccess(ksp);
139:         ksp->its   = k+j;
140:         ksp->rnorm = nrm0;
141:         PetscObjectGrantAccess(ksp);
142:         if (ksp->reason < 0) return(0);
143:       }
144:     }

146:     /* Polynomial part */
147:     for(i = 0; i <= bcgsl->ell; ++i) {
148:       VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
149:     }
150:     /* Symmetrize MZa */
151:     for(i = 0; i <= bcgsl->ell; ++i) {
152:       for(j = i+1; j <= bcgsl->ell; ++j) {
153:         MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
154:       }
155:     }
156:     /* Copy MZa to MZb */
157:     PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));

159:     if (!bcgsl->bConvex || bcgsl->ell==1) {
160:       PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

162:       AY0c[0] = -1;
163:       LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
164:       if (ierr!=0) {
165:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
166:         return(0);
167:       }
168:       PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
169:       LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
170:     } else {
171:       PetscBLASInt ione = 1;
172:       PetscScalar aone = 1.0, azero = 0.0;
173:       PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

175:       LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
176:       if (ierr!=0) {
177:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
178:         return(0);
179:       }
180:       PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
181:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
182:       AY0c[0] = -1;
183:       AY0c[bcgsl->ell] = 0.;

185:       PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
186:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

188:       AYlc[0] = 0.;
189:       AYlc[bcgsl->ell] = -1;

191:       BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

193:       kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

195:       /* round-off can cause negative kappa's */
196:       if (kappa0<0) kappa0 = -kappa0;
197:       kappa0 = sqrt(kappa0);

199:       kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

201:       BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

203:       kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

205:       if (kappa1<0) kappa1 = -kappa1;
206:       kappa1 = PetscSqrtReal(kappa1);

208:       if (kappa0!=0.0 && kappa1!=0.0) {
209:         if (kappaA<0.7*kappa0*kappa1) {
210:           ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
211:         } else {
212:           ghat = kappaA/(kappa1*kappa1);
213:         }
214:         for (i=0; i<=bcgsl->ell; i++) {
215:           AY0c[i] = AY0c[i] - ghat* AYlc[i];
216:         }
217:       }
218:     }

220:     omega = AY0c[bcgsl->ell];
221:     for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
222:       omega = AY0c[h];
223:     }
224:     if (omega==0.0) {
225:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
226:       return(0);
227:     }


230:     VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
231:     for (i=1; i<=bcgsl->ell; i++) {
232:       AY0c[i] *= -1.0;
233:     }
234:     VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
235:     VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
236:     for (i=1; i<=bcgsl->ell; i++) {
237:       AY0c[i] *= -1.0;
238:     }
239:     VecNorm(VVR[0], NORM_2, &zeta);

241:     /* Accurate Update */
242:     if (bcgsl->delta>0.0) {
243:       if (rnmax_computed<zeta) rnmax_computed = zeta;
244:       if (rnmax_true<zeta) rnmax_true = zeta;

246:       bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
247:       if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
248:         /* r0 <- b-inv(K)*A*X */
249:         KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
250:         VecAYPX(VVR[0], -1.0, VB);
251:         rnmax_true = zeta;

253:         if (bUpdateX) {
254:           VecAXPY(VXR,1.0,VX);
255:           VecSet(VX,0.0);
256:           VecCopy(VVR[0], VB);
257:           rnmax_computed = zeta;
258:         }
259:       }
260:     }
261:   }
262:   if (bcgsl->delta>0.0) {
263:     VecAXPY(VX,1.0,VXR);
264:   }

266:   (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
267:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
268:   return(0);
269: }

273: /*@
274:    KSPBCGSLSetXRes - Sets the parameter governing when
275:    exact residuals will be used instead of computed residuals.

277:    Logically Collective on KSP

279:    Input Parameters:
280: +  ksp - iterative context obtained from KSPCreate
281: -  delta - computed residuals are used alone when delta is not positive

283:    Options Database Keys:

285: .  -ksp_bcgsl_xres delta

287:    Level: intermediate

289: .keywords: KSP, BiCGStab(L), set, exact residuals

291: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
292: @*/
293: PetscErrorCode  KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
294: {
295:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;

300:   if (ksp->setupstage) {
301:     if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
302:       VecDestroyVecs(ksp->nwork,&ksp->work);
303:       PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
304:       ksp->setupstage = KSP_SETUP_NEW;
305:     }
306:   }
307:   bcgsl->delta = delta;
308:   return(0);
309: }

313: /*@
314:    KSPBCGSLSetPol - Sets the type of polynomial part will
315:    be used in the BiCGSTab(L) solver.

317:    Logically Collective on KSP

319:    Input Parameters:
320: +  ksp - iterative context obtained from KSPCreate
321: -  uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.

323:    Options Database Keys:

325: +  -ksp_bcgsl_cxpoly - use enhanced polynomial
326: .  -ksp_bcgsl_mrpoly - use standard polynomial

328:    Level: intermediate

330: .keywords: KSP, BiCGStab(L), set, polynomial

332: .seealso: @()
333: @*/
334: PetscErrorCode  KSPBCGSLSetPol(KSP ksp, PetscBool  uMROR)
335: {
336:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;


342:   if (!ksp->setupstage) {
343:     bcgsl->bConvex = uMROR;
344:   } else if (bcgsl->bConvex != uMROR) {
345:     /* free the data structures,
346:        then create them again
347:      */
348:     VecDestroyVecs(ksp->nwork,&ksp->work);
349:     PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
350:     bcgsl->bConvex = uMROR;
351:     ksp->setupstage = KSP_SETUP_NEW;
352:   }
353:   return(0);
354: }

358: /*@
359:    KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).

361:    Logically Collective on KSP

363:    Input Parameters:
364: +  ksp - iterative context obtained from KSPCreate
365: -  ell - number of search directions

367:    Options Database Keys:

369: .  -ksp_bcgsl_ell ell

371:    Level: intermediate

373: .keywords: KSP, BiCGStab(L), set, exact residuals,

375: .seealso: @()
376: @*/
377: PetscErrorCode  KSPBCGSLSetEll(KSP ksp, PetscInt ell)
378: {
379:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;

383:   if (ell < 1) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");

386:   if (!ksp->setupstage) {
387:     bcgsl->ell = ell;
388:   } else if (bcgsl->ell != ell) {
389:     /* free the data structures, then create them again */
390:     VecDestroyVecs(ksp->nwork,&ksp->work);
391:     PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
392:     bcgsl->ell = ell;
393:     ksp->setupstage = KSP_SETUP_NEW;
394:   }
395:   return(0);
396: }

400: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
401: {
402:   KSP_BCGSL       *bcgsl = (KSP_BCGSL *)ksp->data;
403:   PetscErrorCode  ierr;
404:   PetscBool       isascii, isstring;

407:   PetscTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
408:   PetscTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);

410:   if (isascii) {
411:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Ell = %D\n", bcgsl->ell);
412:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Delta = %lg\n", bcgsl->delta);
413:   } else {
414:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
415:   }
416:   return(0);
417: }

421: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
422: {
423:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;
425:   PetscInt       this_ell;
426:   PetscReal      delta;
427:   PetscBool      flga = PETSC_FALSE, flg;

430:   /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
431:      don't need to be called here.
432:   */
433:   PetscOptionsHead("KSP BiCGStab(L) Options");

435:   /* Set number of search directions */
436:   PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
437:   if (flg) {
438:     KSPBCGSLSetEll(ksp, this_ell);
439:   }

441:   /* Set polynomial type */
442:   PetscOptionsBool("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga,&flga,PETSC_NULL);
443:   if (flga) {
444:     KSPBCGSLSetPol(ksp, PETSC_TRUE);
445:   } else {
446:     flg  = PETSC_FALSE;
447:     PetscOptionsBool("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg,&flg,PETSC_NULL);
448:     KSPBCGSLSetPol(ksp, PETSC_FALSE);
449:   }

451:   /* Will computed residual be refreshed? */
452:   PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
453:   if (flg) {
454:     KSPBCGSLSetXRes(ksp, delta);
455:   }
456:   PetscOptionsTail();
457:   return(0);
458: }

462: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
463: {
464:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;
465:   PetscInt       ell = bcgsl->ell,ldMZ = ell+1;

469:   if (ksp->pc_side == PC_SYMMETRIC) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
470:   else if (ksp->pc_side == PC_RIGHT) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
471:   KSPDefaultGetWork(ksp, 6+2*ell);
472:   PetscMalloc5(ldMZ,PetscScalar,&AY0c,ldMZ,PetscScalar,&AYlc,ldMZ,PetscScalar,&AYtc,ldMZ*ldMZ,PetscScalar,&MZa,ldMZ*ldMZ,PetscScalar,&MZb);
473:   return(0);
474: }

478: PetscErrorCode KSPReset_BCGSL(KSP ksp)
479: {
480:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;
483:   VecDestroyVecs(ksp->nwork,&ksp->work);
484:   PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
485:   return(0);
486: }

490: PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
491: {

495:   KSPReset_BCGSL(ksp);
496:   KSPDefaultDestroy(ksp);
497:   return(0);
498: }

500: /*MC
501:      KSPBCGSL - Implements a slight variant of the Enhanced
502:                 BiCGStab(L) algorithm in (3) and (2).  The variation
503:                 concerns cases when either kappa0**2 or kappa1**2 is
504:                 negative due to round-off. Kappa0 has also been pulled
505:                 out of the denominator in the formula for ghat.

507:     References:
508:       1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
509:          approaches for the stable computation of hybrid BiCG
510:          methods", Applied Numerical Mathematics: Transactions
511:          f IMACS, 19(3), pp 235-54, 1996.
512:       2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
513:          "BiCGStab(L) and other hybrid Bi-CG methods",
514:           Numerical Algorithms, 7, pp 75-109, 1994.
515:       3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
516:          for solving linear systems of equations", preprint
517:          from www.citeseer.com.

519:    Contributed by: Joel M. Malard, email jm.malard@pnl.gov

521:    Options Database Keys:
522: +  -ksp_bcgsl_ell <ell> Number of Krylov search directions
523: -  -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
524: -  -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals

526:    Notes: Supports left preconditioning only

528:    Level: beginner

530: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS, KSPSetPCSide()

532: M*/
536: PetscErrorCode  KSPCreate_BCGSL(KSP ksp)
537: {
539:   KSP_BCGSL      *bcgsl;

542:   /* allocate BiCGStab(L) context */
543:   PetscNewLog(ksp, KSP_BCGSL, &bcgsl);
544:   ksp->data = (void*)bcgsl;

546:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
547:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);

549:   ksp->ops->setup           = KSPSetUp_BCGSL;
550:   ksp->ops->solve           = KSPSolve_BCGSL;
551:   ksp->ops->reset           = KSPReset_BCGSL;
552:   ksp->ops->destroy         = KSPDestroy_BCGSL;
553:   ksp->ops->buildsolution   = KSPDefaultBuildSolution;
554:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
555:   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGSL;
556:   ksp->ops->view            = KSPView_BCGSL;

558:   /* Let the user redefine the number of directions vectors */
559:   bcgsl->ell = 2;

561:   /*Choose between a single MR step or an averaged MR/OR */
562:   bcgsl->bConvex = PETSC_FALSE;

564:   /* Set the threshold for when exact residuals will be used */
565:   bcgsl->delta = 0.0;
566:   return(0);
567: }