Actual source code: nrefine.c

slepc-3.13.4 2020-09-02
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Newton refinement for polynomial eigenproblems.

 13:    References:

 15:        [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
 16:            of invariant pairs for matrix polynomials", Linear Algebra Appl.
 17:            435(3):514-536, 2011.

 19:        [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
 20:            polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
 21:            730-745, 2016.
 22: */

 24:  #include <slepc/private/pepimpl.h>
 25:  #include <slepcblaslapack.h>

 27: typedef struct {
 28:   Mat          *A,M1;
 29:   BV           V,M2,M3,W;
 30:   PetscInt     k,nmat;
 31:   PetscScalar  *fih,*work,*M4;
 32:   PetscBLASInt *pM4;
 33:   PetscBool    compM1;
 34:   Vec          t;
 35: } PEP_REFINE_MATSHELL;

 37: typedef struct {
 38:   Mat          E[2],M1;
 39:   Vec          tN,ttN,t1,vseq;
 40:   VecScatter   scatterctx;
 41:   PetscBool    compM1;
 42:   PetscInt     *map0,*map1,*idxg,*idxp;
 43:   PetscSubcomm subc;
 44:   VecScatter   scatter_sub;
 45:   VecScatter   *scatter_id,*scatterp_id;
 46:   Mat          *A;
 47:   BV           V,W,M2,M3,Wt;
 48:   PetscScalar  *M4,*w,*wt,*d,*dt;
 49:   Vec          t,tg,Rv,Vi,tp,tpg;
 50:   PetscInt     idx,*cols;
 51: } PEP_REFINE_EXPLICIT;

 53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
 54: {
 55:   PetscErrorCode      ierr;
 56:   PEP_REFINE_MATSHELL *ctx;
 57:   PetscInt            k,i;
 58:   PetscScalar         *c;
 59:   PetscBLASInt        k_,one=1,info;

 62:   MatShellGetContext(M,&ctx);
 63:   VecCopy(x,ctx->t);
 64:   k    = ctx->k;
 65:   c    = ctx->work;
 66:   PetscBLASIntCast(k,&k_);
 67:   MatMult(ctx->M1,x,y);
 68:   VecConjugate(ctx->t);
 69:   BVDotVec(ctx->M3,ctx->t,c);
 70:   for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
 71:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
 72:   SlepcCheckLapackInfo("getrs",info);
 73:   BVMultVec(ctx->M2,-1.0,1.0,y,c);
 74:   return(0);
 75: }

 77: /*
 78:   Evaluates the first d elements of the polynomial basis
 79:   on a given matrix H which is considered to be triangular
 80: */
 81: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
 82: {
 84:   PetscInt       i,j,ldfh=nm*k,off,nmat=pep->nmat;
 85:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
 86:   PetscScalar    corr=0.0,alpha,beta;
 87:   PetscBLASInt   k_,ldh_,ldfh_;

 90:   PetscBLASIntCast(ldh,&ldh_);
 91:   PetscBLASIntCast(k,&k_);
 92:   PetscBLASIntCast(ldfh,&ldfh_);
 93:   PetscArrayzero(fH,nm*k*k);
 94:   if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
 95:   if (nm>1) {
 96:     t = b[0]/a[0];
 97:     off = k;
 98:     for (j=0;j<k;j++) {
 99:       for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
100:       fH[j+j*ldfh] -= t;
101:     }
102:   }
103:   for (i=2;i<nm;i++) {
104:     off = i*k;
105:     if (i==2) {
106:       for (j=0;j<k;j++) {
107:         fH[off+j+j*ldfh] = 1.0;
108:         H[j+j*ldh] -= b[1];
109:       }
110:     } else {
111:       for (j=0;j<k;j++) {
112:         PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k);
113:         H[j+j*ldh] += corr-b[i-1];
114:       }
115:     }
116:     corr  = b[i-1];
117:     beta  = -g[i-1]/a[i-1];
118:     alpha = 1/a[i-1];
119:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
120:   }
121:   for (j=0;j<k;j++) H[j+j*ldh] += corr;
122:   return(0);
123: }

125: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
126: {
127:   PetscErrorCode    ierr;
128:   PetscScalar       *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
129:   const PetscScalar *m3,*m2;
130:   PetscInt          i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
131:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
132:   PetscBLASInt      k_,lda_,lds_,nloc_,one=1,info;
133:   Mat               *A=ctx->A,Mk,M1=ctx->M1,P;
134:   BV                V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
135:   MatStructure      str;
136:   Vec               vc;

139:   STGetMatStructure(pep->st,&str);
140:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
141:   DHii = T12;
142:   PetscArrayzero(DHii,k*k*nmat);
143:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
144:   for (d=2;d<nmat;d++) {
145:     for (j=0;j<k;j++) {
146:       for (i=0;i<k;i++) {
147:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
148:       }
149:     }
150:   }
151:   /* T11 */
152:   if (!ctx->compM1) {
153:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
154:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
155:     for (j=1;j<nmat;j++) {
156:       MatAXPY(M1,Ts[j],A[j],str);
157:     }
158:   }

160:   /* T22 */
161:   PetscBLASIntCast(lds,&lds_);
162:   PetscBLASIntCast(k,&k_);
163:   PetscBLASIntCast(lda,&lda_);
164:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
165:   for (i=1;i<deg;i++) {
166:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
167:     s = (i==1)?0.0:1.0;
168:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
169:   }
170:   for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }

172:   /* T12 */
173:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
174:   for (i=1;i<nmat;i++) {
175:     MatDenseGetArray(Mk,&array);
176:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
177:     MatDenseRestoreArray(Mk,&array);
178:     BVSetActiveColumns(W,0,k);
179:     BVMult(W,1.0,0.0,V,Mk);
180:     if (i==1) {
181:       BVMatMult(W,A[i],M2);
182:     } else {
183:       BVMatMult(W,A[i],M3); /* using M3 as work space */
184:       BVMult(M2,1.0,1.0,M3,NULL);
185:     }
186:   }

188:   /* T21 */
189:   MatDenseGetArray(Mk,&array);
190:   for (i=1;i<deg;i++) {
191:     s = (i==1)?0.0:1.0;
192:     ss = PetscConj(fh[i]);
193:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
194:   }
195:   MatDenseRestoreArray(Mk,&array);
196:   BVSetActiveColumns(M3,0,k);
197:   BVMult(M3,1.0,0.0,V,Mk);
198:   for (i=0;i<k;i++) {
199:     BVGetColumn(M3,i,&vc);
200:     VecConjugate(vc);
201:     BVRestoreColumn(M3,i,&vc);
202:   }
203:   MatDestroy(&Mk);
204:   PetscFree3(T12,Tr,Ts);

206:   VecGetLocalSize(ctx->t,&nloc);
207:   PetscBLASIntCast(nloc,&nloc_);
208:   PetscMalloc1(nloc*k,&T);
209:   KSPGetOperators(pep->refineksp,NULL,&P);
210:   if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
211:   BVGetArrayRead(ctx->M2,&m2);
212:   BVGetArrayRead(ctx->M3,&m3);
213:   VecGetArray(ctx->t,&v);
214:   for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
215:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
216:   SlepcCheckLapackInfo("gesv",info);
217:   for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
218:   VecRestoreArray(ctx->t,&v);
219:   BVRestoreArrayRead(ctx->M2,&m2);
220:   BVRestoreArrayRead(ctx->M3,&m3);
221:   MatDiagonalSet(P,ctx->t,ADD_VALUES);
222:   PetscFree(T);
223:   KSPSetUp(pep->refineksp);
224:   return(0);
225: }

227: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
228: {
229:   PetscErrorCode      ierr;
230:   PetscScalar         *t0;
231:   PetscBLASInt        k_,one=1,info,lda_;
232:   PetscInt            i,lda=nmat*k;
233:   Mat                 M;
234:   PEP_REFINE_MATSHELL *ctx;

237:   KSPGetOperators(ksp,&M,NULL);
238:   MatShellGetContext(M,&ctx);
239:   PetscCalloc1(k,&t0);
240:   PetscBLASIntCast(lda,&lda_);
241:   PetscBLASIntCast(k,&k_);
242:   for (i=0;i<k;i++) t0[i] = Rh[i];
243:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
244:   SlepcCheckLapackInfo("getrs",info);
245:   BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
246:   KSPSolve(ksp,Rv,dVi);
247:   VecConjugate(dVi);
248:   BVDotVec(ctx->M3,dVi,dHi);
249:   VecConjugate(dVi);
250:   for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
251:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
252:   SlepcCheckLapackInfo("getrs",info);
253:   PetscFree(t0);
254:   return(0);
255: }

257: /*
258:    Computes the residual P(H,V*S)*e_j for the polynomial
259: */
260: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
261: {
263:   PetscScalar    *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
264:   PetscReal      *a=pcf,*b=pcf+nmat,*g=b+nmat;
265:   PetscInt       i,ii,jj,lda;
266:   PetscBLASInt   lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
267:   Mat            M0;
268:   Vec            w;

271:   PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
272:   lda = k*nmat;
273:   PetscBLASIntCast(k,&k_);
274:   PetscBLASIntCast(lds,&lds_);
275:   PetscBLASIntCast(lda,&lda_);
276:   PetscBLASIntCast(nmat,&nmat_);
277:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
278:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
279:   BVSetActiveColumns(W,0,nmat);
280:   BVMult(W,1.0,0.0,V,M0);
281:   MatDestroy(&M0);

283:   BVGetColumn(W,0,&w);
284:   MatMult(A[0],w,Rv);
285:   BVRestoreColumn(W,0,&w);
286:   for (i=1;i<nmat;i++) {
287:     BVGetColumn(W,i,&w);
288:     MatMult(A[i],w,t);
289:     BVRestoreColumn(W,i,&w);
290:     VecAXPY(Rv,1.0,t);
291:   }
292:   /* Update right-hand side */
293:   if (j) {
294:     PetscBLASIntCast(ldh,&ldh_);
295:     PetscArrayzero(Z,k*k);
296:     PetscArrayzero(DS0,k*k);
297:     PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k);
298:     /* Update DfH */
299:     for (i=1;i<nmat;i++) {
300:       if (i>1) {
301:         beta = -g[i-1];
302:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
303:         tt += -b[i-1];
304:         for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
305:         tt = b[i-1];
306:         beta = 1.0/a[i-1];
307:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
308:         F = DS0; DS0 = DS1; DS1 = F;
309:       } else {
310:         PetscArrayzero(DS1,k*k);
311:         for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
312:       }
313:       for (jj=j;jj<k;jj++) {
314:         for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
315:       }
316:     }
317:     for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
318:     /* Update right-hand side */
319:     PetscBLASIntCast(2*k,&k2_);
320:     PetscBLASIntCast(j,&j_);
321:     PetscBLASIntCast(k+rds,&krds_);
322:     c0 = DS0;
323:     PetscArrayzero(Rh,k);
324:     for (i=0;i<nmat;i++) {
325:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
326:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
327:       BVMultVec(V,1.0,0.0,t,h);
328:       BVSetActiveColumns(dV,0,rds);
329:       BVMultVec(dV,1.0,1.0,t,h+k);
330:       BVGetColumn(W,i,&w);
331:       MatMult(A[i],t,w);
332:       BVRestoreColumn(W,i,&w);
333:       if (i>0 && i<nmat-1) {
334:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
335:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
336:       }
337:     }

339:     for (i=0;i<nmat;i++) h[i] = -1.0;
340:     BVMultVec(W,1.0,1.0,Rv,h);
341:   }
342:   PetscFree4(h,DS0,DS1,Z);
343:   return(0);
344: }

346: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
347: {
349:   PetscInt       i,j,incf,incc;
350:   PetscScalar    *y,*g,*xx2,*ww,y2,*dd;
351:   Vec            v,t,xx1;
352:   BV             WW,T;

355:   PetscMalloc3(sz,&y,sz,&g,k,&xx2);
356:   if (trans) {
357:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
358:   } else {
359:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
360:   }
361:   xx1 = vw;
362:   VecCopy(x1,xx1);
363:   PetscArraycpy(xx2,x2,sz);
364:   PetscArrayzero(sol2,k);
365:   for (i=sz-1;i>=0;i--) {
366:     BVGetColumn(WW,i,&v);
367:     VecConjugate(v);
368:     VecDot(xx1,v,y+i);
369:     VecConjugate(v);
370:     BVRestoreColumn(WW,i,&v);
371:     for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
372:     y[i] = -(y[i]-xx2[i])/dd[i];
373:     BVGetColumn(T,i,&t);
374:     VecAXPY(xx1,-y[i],t);
375:     BVRestoreColumn(T,i,&t);
376:     for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
377:     g[i] = xx2[i];
378:   }
379:   if (trans) {
380:     KSPSolveTranspose(ksp,xx1,sol1);
381:   } else {
382:     KSPSolve(ksp,xx1,sol1);
383:   }
384:   if (trans) {
385:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
386:   } else {
387:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
388:   }
389:   for (i=0;i<sz;i++) {
390:     BVGetColumn(T,i,&t);
391:     VecConjugate(t);
392:     VecDot(sol1,t,&y2);
393:     VecConjugate(t);
394:     BVRestoreColumn(T,i,&t);
395:     for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
396:     y2 = (g[i]-y2)/dd[i];
397:     BVGetColumn(WW,i,&v);
398:     VecAXPY(sol1,-y2,v);
399:     for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
400:     sol2[i] = y[i]+y2;
401:     BVRestoreColumn(WW,i,&v);
402:   }
403:   PetscFree3(y,g,xx2);
404:   return(0);
405: }

407: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
408: {
410:   PetscInt       i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
411:   Mat            M1=matctx->M1,*A,*At,Mk;
412:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
413:   PetscScalar    s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
414:   PetscScalar    *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
415:   PetscBLASInt   lds_,lda_,k_;
416:   MatStructure   str;
417:   PetscBool      flg;
418:   BV             M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
419:   Vec            vc,vc2;

422:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
423:   STGetMatStructure(pep->st,&str);
424:   STGetTransform(pep->st,&flg);
425:   if (flg) {
426:     PetscMalloc1(pep->nmat,&At);
427:     for (i=0;i<pep->nmat;i++) {
428:       STGetMatrixTransformed(pep->st,i,&At[i]);
429:     }
430:   } else At = pep->A;
431:   if (matctx->subc) A = matctx->A;
432:   else A = At;
433:   /* Form the explicit system matrix */
434:   DHii = T12;
435:   PetscArrayzero(DHii,k*k*nmat);
436:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
437:   for (l=2;l<nmat;l++) {
438:     for (j=0;j<k;j++) {
439:       for (i=0;i<k;i++) {
440:         DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
441:       }
442:     }
443:   }

445:   /* T11 */
446:   if (!matctx->compM1) {
447:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
448:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
449:     for (j=1;j<nmat;j++) {
450:       MatAXPY(M1,Ts[j],A[j],str);
451:     }
452:   }

454:   /* T22 */
455:   PetscBLASIntCast(lds,&lds_);
456:   PetscBLASIntCast(k,&k_);
457:   PetscBLASIntCast(lda,&lda_);
458:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
459:   for (i=1;i<deg;i++) {
460:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
461:     s = (i==1)?0.0:1.0;
462:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
463:   }

465:   /* T12 */
466:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
467:   for (i=1;i<nmat;i++) {
468:     MatDenseGetArray(Mk,&array);
469:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
470:     MatDenseRestoreArray(Mk,&array);
471:     BVSetActiveColumns(W,0,k);
472:     BVMult(W,1.0,0.0,V,Mk);
473:     if (i==1) {
474:       BVMatMult(W,A[i],M2);
475:     } else {
476:       BVMatMult(W,A[i],M3); /* using M3 as work space */
477:       BVMult(M2,1.0,1.0,M3,NULL);
478:     }
479:   }

481:   /* T21 */
482:   MatDenseGetArray(Mk,&array);
483:   for (i=1;i<deg;i++) {
484:     s = (i==1)?0.0:1.0;
485:     ss = PetscConj(fh[i]);
486:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
487:   }
488:   MatDenseRestoreArray(Mk,&array);
489:   BVSetActiveColumns(M3,0,k);
490:   BVMult(M3,1.0,0.0,V,Mk);
491:   for (i=0;i<k;i++) {
492:     BVGetColumn(M3,i,&vc);
493:     VecConjugate(vc);
494:     BVRestoreColumn(M3,i,&vc);
495:   }

497:   KSPSetOperators(ksp,M1,M1);
498:   KSPSetUp(ksp);
499:   MatDestroy(&Mk);

501:   /* Set up for BEMW */
502:   for (i=0;i<k;i++) {
503:     BVGetColumn(M2,i,&vc);
504:     BVGetColumn(W,i,&vc2);
505:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
506:     BVRestoreColumn(M2,i,&vc);
507:     BVGetColumn(M3,i,&vc);
508:     VecConjugate(vc);
509:     VecDot(vc2,vc,&d[i]);
510:     VecConjugate(vc);
511:     BVRestoreColumn(M3,i,&vc);
512:     for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
513:     d[i] = M4[i+i*k]-d[i];
514:     BVRestoreColumn(W,i,&vc2);

516:     BVGetColumn(M3,i,&vc);
517:     BVGetColumn(Wt,i,&vc2);
518:     for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
519:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
520:     BVRestoreColumn(M3,i,&vc);
521:     BVGetColumn(M2,i,&vc);
522:     VecConjugate(vc2);
523:     VecDot(vc,vc2,&dt[i]);
524:     VecConjugate(vc2);
525:     BVRestoreColumn(M2,i,&vc);
526:     for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
527:     dt[i] = M4[i+i*k]-dt[i];
528:     BVRestoreColumn(Wt,i,&vc2);
529:   }

531:   if (flg) {
532:     PetscFree(At);
533:   }
534:   PetscFree3(T12,Tr,Ts);
535:   return(0);
536: }

538: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
539: {
540:   PetscErrorCode    ierr;
541:   PetscInt          i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
542:   PetscInt          *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
543:   Mat               M,*E=matctx->E,*A,*At,Mk,Md;
544:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
545:   PetscScalar       s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
546:   PetscBLASInt      lds_,lda_,k_;
547:   const PetscInt    *idxmc;
548:   const PetscScalar *valsc,*carray;
549:   MatStructure      str;
550:   Vec               vc,vc0;
551:   PetscBool         flg;

554:   PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
555:   STGetMatStructure(pep->st,&str);
556:   KSPGetOperators(ksp,&M,NULL);
557:   MatGetOwnershipRange(E[1],&n1,&m1);
558:   MatGetOwnershipRange(E[0],&n0,&m0);
559:   MatGetOwnershipRange(M,&n,NULL);
560:   PetscMalloc1(nmat,&ts);
561:   STGetTransform(pep->st,&flg);
562:   if (flg) {
563:     PetscMalloc1(pep->nmat,&At);
564:     for (i=0;i<pep->nmat;i++) {
565:       STGetMatrixTransformed(pep->st,i,&At[i]);
566:     }
567:   } else At = pep->A;
568:   if (matctx->subc) A = matctx->A;
569:   else A = At;
570:   /* Form the explicit system matrix */
571:   DHii = T12;
572:   PetscArrayzero(DHii,k*k*nmat);
573:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
574:   for (d=2;d<nmat;d++) {
575:     for (j=0;j<k;j++) {
576:       for (i=0;i<k;i++) {
577:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
578:       }
579:     }
580:   }

582:   /* T11 */
583:   if (!matctx->compM1) {
584:     MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
585:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
586:     for (j=1;j<nmat;j++) {
587:       MatAXPY(E[0],Ts[j],A[j],str);
588:     }
589:   }
590:   for (i=n0;i<m0;i++) {
591:     MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
592:     idx = n+i-n0;
593:     for (j=0;j<ncols;j++) {
594:       idxg[j] = matctx->map0[idxmc[j]];
595:     }
596:     MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
597:     MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
598:   }

600:   /* T22 */
601:   PetscBLASIntCast(lds,&lds_);
602:   PetscBLASIntCast(k,&k_);
603:   PetscBLASIntCast(lda,&lda_);
604:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
605:   for (i=1;i<deg;i++) {
606:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
607:     s = (i==1)?0.0:1.0;
608:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
609:   }
610:   for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
611:   for (i=0;i<m1-n1;i++) {
612:     idx = n+m0-n0+i;
613:     for (j=0;j<k;j++) {
614:       Tr[j] = T22[n1+i+j*k];
615:     }
616:     MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
617:   }

619:   /* T21 */
620:   for (i=1;i<deg;i++) {
621:     s = (i==1)?0.0:1.0;
622:     ss = PetscConj(fh[i]);
623:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
624:   }
625:   BVSetActiveColumns(W,0,k);
626:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
627:   BVMult(W,1.0,0.0,V,Mk);
628:   for (i=0;i<k;i++) {
629:     BVGetColumn(W,i,&vc);
630:     VecConjugate(vc);
631:     VecGetArrayRead(vc,&carray);
632:     idx = matctx->map1[i];
633:     MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
634:     VecRestoreArrayRead(vc,&carray);
635:     BVRestoreColumn(W,i,&vc);
636:   }

638:   /* T12 */
639:   for (i=1;i<nmat;i++) {
640:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
641:     for (j=0;j<k;j++) {
642:       PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k);
643:     }
644:   }
645:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
646:   for (i=0;i<nmat;i++) ts[i] = 1.0;
647:   for (j=0;j<k;j++) {
648:     MatDenseGetArray(Md,&array);
649:     PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k);
650:     MatDenseRestoreArray(Md,&array);
651:     BVSetActiveColumns(W,0,nmat-1);
652:     BVMult(W,1.0,0.0,V,Md);
653:     for (i=nmat-1;i>0;i--) {
654:       BVGetColumn(W,i-1,&vc0);
655:       BVGetColumn(W,i,&vc);
656:       MatMult(A[i],vc0,vc);
657:       BVRestoreColumn(W,i-1,&vc0);
658:       BVRestoreColumn(W,i,&vc);
659:     }
660:     BVSetActiveColumns(W,1,nmat);
661:     BVGetColumn(W,0,&vc0);
662:     BVMultVec(W,1.0,0.0,vc0,ts);
663:     VecGetArrayRead(vc0,&carray);
664:     idx = matctx->map1[j];
665:     MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
666:     VecRestoreArrayRead(vc0,&carray);
667:     BVRestoreColumn(W,0,&vc0);
668:   }
669:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
670:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
671:   KSPSetOperators(ksp,M,M);
672:   KSPSetUp(ksp);
673:   PetscFree(ts);
674:   MatDestroy(&Mk);
675:   MatDestroy(&Md);
676:   if (flg) {
677:     PetscFree(At);
678:   }
679:   PetscFree5(T22,T21,T12,Tr,Ts);
680:   return(0);
681: }

683: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
684: {
685:   PetscErrorCode    ierr;
686:   PetscInt          n0,m0,n1,m1,i;
687:   PetscScalar       *arrayV;
688:   const PetscScalar *array;

691:   MatGetOwnershipRange(matctx->E[1],&n1,&m1);
692:   MatGetOwnershipRange(matctx->E[0],&n0,&m0);

694:   /* Right side */
695:   VecGetArrayRead(Rv,&array);
696:   VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
697:   VecRestoreArrayRead(Rv,&array);
698:   VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
699:   VecAssemblyBegin(matctx->tN);
700:   VecAssemblyEnd(matctx->tN);

702:   /* Solve */
703:   KSPSolve(ksp,matctx->tN,matctx->ttN);

705:   /* Retrieve solution */
706:   VecGetArray(dVi,&arrayV);
707:   VecGetArrayRead(matctx->ttN,&array);
708:   PetscArraycpy(arrayV,array,m0-n0);
709:   VecRestoreArray(dVi,&arrayV);
710:   if (!matctx->subc) {
711:     VecGetArray(matctx->t1,&arrayV);
712:     for (i=0;i<m1-n1;i++) arrayV[i] =  array[m0-n0+i];
713:     VecRestoreArray(matctx->t1,&arrayV);
714:     VecRestoreArrayRead(matctx->ttN,&array);
715:     VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
716:     VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
717:     VecGetArrayRead(matctx->vseq,&array);
718:     for (i=0;i<k;i++) dHi[i] = array[i];
719:     VecRestoreArrayRead(matctx->vseq,&array);
720:   }
721:   return(0);
722: }

724: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
725: {
726:   PetscErrorCode      ierr;
727:   PetscInt            j,m,lda=pep->nmat*k,n0,m0,idx;
728:   PetscMPIInt         root,len;
729:   PetscScalar         *array2,h;
730:   const PetscScalar   *array;
731:   Vec                 R,Vi;
732:   PEP_REFINE_MATSHELL *ctx;
733:   Mat                 M;

736:   if (!matctx || !matctx->subc) {
737:     for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
738:     h   = H[i+i*ldh];
739:     idx = i;
740:     R   = Rv;
741:     Vi  = dVi;
742:     switch (pep->scheme) {
743:     case PEP_REFINE_SCHEME_EXPLICIT:
744:       NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
745:       matctx->compM1 = PETSC_FALSE;
746:       break;
747:     case PEP_REFINE_SCHEME_MBE:
748:       NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
749:       matctx->compM1 = PETSC_FALSE;
750:       break;
751:     case PEP_REFINE_SCHEME_SCHUR:
752:       KSPGetOperators(ksp,&M,NULL);
753:       MatShellGetContext(M,&ctx);
754:       NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
755:       ctx->compM1 = PETSC_FALSE;
756:       break;
757:     }
758:   } else {
759:     if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
760:       for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
761:       h = H[idx+idx*ldh];
762:       matctx->idx = idx;
763:       switch (pep->scheme) {
764:       case PEP_REFINE_SCHEME_EXPLICIT:
765:         NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
766:         matctx->compM1 = PETSC_FALSE;
767:         break;
768:       case PEP_REFINE_SCHEME_MBE:
769:         NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
770:         matctx->compM1 = PETSC_FALSE;
771:         break;
772:       case PEP_REFINE_SCHEME_SCHUR:
773:         break;
774:       }
775:     } else idx = matctx->idx;
776:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
777:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
778:     VecGetArrayRead(matctx->tg,&array);
779:     VecPlaceArray(matctx->t,array);
780:     VecCopy(matctx->t,matctx->Rv);
781:     VecResetArray(matctx->t);
782:     VecRestoreArrayRead(matctx->tg,&array);
783:     R  = matctx->Rv;
784:     Vi = matctx->Vi;
785:   }
786:   if (idx==i && idx<k) {
787:     switch (pep->scheme) {
788:       case PEP_REFINE_SCHEME_EXPLICIT:
789:         NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
790:         break;
791:       case PEP_REFINE_SCHEME_MBE:
792:         NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
793:         break;
794:       case PEP_REFINE_SCHEME_SCHUR:
795:         NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
796:         break;
797:     }
798:   }
799:   if (matctx && matctx->subc) {
800:     VecGetLocalSize(Vi,&m);
801:     VecGetArrayRead(Vi,&array);
802:     VecGetArray(matctx->tg,&array2);
803:     PetscArraycpy(array2,array,m);
804:     VecRestoreArray(matctx->tg,&array2);
805:     VecRestoreArrayRead(Vi,&array);
806:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
807:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
808:     switch (pep->scheme) {
809:     case PEP_REFINE_SCHEME_EXPLICIT:
810:       MatGetOwnershipRange(matctx->E[0],&n0,&m0);
811:       VecGetArrayRead(matctx->ttN,&array);
812:       VecPlaceArray(matctx->tp,array+m0-n0);
813:       VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
814:       VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
815:       VecResetArray(matctx->tp);
816:       VecRestoreArrayRead(matctx->ttN,&array);
817:       VecGetArrayRead(matctx->tpg,&array);
818:       for (j=0;j<k;j++) dHi[j] = array[j];
819:       VecRestoreArrayRead(matctx->tpg,&array);
820:       break;
821:      case PEP_REFINE_SCHEME_MBE:
822:       root = 0;
823:       for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
824:       PetscMPIIntCast(k,&len);
825:       MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
826:       break;
827:     case PEP_REFINE_SCHEME_SCHUR:
828:       break;
829:     }
830:   }
831:   return(0);
832: }

834: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
835: {
836:   PetscErrorCode      ierr;
837:   PetscInt            i,nmat=pep->nmat,lda=nmat*k;
838:   PetscScalar         *fh,*Rh,*DfH;
839:   PetscReal           norm;
840:   BV                  W;
841:   Vec                 Rv,t,dvi;
842:   PEP_REFINE_MATSHELL *ctx;
843:   Mat                 M,*At;
844:   PetscBool           flg,lindep;

847:   PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
848:   *rds = 0;
849:   BVCreateVec(pep->V,&Rv);
850:   switch (pep->scheme) {
851:   case PEP_REFINE_SCHEME_EXPLICIT:
852:     BVCreateVec(pep->V,&t);
853:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
854:     PetscMalloc1(nmat,&fh);
855:     break;
856:   case PEP_REFINE_SCHEME_MBE:
857:     if (matctx->subc) {
858:       BVCreateVec(pep->V,&t);
859:       BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
860:     } else {
861:       W = matctx->W;
862:       PetscObjectReference((PetscObject)W);
863:       t = matctx->t;
864:       PetscObjectReference((PetscObject)t);
865:     }
866:     BVScale(matctx->W,0.0);
867:     BVScale(matctx->Wt,0.0);
868:     BVScale(matctx->M2,0.0);
869:     BVScale(matctx->M3,0.0);
870:     PetscMalloc1(nmat,&fh);
871:     break;
872:   case PEP_REFINE_SCHEME_SCHUR:
873:     KSPGetOperators(ksp,&M,NULL);
874:     MatShellGetContext(M,&ctx);
875:     BVCreateVec(pep->V,&t);
876:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
877:     fh = ctx->fih;
878:     break;
879:   }
880:   PetscArrayzero(dVS,2*k*k);
881:   PetscArrayzero(DfH,lda*k);
882:   STGetTransform(pep->st,&flg);
883:   if (flg) {
884:     PetscMalloc1(pep->nmat,&At);
885:     for (i=0;i<pep->nmat;i++) {
886:       STGetMatrixTransformed(pep->st,i,&At[i]);
887:     }
888:   } else At = pep->A;

890:   /* Main loop for computing the ith columns of dX and dS */
891:   for (i=0;i<k;i++) {
892:     /* Compute and update i-th column of the right hand side */
893:     PetscArrayzero(Rh,k);
894:     NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);

896:     /* Update and solve system */
897:     BVGetColumn(dV,i,&dvi);
898:     NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
899:     /* Orthogonalize computed solution */
900:     BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
901:     BVRestoreColumn(dV,i,&dvi);
902:     if (!lindep) {
903:       BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
904:       if (!lindep) {
905:         dVS[k+i+i*2*k] = norm;
906:         BVScaleColumn(dV,i,1.0/norm);
907:         (*rds)++;
908:       }
909:     }
910:   }
911:   BVSetActiveColumns(dV,0,*rds);
912:   VecDestroy(&t);
913:   VecDestroy(&Rv);
914:   BVDestroy(&W);
915:   if (flg) {
916:     PetscFree(At);
917:   }
918:   PetscFree2(DfH,Rh);
919:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
920:   return(0);
921: }

923: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
924: {
926:   PetscInt       j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
927:   PetscScalar    *G,*tau,sone=1.0,zero=0.0,*work;
928:   PetscBLASInt   lds_,k_,ldh_,info,ldg_,lda_;

931:   PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
932:   PetscBLASIntCast(lds,&lds_);
933:   PetscBLASIntCast(lda,&lda_);
934:   PetscBLASIntCast(k,&k_);

936:   /* Form auxiliary matrix for the orthogonalization step */
937:   ldg = deg*k;
938:   PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
939:   PetscBLASIntCast(ldg,&ldg_);
940:   PetscBLASIntCast(ldh,&ldh_);
941:   for (j=0;j<deg;j++) {
942:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
943:   }
944:   /* Orthogonalize and update S */
945:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
946:   SlepcCheckLapackInfo("geqrf",info);
947:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));

949:   /* Update H */
950:   PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
951:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
952:   PetscFree3(tau,work,G);
953:   return(0);
954: }

956: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
957: {
959:   PetscInt       i,j,nmat=pep->nmat,lda=nmat*k;
960:   PetscScalar    *tau,*array,*work;
961:   PetscBLASInt   lds_,k_,lda_,ldh_,kdrs_,info,k2_;
962:   Mat            M0;

965:   PetscMalloc2(k,&tau,k,&work);
966:   PetscBLASIntCast(lds,&lds_);
967:   PetscBLASIntCast(lda,&lda_);
968:   PetscBLASIntCast(ldh,&ldh_);
969:   PetscBLASIntCast(k,&k_);
970:   PetscBLASIntCast(2*k,&k2_);
971:   PetscBLASIntCast((k+rds),&kdrs_);
972:   /* Update H */
973:   for (j=0;j<k;j++) {
974:     for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
975:   }
976:   /* Update V */
977:   for (j=0;j<k;j++) {
978:     for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
979:     for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
980:   }
981:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
982:   SlepcCheckLapackInfo("geqrf",info);
983:   /* Copy triangular matrix in S */
984:   for (j=0;j<k;j++) {
985:     for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
986:     for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
987:   }
988:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
989:   SlepcCheckLapackInfo("orgqr",info);
990:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
991:   MatDenseGetArray(M0,&array);
992:   for (j=0;j<k;j++) {
993:     PetscArraycpy(array+j*k,dVS+j*2*k,k);
994:   }
995:   MatDenseRestoreArray(M0,&array);
996:   BVMultInPlace(pep->V,M0,0,k);
997:   if (rds) {
998:     MatDenseGetArray(M0,&array);
999:     for (j=0;j<k;j++) {
1000:       PetscArraycpy(array+j*k,dVS+k+j*2*k,rds);
1001:     }
1002:     MatDenseRestoreArray(M0,&array);
1003:     BVMultInPlace(dV,M0,0,k);
1004:     BVMult(pep->V,1.0,1.0,dV,NULL);
1005:   }
1006:   MatDestroy(&M0);
1007:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1008:   PetscFree2(tau,work);
1009:   return(0);
1010: }

1012: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
1013: {
1014:   PetscErrorCode      ierr;
1015:   PEP_REFINE_MATSHELL *ctx;
1016:   PetscScalar         t,*coef;
1017:   const PetscScalar   *array;
1018:   MatStructure        str;
1019:   PetscInt            j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1020:   MPI_Comm            comm;
1021:   PetscMPIInt         np;
1022:   const PetscInt      *rgs0,*rgs1;
1023:   Mat                 B,C,*E,*A,*At;
1024:   IS                  is1,is2;
1025:   Vec                 v;
1026:   PetscBool           flg;
1027:   Mat                 M,P;

1030:   PetscMalloc1(nmat,&coef);
1031:   STGetTransform(pep->st,&flg);
1032:   if (flg) {
1033:     PetscMalloc1(pep->nmat,&At);
1034:     for (i=0;i<pep->nmat;i++) {
1035:       STGetMatrixTransformed(pep->st,i,&At[i]);
1036:     }
1037:   } else At = pep->A;
1038:   switch (pep->scheme) {
1039:   case PEP_REFINE_SCHEME_EXPLICIT:
1040:     if (ini) {
1041:       if (matctx->subc) {
1042:         A = matctx->A;
1043:         comm = PetscSubcommChild(matctx->subc);
1044:       } else {
1045:         A = At;
1046:         PetscObjectGetComm((PetscObject)pep,&comm);
1047:       }
1048:       E = matctx->E;
1049:       STGetMatStructure(pep->st,&str);
1050:       MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1051:       j = (matctx->subc)?matctx->subc->color:0;
1052:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1053:       for (j=1;j<nmat;j++) {
1054:         MatAXPY(E[0],coef[j],A[j],str);
1055:       }
1056:       MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1057:       MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1058:       MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1059:       MatGetOwnershipRange(E[0],&n0,&m0);
1060:       MatGetOwnershipRange(E[1],&n1,&m1);
1061:       MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1062:       MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1063:       /* T12 and T21 are computed from V and V*, so,
1064:          they must have the same column and row ranges */
1065:       if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
1066:       MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1067:       MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1068:       MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1069:       MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1070:       MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1071:       MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1072:       MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1073:       MatDestroy(&B);
1074:       MatDestroy(&C);
1075:       matctx->compM1 = PETSC_TRUE;
1076:       MatGetSize(E[0],NULL,&N0);
1077:       MatGetSize(E[1],NULL,&N1);
1078:       MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1079:       MatGetOwnershipRanges(E[0],&rgs0);
1080:       MatGetOwnershipRanges(E[1],&rgs1);
1081:       PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1082:       /* Create column (and row) mapping */
1083:       for (p=0;p<np;p++) {
1084:         for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1085:         for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1086:       }
1087:       MatCreateVecs(M,NULL,&matctx->tN);
1088:       MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1089:       VecDuplicate(matctx->tN,&matctx->ttN);
1090:       if (matctx->subc) {
1091:         MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1092:         count = np*k;
1093:         PetscMalloc2(count,&idx1,count,&idx2);
1094:         VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1095:         VecGetOwnershipRange(matctx->tp,&l0,NULL);
1096:         VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1097:         for (si=0;si<matctx->subc->n;si++) {
1098:           if (matctx->subc->color==si) {
1099:             j=0;
1100:             if (matctx->subc->color==si) {
1101:               for (p=0;p<np;p++) {
1102:                 for (i=n1;i<m1;i++) {
1103:                   idx1[j] = l0+i-n1;
1104:                   idx2[j++] =p*k+i;
1105:                 }
1106:               }
1107:             }
1108:             count = np*(m1-n1);
1109:           } else count =0;
1110:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1111:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1112:           VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1113:           ISDestroy(&is1);
1114:           ISDestroy(&is2);
1115:         }
1116:         PetscFree2(idx1,idx2);
1117:       } else {
1118:         VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1119:       }
1120:       P = M;
1121:     } else {
1122:       if (matctx->subc) {
1123:         /* Scatter vectors pep->V */
1124:         for (i=0;i<k;i++) {
1125:           BVGetColumn(pep->V,i,&v);
1126:           VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1127:           VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1128:           BVRestoreColumn(pep->V,i,&v);
1129:           VecGetArrayRead(matctx->tg,&array);
1130:           VecPlaceArray(matctx->t,(const PetscScalar*)array);
1131:           BVInsertVec(matctx->V,i,matctx->t);
1132:           VecResetArray(matctx->t);
1133:           VecRestoreArrayRead(matctx->tg,&array);
1134:         }
1135:       }
1136:     }
1137:     break;
1138:   case PEP_REFINE_SCHEME_MBE:
1139:     if (ini) {
1140:       if (matctx->subc) {
1141:         A = matctx->A;
1142:         comm = PetscSubcommChild(matctx->subc);
1143:       } else {
1144:         matctx->V = pep->V;
1145:         A = At;
1146:         PetscObjectGetComm((PetscObject)pep,&comm);
1147:         MatCreateVecs(pep->A[0],&matctx->t,NULL);
1148:       }
1149:       STGetMatStructure(pep->st,&str);
1150:       MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1151:       j = (matctx->subc)?matctx->subc->color:0;
1152:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1153:       for (j=1;j<nmat;j++) {
1154:         MatAXPY(matctx->M1,coef[j],A[j],str);
1155:       }
1156:       BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1157:       BVDuplicateResize(matctx->V,k,&matctx->M2);
1158:       BVDuplicate(matctx->M2,&matctx->M3);
1159:       BVDuplicate(matctx->M2,&matctx->Wt);
1160:       PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1161:       matctx->compM1 = PETSC_TRUE;
1162:       M = matctx->M1;
1163:       P = M;
1164:     }
1165:     break;
1166:   case PEP_REFINE_SCHEME_SCHUR:
1167:     if (ini) {
1168:       PetscObjectGetComm((PetscObject)pep,&comm);
1169:       MatGetSize(At[0],&m0,&n0);
1170:       PetscMalloc1(1,&ctx);
1171:       STGetMatStructure(pep->st,&str);
1172:       /* Create a shell matrix to solve the linear system */
1173:       ctx->V = pep->V;
1174:       ctx->k = k; ctx->nmat = nmat;
1175:       PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1176:       for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1177:       PetscArrayzero(ctx->M4,k*k);
1178:       MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1179:       MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS);
1180:       BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1181:       BVDuplicateResize(ctx->V,k,&ctx->M2);
1182:       BVDuplicate(ctx->M2,&ctx->M3);
1183:       BVCreateVec(pep->V,&ctx->t);
1184:       MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1185:       PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1186:       for (j=1;j<nmat;j++) {
1187:         MatAXPY(ctx->M1,coef[j],At[j],str);
1188:       }
1189:       MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1190:       /* Compute a precond matrix for the system */
1191:       t = H[0];
1192:       PEPEvaluateBasis(pep,t,0,coef,NULL);
1193:       for (j=1;j<nmat;j++) {
1194:         MatAXPY(P,coef[j],At[j],str);
1195:       }
1196:       ctx->compM1 = PETSC_TRUE;
1197:     }
1198:     break;
1199:   }
1200:   if (ini) {
1201:     PEPRefineGetKSP(pep,&pep->refineksp);
1202:     KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1203:     KSPSetOperators(pep->refineksp,M,P);
1204:     KSPSetFromOptions(pep->refineksp);
1205:   }

1207:   if (!ini && matctx && matctx->subc) {
1208:      /* Scatter vectors pep->V */
1209:     for (i=0;i<k;i++) {
1210:       BVGetColumn(pep->V,i,&v);
1211:       VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1212:       VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1213:       BVRestoreColumn(pep->V,i,&v);
1214:       VecGetArrayRead(matctx->tg,&array);
1215:       VecPlaceArray(matctx->t,(const PetscScalar*)array);
1216:       BVInsertVec(matctx->V,i,matctx->t);
1217:       VecResetArray(matctx->t);
1218:       VecRestoreArrayRead(matctx->tg,&array);
1219:     }
1220:    }
1221:   PetscFree(coef);
1222:   if (flg) {
1223:     PetscFree(At);
1224:   }
1225:   return(0);
1226: }

1228: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1229: {
1230:   PetscErrorCode    ierr;
1231:   PetscInt          i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1232:   IS                is1,is2;
1233:   BVType            type;
1234:   Vec               v;
1235:   const PetscScalar *array;
1236:   Mat               *A;
1237:   PetscBool         flg;

1240:   STGetTransform(pep->st,&flg);
1241:   if (flg) {
1242:     PetscMalloc1(pep->nmat,&A);
1243:     for (i=0;i<pep->nmat;i++) {
1244:       STGetMatrixTransformed(pep->st,i,&A[i]);
1245:     }
1246:   } else A = pep->A;

1248:   /* Duplicate pep matrices */
1249:   PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1250:   for (i=0;i<pep->nmat;i++) {
1251:     MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1252:   }

1254:   /* Create Scatter */
1255:   MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1256:   MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1257:   VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1258:   BVGetColumn(pep->V,0,&v);
1259:   VecGetOwnershipRange(v,&n0,&m0);
1260:   nloc0 = m0-n0;
1261:   PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1262:   j = 0;
1263:   for (si=0;si<matctx->subc->n;si++) {
1264:     for (i=n0;i<m0;i++) {
1265:       idx1[j]   = i;
1266:       idx2[j++] = i+pep->n*si;
1267:     }
1268:   }
1269:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1270:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1271:   VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1272:   ISDestroy(&is1);
1273:   ISDestroy(&is2);
1274:   for (si=0;si<matctx->subc->n;si++) {
1275:     j=0;
1276:     for (i=n0;i<m0;i++) {
1277:       idx1[j] = i;
1278:       idx2[j++] = i+pep->n*si;
1279:     }
1280:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1281:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1282:     VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1283:     ISDestroy(&is1);
1284:     ISDestroy(&is2);
1285:   }
1286:   BVRestoreColumn(pep->V,0,&v);
1287:   PetscFree2(idx1,idx2);

1289:   /* Duplicate pep->V vecs */
1290:   BVGetType(pep->V,&type);
1291:   BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1292:   BVSetType(matctx->V,type);
1293:   BVSetSizesFromVec(matctx->V,matctx->t,k);
1294:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1295:     BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1296:   }
1297:   for (i=0;i<k;i++) {
1298:     BVGetColumn(pep->V,i,&v);
1299:     VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1300:     VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1301:     BVRestoreColumn(pep->V,i,&v);
1302:     VecGetArrayRead(matctx->tg,&array);
1303:     VecPlaceArray(matctx->t,(const PetscScalar*)array);
1304:     BVInsertVec(matctx->V,i,matctx->t);
1305:     VecResetArray(matctx->t);
1306:     VecRestoreArrayRead(matctx->tg,&array);
1307:   }

1309:   VecDuplicate(matctx->t,&matctx->Rv);
1310:   VecDuplicate(matctx->t,&matctx->Vi);
1311:   if (flg) {
1312:     PetscFree(A);
1313:   }
1314:   return(0);
1315: }

1317: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1318: {
1320:   PetscInt       i;

1323:   VecScatterDestroy(&matctx->scatter_sub);
1324:   for (i=0;i<matctx->subc->n;i++) {
1325:     VecScatterDestroy(&matctx->scatter_id[i]);
1326:   }
1327:   for (i=0;i<pep->nmat;i++) {
1328:     MatDestroy(&matctx->A[i]);
1329:   }
1330:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1331:     for (i=0;i<matctx->subc->n;i++) {
1332:       VecScatterDestroy(&matctx->scatterp_id[i]);
1333:     }
1334:     VecDestroy(&matctx->tp);
1335:     VecDestroy(&matctx->tpg);
1336:     BVDestroy(&matctx->W);
1337:   }
1338:   PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1339:   BVDestroy(&matctx->V);
1340:   VecDestroy(&matctx->t);
1341:   VecDestroy(&matctx->tg);
1342:   VecDestroy(&matctx->Rv);
1343:   VecDestroy(&matctx->Vi);
1344:   return(0);
1345: }

1347: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1348: {
1349:   PetscErrorCode      ierr;
1350:   PetscScalar         *H,*work,*dH,*fH,*dVS;
1351:   PetscInt            ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1352:   PetscBLASInt        k_,ld_,*p,info;
1353:   BV                  dV;
1354:   PetscBool           sinvert,flg;
1355:   PEP_REFINE_EXPLICIT *matctx=NULL;
1356:   Vec                 v;
1357:   Mat                 M,P;
1358:   PEP_REFINE_MATSHELL *ctx;

1361:   PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1362:   if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1363:   /* the input tolerance is not being taken into account (by the moment) */
1364:   its = *maxits;
1365:   PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1366:   DSGetLeadingDimension(pep->ds,&ldh);
1367:   DSGetArray(pep->ds,DS_MAT_A,&H);
1368:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1369:   PetscMalloc1(2*k*k,&dVS);
1370:   STGetTransform(pep->st,&flg);
1371:   if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1372:     PetscBLASIntCast(k,&k_);
1373:     PetscBLASIntCast(ldh,&ld_);
1374:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1375:     if (sinvert) {
1376:       DSGetArray(pep->ds,DS_MAT_A,&H);
1377:       PetscMalloc1(k,&p);
1378:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1379:       SlepcCheckLapackInfo("getrf",info);
1380:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1381:       SlepcCheckLapackInfo("getri",info);
1382:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1383:       pep->ops->backtransform = NULL;
1384:     }
1385:     if (sigma!=0.0) {
1386:       DSGetArray(pep->ds,DS_MAT_A,&H);
1387:       for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1388:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1389:       pep->ops->backtransform = NULL;
1390:     }
1391:   }
1392:   if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1393:     DSGetArray(pep->ds,DS_MAT_A,&H);
1394:     for (j=0;j<k;j++) {
1395:       for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1396:     }
1397:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
1398:     if (!flg) {
1399:       /* Restore original values */
1400:       for (i=0;i<pep->nmat;i++){
1401:         pep->pbc[pep->nmat+i] *= pep->sfactor;
1402:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1403:       }
1404:     }
1405:   }
1406:   if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1407:     for (i=0;i<k;i++) {
1408:       BVGetColumn(pep->V,i,&v);
1409:       VecPointwiseMult(v,v,pep->Dr);
1410:       BVRestoreColumn(pep->V,i,&v);
1411:     }
1412:   }
1413:   DSGetArray(pep->ds,DS_MAT_A,&H);

1415:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1416:   /* check if H is in Schur form */
1417:   for (i=0;i<k-1;i++) {
1418:     if (H[i+1+i*ldh]!=0.0) {
1419: #if !defined(PETSC_USE_COMPLEX)
1420:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires the complex Schur form of the projected matrix");
1421: #else
1422:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1423: #endif
1424:     }
1425:   }
1426:   if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1427:   BVSetActiveColumns(pep->V,0,k);
1428:   BVDuplicateResize(pep->V,k,&dV);
1429:   PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1430:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1431:     PetscMalloc1(1,&matctx);
1432:     if (nsubc>1) { /* spliting in subcommunicators */
1433:       matctx->subc = pep->refinesubc;
1434:       NRefSubcommSetup(pep,k,matctx,nsubc);
1435:     } else matctx->subc=NULL;
1436:   }

1438:   /* Loop performing iterative refinements */
1439:   for (i=0;i<its;i++) {
1440:     /* Pre-compute the polynomial basis evaluated in H */
1441:     PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1442:     PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1443:     /* Solve the linear system */
1444:     PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1445:     /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1446:     PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1447:   }
1448:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1449:   if (!flg && sinvert) {
1450:     PetscFree(p);
1451:   }
1452:   PetscFree3(dH,fH,work);
1453:   PetscFree(dVS);
1454:   BVDestroy(&dV);
1455:   switch (pep->scheme) {
1456:   case PEP_REFINE_SCHEME_EXPLICIT:
1457:     for (i=0;i<2;i++) {
1458:       MatDestroy(&matctx->E[i]);
1459:     }
1460:     PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1461:     VecDestroy(&matctx->tN);
1462:     VecDestroy(&matctx->ttN);
1463:     VecDestroy(&matctx->t1);
1464:     if (nsubc>1) {
1465:       NRefSubcommDestroy(pep,matctx);
1466:     } else {
1467:       VecDestroy(&matctx->vseq);
1468:       VecScatterDestroy(&matctx->scatterctx);
1469:     }
1470:     PetscFree(matctx);
1471:     KSPGetOperators(pep->refineksp,&M,NULL);
1472:     MatDestroy(&M);
1473:     break;
1474:   case PEP_REFINE_SCHEME_MBE:
1475:     BVDestroy(&matctx->W);
1476:     BVDestroy(&matctx->Wt);
1477:     BVDestroy(&matctx->M2);
1478:     BVDestroy(&matctx->M3);
1479:     MatDestroy(&matctx->M1);
1480:     VecDestroy(&matctx->t);
1481:     PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1482:     if (nsubc>1) {
1483:       NRefSubcommDestroy(pep,matctx);
1484:     }
1485:     PetscFree(matctx);
1486:     break;
1487:   case PEP_REFINE_SCHEME_SCHUR:
1488:     KSPGetOperators(pep->refineksp,&M,&P);
1489:     MatShellGetContext(M,&ctx);
1490:     PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1491:     MatDestroy(&ctx->M1);
1492:     BVDestroy(&ctx->M2);
1493:     BVDestroy(&ctx->M3);
1494:     BVDestroy(&ctx->W);
1495:     VecDestroy(&ctx->t);
1496:     PetscFree(ctx);
1497:     MatDestroy(&M);
1498:     MatDestroy(&P);
1499:     break;
1500:   }
1501:   PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1502:   return(0);
1503: }