Actual source code: nepdefl.c
slepc-3.13.3 2020-06-14
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h>
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
20: if (X) *X = extop->X;
21: if (H) {
22: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
23: }
24: return(0);
25: }
27: PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
28: {
30: Vec uu;
31: PetscInt ld,i;
32: PetscMPIInt np;
33: PetscReal norm;
36: BVGetColumn(extop->X,k,&uu);
37: ld = extop->szd+1;
38: NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE);
39: BVRestoreColumn(extop->X,k,&uu);
40: BVNormColumn(extop->X,k,NORM_2,&norm);
41: BVScaleColumn(extop->X,k,1.0/norm);
42: MPI_Comm_size(PetscObjectComm((PetscObject)u),&np);
43: for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
44: extop->H[k*(ld+1)] = lambda;
45: return(0);
46: }
48: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
49: {
51: PetscScalar *Ap;
52: PetscInt ldh=extop->szd+1,ldds,i,j,k1=k+1;
53: PetscScalar *eigr,*eigi,*t,*Z;
54: Vec x;
57: NEPDeflationExtendInvariantPair(extop,u,lambda,k);
58: PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t);
59: DSReset(ds);
60: DSSetType(ds,DSNHEP);
61: DSAllocate(ds,ldh);
62: DSGetLeadingDimension(ds,&ldds);
63: DSGetArray(ds,DS_MAT_A,&Ap);
64: for (j=0;j<k1;j++)
65: for (i=0;i<k1;i++) Ap[j*ldds+i] = extop->H[j*ldh+i];
66: DSRestoreArray(ds,DS_MAT_A,&Ap);
67: DSSetDimensions(ds,k1,0,0,k1);
68: DSSolve(ds,eigr,eigi);
69: DSVectors(ds,DS_MAT_X,&k,NULL);
70: DSGetArray(ds,DS_MAT_X,&Z);
71: BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds);
72: DSRestoreArray(ds,DS_MAT_X,&Z);
73: BVGetColumn(extop->X,k,&x);
74: NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE);
75: BVRestoreColumn(extop->X,k,&x);
76: PetscFree3(eigr,eigi,t);
77: return(0);
78: }
80: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
81: {
83: PetscMPIInt np,rk,count;
84: PetscScalar *array1,*array2;
85: PetscInt nloc;
88: if (extop->szd) {
89: MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk);
90: MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np);
91: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
92: if (v) {
93: VecGetArray(v,&array1);
94: VecGetArray(vex,&array2);
95: if (back) {
96: PetscArraycpy(array1,array2,nloc);
97: } else {
98: PetscArraycpy(array2,array1,nloc);
99: }
100: VecRestoreArray(v,&array1);
101: VecRestoreArray(vex,&array2);
102: }
103: if (a) {
104: VecGetArray(vex,&array2);
105: if (back) {
106: PetscArraycpy(a,array2+nloc,extop->szd);
107: PetscMPIIntCast(extop->szd,&count);
108: MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
109: } else {
110: PetscArraycpy(array2+nloc,a,extop->szd);
111: PetscMPIIntCast(extop->szd,&count);
112: MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
113: }
114: VecRestoreArray(vex,&array2);
115: }
116: } else {
117: if (back) {VecCopy(vex,v);}
118: else {VecCopy(v,vex);}
119: }
120: return(0);
121: }
123: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
124: {
126: PetscInt nloc;
127: Vec u;
128: VecType type;
131: if (extop->szd) {
132: BVGetColumn(extop->nep->V,0,&u);
133: VecGetType(u,&type);
134: BVRestoreColumn(extop->nep->V,0,&u);
135: VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
136: VecSetType(*v,type);
137: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
138: nloc += extop->szd;
139: VecSetSizes(*v,nloc,PETSC_DECIDE);
140: } else {
141: BVCreateVec(extop->nep->V,v);
142: }
143: return(0);
144: }
146: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
147: {
148: PetscErrorCode ierr;
149: PetscInt nloc;
150: BVType type;
151: BVOrthogType otype;
152: BVOrthogRefineType oref;
153: PetscReal oeta;
154: BVOrthogBlockType oblock;
155: NEP nep=extop->nep;
158: if (extop->szd) {
159: BVGetSizes(nep->V,&nloc,NULL,NULL);
160: BVCreate(PetscObjectComm((PetscObject)nep),V);
161: BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
162: BVGetType(nep->V,&type);
163: BVSetType(*V,type);
164: BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
165: BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
166: PetscObjectStateIncrease((PetscObject)*V);
167: PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
168: } else {
169: BVDuplicateResize(nep->V,sz,V);
170: }
171: return(0);
172: }
174: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
175: {
177: PetscInt n,next,i;
178: PetscRandom rand;
179: PetscScalar *array;
180: PetscMPIInt nn,np;
183: BVGetRandomContext(extop->nep->V,&rand);
184: VecSetRandom(v,rand);
185: if (extop->szd) {
186: MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);
187: BVGetSizes(extop->nep->V,&n,NULL,NULL);
188: VecGetLocalSize(v,&next);
189: VecGetArray(v,&array);
190: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
191: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
192: PetscMPIIntCast(extop->n,&nn);
193: MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v));
194: VecRestoreArray(v,&array);
195: }
196: return(0);
197: }
199: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
200: {
202: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
203: PetscScalar sone=1.0,zero=0.0;
204: PetscBLASInt ldh_,ldhj_,n_;
207: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
208: PetscArrayzero(Hj,i);
209: PetscBLASIntCast(ldhj+1,&ldh_);
210: PetscBLASIntCast(ldhj,&ldhj_);
211: PetscBLASIntCast(n,&n_);
212: if (idx<1) {
213: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
214: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
215: } else {
216: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
217: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
218: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
219: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
220: }
221: if (idx<0) {
222: idx = -idx;
223: for (k=1;k<idx;k++) {
224: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
225: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
226: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
227: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
228: }
229: }
230: return(0);
231: }
233: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
234: {
236: PetscInt i;
239: NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n);
240: extop->n++;
241: BVSetActiveColumns(extop->X,0,extop->n);
242: if (extop->n <= extop->szd) {
243: /* update XpX */
244: BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
245: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
246: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
247: /* determine minimality index */
248: extop->midx = PetscMin(extop->max_midx,extop->n);
249: /* polynominal basis coeficients */
250: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
251: /* evaluate the polynomial basis in H */
252: NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
253: }
254: return(0);
255: }
257: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
258: {
260: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
261: Mat A,B;
262: PetscScalar *array;
265: if (idx<0) {ini = 0; fin = extop->nep->nt;}
266: else {ini = idx; fin = idx+1;}
267: if (y) sz = hfjp?n+2:n+1;
268: else sz = hfjp?3*n:2*n;
269: ldh = extop->szd+1;
270: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
271: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
272: MatDenseGetArray(A,&array);
273: for (j=0;j<n;j++)
274: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
275: MatDenseRestoreArray(A,&array);
276: if (y) {
277: MatDenseGetArray(A,&array);
278: array[extop->n*(sz+1)] = lambda;
279: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
280: for (i=0;i<n;i++) array[n*sz+i] = y[i];
281: MatDenseRestoreArray(A,&array);
282: for (j=ini;j<fin;j++) {
283: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
284: MatDenseGetArray(B,&array);
285: for (i=0;i<n;i++) hfj[j*ld+i] = array[n*sz+i];
286: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = array[(n+1)*sz+i];
287: MatDenseRestoreArray(B,&array);
288: }
289: } else {
290: off = idx<0?ld*n:0;
291: MatDenseGetArray(A,&array);
292: for (k=0;k<n;k++) {
293: array[(n+k)*sz+k] = 1.0;
294: array[(n+k)*sz+n+k] = lambda;
295: }
296: if (hfjp) for (k=0;k<n;k++) {
297: array[(2*n+k)*sz+n+k] = 1.0;
298: array[(2*n+k)*sz+2*n+k] = lambda;
299: }
300: MatDenseRestoreArray(A,&array);
301: for (j=ini;j<fin;j++) {
302: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
303: MatDenseGetArray(B,&array);
304: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = array[n*sz+i*sz+k];
305: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = array[2*n*sz+i*sz+k];
306: MatDenseRestoreArray(B,&array);
307: }
308: }
309: MatDestroy(&A);
310: MatDestroy(&B);
311: return(0);
312: }
314: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
315: {
316: NEP_DEF_MATSHELL *matctx;
317: PetscErrorCode ierr;
318: NEP_EXT_OP extop;
319: Vec x1,y1;
320: PetscScalar *yy,sone=1.0,zero=0.0;
321: const PetscScalar *xx;
322: PetscInt nloc,i;
323: PetscMPIInt np;
324: PetscBLASInt n_,one=1,szd_;
327: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
328: MatShellGetContext(M,(void**)&matctx);
329: extop = matctx->extop;
330: if (extop->ref) {
331: VecZeroEntries(y);
332: }
333: if (extop->szd) {
334: x1 = matctx->w[0]; y1 = matctx->w[1];
335: VecGetArrayRead(x,&xx);
336: VecPlaceArray(x1,xx);
337: VecGetArray(y,&yy);
338: VecPlaceArray(y1,yy);
339: MatMult(matctx->T,x1,y1);
340: if (!extop->ref && extop->n) {
341: VecGetLocalSize(x1,&nloc);
342: /* copy for avoiding warning of constant array xx */
343: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
344: BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
345: BVDotVec(extop->X,x1,matctx->work);
346: PetscBLASIntCast(extop->n,&n_);
347: PetscBLASIntCast(extop->szd,&szd_);
348: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
349: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
350: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
351: }
352: VecResetArray(x1);
353: VecRestoreArrayRead(x,&xx);
354: VecResetArray(y1);
355: VecRestoreArray(y,&yy);
356: } else {
357: MatMult(matctx->T,x,y);
358: }
359: return(0);
360: }
362: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
363: {
364: PetscErrorCode ierr;
365: NEP_DEF_MATSHELL *matctx;
368: MatShellGetContext(M,(void**)&matctx);
369: if (right) {
370: VecDuplicate(matctx->w[0],right);
371: }
372: if (left) {
373: VecDuplicate(matctx->w[0],left);
374: }
375: return(0);
376: }
378: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
379: {
380: PetscErrorCode ierr;
381: NEP_DEF_MATSHELL *matctx;
384: MatShellGetContext(M,(void**)&matctx);
385: if (matctx->extop->szd) {
386: BVDestroy(&matctx->U);
387: PetscFree2(matctx->hfj,matctx->work);
388: PetscFree2(matctx->A,matctx->B);
389: VecDestroy(&matctx->w[0]);
390: VecDestroy(&matctx->w[1]);
391: }
392: MatDestroy(&matctx->T);
393: PetscFree(matctx);
394: return(0);
395: }
397: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
398: {
399: PetscScalar p;
400: PetscInt i;
403: if (!jacobian) {
404: val[0] = 1.0;
405: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
406: } else {
407: val[0] = 0.0;
408: p = 1.0;
409: for (i=1;i<extop->n;i++) {
410: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
411: p *= (lambda-extop->bc[i-1]);
412: }
413: }
414: return(0);
415: }
417: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
418: {
419: PetscErrorCode ierr;
420: NEP_DEF_MATSHELL *matctx,*matctxC;
421: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
422: Mat F;
423: Mat Mshell,Mcomp;
424: PetscBool ini=PETSC_FALSE;
425: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
426: PetscBLASInt n_,info,szd_;
429: if (!M) {
430: Mshell = jacobian?extop->MJ:extop->MF;
431: } else Mshell = *M;
432: Mcomp = jacobian?extop->MF:extop->MJ;
433: if (!Mshell) {
434: ini = PETSC_TRUE;
435: PetscNew(&matctx);
436: MatGetLocalSize(extop->nep->function,&mloc,&nloc);
437: nloc += szd; mloc += szd;
438: MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
439: MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation);
440: MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation);
441: MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation);
442: matctx->nep = extop->nep;
443: matctx->extop = extop;
444: if (!M) {
445: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
446: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
447: PetscObjectReference((PetscObject)matctx->T);
448: } else {
449: matctx->jacob = jacobian;
450: MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES, &matctx->T);
451: *M = Mshell;
452: }
453: if (szd) {
454: BVCreateVec(extop->nep->V,matctx->w);
455: VecDuplicate(matctx->w[0],matctx->w+1);
456: BVDuplicateResize(extop->nep->V,szd,&matctx->U);
457: PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
458: PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
459: }
460: } else {
461: MatShellGetContext(Mshell,(void**)&matctx);
462: }
463: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
464: if (ini || matctx->theta != lambda) {
465: if (jacobian) {
466: NEPComputeJacobian(extop->nep,lambda,matctx->T);
467: } else {
468: NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->T);
469: }
470: }
471: if (n) {
472: matctx->hfjset = PETSC_FALSE;
473: if (!extop->simpU) {
474: /* likely hfjp has been already computed */
475: if (Mcomp) {
476: MatShellGetContext(Mcomp,(void**)&matctxC);
477: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
478: PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt);
479: matctx->hfjset = PETSC_TRUE;
480: }
481: }
482: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
483: if (!matctx->hfjset) {
484: NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
485: matctx->hfjset = PETSC_TRUE;
486: }
487: BVSetActiveColumns(matctx->U,0,n);
488: hf = jacobian?hfjp:hfj;
489: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
490: BVMatMult(extop->X,extop->nep->A[0],matctx->U);
491: BVMultInPlace(matctx->U,F,0,n);
492: BVSetActiveColumns(extop->W,0,extop->n);
493: for (j=1;j<extop->nep->nt;j++) {
494: BVMatMult(extop->X,extop->nep->A[j],extop->W);
495: MatDensePlaceArray(F,hf+j*n*n);
496: BVMult(matctx->U,1.0,1.0,extop->W,F);
497: MatDenseResetArray(F);
498: }
499: MatDestroy(&F);
500: } else {
501: hfj = matctx->hfj;
502: BVSetActiveColumns(matctx->U,0,n);
503: BVMatMult(extop->X,matctx->T,matctx->U);
504: for (j=0;j<n;j++) {
505: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
506: hfj[j*(n+1)] += lambda;
507: }
508: PetscBLASIntCast(n,&n_);
509: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
510: SlepcCheckLapackInfo("trtri",info);
511: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
512: BVMultInPlace(matctx->U,F,0,n);
513: if (jacobian) {
514: NEPDeflationComputeFunction(extop,lambda,NULL);
515: MatShellGetContext(extop->MF,(void**)&matctxC);
516: BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
517: }
518: MatDestroy(&F);
519: }
520: PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
521: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
522: A = matctx->A;
523: PetscArrayzero(A,szd*szd);
524: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
525: for (j=0;j<n;j++)
526: for (i=0;i<n;i++)
527: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
528: PetscBLASIntCast(n,&n_);
529: PetscBLASIntCast(szd,&szd_);
530: B = matctx->B;
531: PetscArrayzero(B,szd*szd);
532: for (i=1;i<extop->midx;i++) {
533: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
534: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
535: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
536: pts = hHprev; hHprev = hH; hH = pts;
537: }
538: PetscFree3(basisv,hH,hHprev);
539: }
540: matctx->theta = lambda;
541: matctx->n = extop->n;
542: }
543: return(0);
544: }
546: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
547: {
551: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
552: if (F) *F = extop->MF;
553: return(0);
554: }
556: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
557: {
561: NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
562: if (J) *J = extop->MJ;
563: return(0);
564: }
566: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
567: {
568: PetscErrorCode ierr;
569: NEP_DEF_MATSHELL *matctx;
570: NEP_DEF_FUN_SOLVE solve;
571: PetscInt i,j,n=extop->n;
572: Vec u,tu;
573: Mat F;
574: PetscScalar snone=-1.0,sone=1.0;
575: PetscBLASInt n_,szd_,ldh_,*p,info;
576: Mat Mshell;
579: solve = extop->solve;
580: if (lambda!=solve->theta || n!=solve->n) {
581: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
582: Mshell = (solve->sincf)?extop->MF:solve->T;
583: MatShellGetContext(Mshell,(void**)&matctx);
584: KSPSetOperators(solve->ksp,matctx->T,matctx->T);
585: if (!extop->ref && n) {
586: PetscBLASIntCast(n,&n_);
587: PetscBLASIntCast(extop->szd,&szd_);
588: PetscBLASIntCast(extop->szd+1,&ldh_);
589: if (!extop->simpU) {
590: BVSetActiveColumns(solve->T_1U,0,n);
591: for (i=0;i<n;i++) {
592: BVGetColumn(matctx->U,i,&u);
593: BVGetColumn(solve->T_1U,i,&tu);
594: KSPSolve(solve->ksp,u,tu);
595: BVRestoreColumn(solve->T_1U,i,&tu);
596: BVRestoreColumn(matctx->U,i,&u);
597: }
598: MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
599: BVDot(solve->T_1U,extop->X,F);
600: MatDestroy(&F);
601: } else {
602: for (j=0;j<n;j++)
603: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
604: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
605: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
606: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
607: }
608: PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd);
609: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
610: PetscMalloc1(n,&p);
611: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
612: SlepcCheckLapackInfo("getrf",info);
613: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
614: SlepcCheckLapackInfo("getri",info);
615: PetscFree(p);
616: }
617: solve->theta = lambda;
618: solve->n = n;
619: }
620: return(0);
621: }
623: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
624: {
625: PetscErrorCode ierr;
626: Vec b1,x1;
627: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
628: NEP_DEF_MATSHELL *matctx;
629: NEP_DEF_FUN_SOLVE solve=extop->solve;
630: PetscBLASInt one=1,szd_,n_,ldh_;
631: PetscInt nloc,i;
632: PetscMPIInt np,count;
635: if (extop->ref) {
636: VecZeroEntries(x);
637: }
638: if (extop->szd) {
639: x1 = solve->w[0]; b1 = solve->w[1];
640: VecGetArray(x,&xx);
641: VecPlaceArray(x1,xx);
642: VecGetArray(b,&bb);
643: VecPlaceArray(b1,bb);
644: } else {
645: b1 = b; x1 = x;
646: }
647: KSPSolve(extop->solve->ksp,b1,x1);
648: if (!extop->ref && extop->n) {
649: PetscBLASIntCast(extop->szd,&szd_);
650: PetscBLASIntCast(extop->n,&n_);
651: PetscBLASIntCast(extop->szd+1,&ldh_);
652: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
653: PetscMalloc2(extop->n,&b2,extop->n,&x2);
654: MPI_Comm_size(PetscObjectComm((PetscObject)b),&np);
655: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
656: w = solve->work; w2 = solve->work+extop->n;
657: MatShellGetContext(solve->sincf?extop->MF:solve->T,(void**)&matctx);
658: PetscArraycpy(w2,b2,extop->n);
659: BVDotVec(extop->X,x1,w);
660: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
661: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
662: if (extop->simpU) {
663: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
664: for (i=0;i<extop->n;i++) w[i] = x2[i];
665: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
666: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
667: BVMultVec(extop->X,-1.0,1.0,x1,w);
668: } else {
669: BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
670: }
671: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
672: PetscMPIIntCast(extop->n,&count);
673: MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b));
674: }
675: if (extop->szd) {
676: VecResetArray(x1);
677: VecRestoreArray(x,&xx);
678: VecResetArray(b1);
679: VecRestoreArray(b,&bb);
680: if (!extop->ref && extop->n) { PetscFree2(b2,x2);}
681: }
682: return(0);
683: }
685: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
686: {
688: extop->ref = ref;
689: return(0);
690: }
692: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
693: {
694: PetscErrorCode ierr;
695: PetscInt j;
696: NEP_DEF_FUN_SOLVE solve;
699: if (!extop) return(0);
700: PetscFree(extop->H);
701: BVDestroy(&extop->X);
702: if (extop->szd) {
703: VecDestroy(&extop->w);
704: PetscFree3(extop->Hj,extop->XpX,extop->bc);
705: BVDestroy(&extop->W);
706: }
707: MatDestroy(&extop->MF);
708: MatDestroy(&extop->MJ);
709: if (extop->solve) {
710: solve = extop->solve;
711: if (extop->szd) {
712: if (!extop->simpU) {BVDestroy(&solve->T_1U);}
713: PetscFree2(solve->M,solve->work);
714: VecDestroy(&solve->w[0]);
715: VecDestroy(&solve->w[1]);
716: }
717: if (!solve->sincf) {
718: MatDestroy(&solve->T);
719: }
720: PetscFree(extop->solve);
721: }
722: if (extop->proj) {
723: if (extop->szd) {
724: for (j=0;j<extop->nep->nt;j++) {MatDestroy(&extop->proj->V1pApX[j]);}
725: MatDestroy(&extop->proj->XpV1);
726: PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
727: VecDestroy(&extop->proj->w);
728: BVDestroy(&extop->proj->V1);
729: }
730: PetscFree(extop->proj);
731: }
732: PetscFree(extop);
733: return(0);
734: }
736: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
737: {
738: PetscErrorCode ierr;
739: NEP_EXT_OP op;
740: NEP_DEF_FUN_SOLVE solve;
741: PetscInt szd;
742: Vec x;
745: NEPDeflationReset(*extop);
746: PetscNew(&op);
747: *extop = op;
748: op->nep = nep;
749: op->n = 0;
750: op->szd = szd = sz-1;
751: op->max_midx = PetscMin(MAX_MINIDX,szd);
752: op->X = X;
753: if (!X) { BVDuplicateResize(nep->V,sz,&op->X); }
754: else { PetscObjectReference((PetscObject)X); }
755: PetscCalloc1(sz*sz,&(op)->H);
756: if (op->szd) {
757: BVGetColumn(op->X,0,&x);
758: VecDuplicate(x,&op->w);
759: BVRestoreColumn(op->X,0,&x);
760: op->simpU = PETSC_FALSE;
761: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
762: /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
763: PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
764: } else {
765: op->simpU = PETSC_TRUE;
766: }
767: PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
768: BVDuplicateResize(op->X,op->szd,&op->W);
769: }
770: if (ksp) {
771: PetscNew(&solve);
772: op->solve = solve;
773: solve->ksp = ksp;
774: solve->sincf = sincfun;
775: solve->n = -1;
776: if (op->szd) {
777: if (!op->simpU) {
778: BVDuplicateResize(nep->V,szd,&solve->T_1U);
779: }
780: PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
781: BVCreateVec(nep->V,&solve->w[0]);
782: VecDuplicate(solve->w[0],&solve->w[1]);
783: }
784: }
785: return(0);
786: }
788: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
789: {
790: PetscScalar *T,*E,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
791: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
792: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
793: PetscBLASInt inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
794: PetscMPIInt np;
795: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
796: NEP_EXT_OP extop=proj->extop;
797: NEP nep=extop->nep;
798: PetscErrorCode ierr;
801: DSGetDimensions(ds,&nv,NULL,NULL,NULL,NULL);
802: DSGetLeadingDimension(ds,&ldds);
803: DSGetArray(ds,mat,&T);
804: PetscArrayzero(T,ldds*nv);
805: PetscBLASIntCast(ldds*nv,&dim2);
806: /* mat = V1^*T(lambda)V1 */
807: for (i=0;i<nep->nt;i++) {
808: if (deriv) {
809: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
810: } else {
811: FNEvaluateFunction(nep->f[i],lambda,&alpha);
812: }
813: DSGetArray(ds,DSMatExtra[i],&E);
814: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,E,&inc,T,&inc));
815: DSRestoreArray(ds,DSMatExtra[i],&E);
816: }
817: if (!extop->ref && extop->n) {
818: n = extop->n;
819: szd = extop->szd;
820: PetscArrayzero(proj->work,proj->lwork);
821: PetscBLASIntCast(nv,&nv_);
822: PetscBLASIntCast(n,&n_);
823: PetscBLASIntCast(ldds,&ldds_);
824: PetscBLASIntCast(szd,&szd_);
825: PetscBLASIntCast(proj->dim,&dim_);
826: PetscBLASIntCast(extop->szd+1,&ldh_);
827: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
828: nwork += 2*proj->dim*proj->dim;
830: /* mat = mat + V1^*U(lambda)V2 */
831: for (i=0;i<nep->nt;i++) {
832: MatDenseGetArray(proj->V1pApX[i],&E);
833: if (extop->simpU) {
834: if (deriv) {
835: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
836: } else {
837: FNEvaluateFunction(nep->f[i],lambda,&alpha);
838: }
839: ww = w1; w = w2;
840: PetscArraycpy(ww,proj->V2,szd*nv);
841: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
842: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
843: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
844: alpha = -alpha;
845: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
846: if (deriv) {
847: PetscBLASIntCast(szd*nv,&szdk);
848: FNEvaluateFunction(nep->f[i],lambda,&alpha2);
849: PetscArraycpy(w,proj->V2,szd*nv);
850: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
851: alpha2 = -alpha2;
852: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
853: alpha2 = 1.0;
854: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
855: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
856: }
857: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
858: } else {
859: NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
860: w = deriv?w2:w1; ww = deriv?w1:w2;
861: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
862: s = PetscSqrtReal(np);
863: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
864: }
865: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
866: MatDenseRestoreArray(proj->V1pApX[i],&E);
867: }
869: /* mat = mat + V2^*A(lambda)V1 */
870: basisv = proj->work+nwork; nwork += szd;
871: hH = proj->work+nwork; nwork += szd*szd;
872: hHprev = proj->work+nwork; nwork += szd*szd;
873: AB = proj->work+nwork;
874: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
875: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
876: for (j=0;j<n;j++)
877: for (i=0;i<n;i++)
878: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
879: MatDenseGetArray(proj->XpV1,&E);
880: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
881: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
882: MatDenseRestoreArray(proj->XpV1,&E);
884: /* mat = mat + V2^*B(lambda)V2 */
885: PetscArrayzero(AB,szd*szd);
886: for (i=1;i<extop->midx;i++) {
887: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
888: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
889: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
890: pts = hHprev; hHprev = hH; hH = pts;
891: }
892: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
893: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
894: }
895: DSRestoreArray(ds,mat,&T);
896: return(0);
897: }
899: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
900: {
901: PetscErrorCode ierr;
902: PetscInt k,j,n=extop->n,dim;
903: Vec v,ve;
904: BV V1;
905: Mat G;
906: NEP nep=extop->nep;
907: NEP_DEF_PROJECT proj;
910: NEPCheckSplit(extop->nep,1);
911: proj = extop->proj;
912: if (!proj) {
913: /* Initialize the projection data structure */
914: PetscNew(&proj);
915: extop->proj = proj;
916: proj->extop = extop;
917: BVGetSizes(Vext,NULL,NULL,&dim);
918: proj->dim = dim;
919: if (extop->szd) {
920: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
921: PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
922: for (j=0;j<nep->nt;j++) {
923: MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
924: }
925: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
926: BVCreateVec(extop->X,&proj->w);
927: BVDuplicateResize(extop->X,proj->dim,&proj->V1);
928: }
929: DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
930: }
932: /* Split Vext in V1 and V2 */
933: if (extop->szd) {
934: for (j=j0;j<j1;j++) {
935: BVGetColumn(Vext,j,&ve);
936: BVGetColumn(proj->V1,j,&v);
937: NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
938: BVRestoreColumn(proj->V1,j,&v);
939: BVRestoreColumn(Vext,j,&ve);
940: }
941: V1 = proj->V1;
942: } else V1 = Vext;
944: /* Compute matrices V1^* A_i V1 */
945: BVSetActiveColumns(V1,j0,j1);
946: for (k=0;k<nep->nt;k++) {
947: DSGetMat(ds,DSMatExtra[k],&G);
948: BVMatProject(V1,nep->A[k],V1,G);
949: DSRestoreMat(ds,DSMatExtra[k],&G);
950: }
952: if (extop->n) {
953: if (extop->szd) {
954: /* Compute matrices V1^* A_i X and V1^* X */
955: BVSetActiveColumns(extop->W,0,n);
956: for (k=0;k<nep->nt;k++) {
957: BVMatMult(extop->X,nep->A[k],extop->W);
958: BVDot(extop->W,V1,proj->V1pApX[k]);
959: }
960: BVDot(V1,extop->X,proj->XpV1);
961: }
962: }
963: return(0);
964: }