Actual source code: ks-twosided.c
slepc-3.13.4 2020-09-02
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Two-sided Arnoldi with Krylov-Schur restart (for left eigenvectors)
15: References:
17: [1] I.N. Zwaan and M.E. Hochstenbach, "Krylov-Schur-type restarts
18: for the two-sided Arnoldi method", SIAM J. Matrix Anal. Appl.
19: 38(2):297-321, 2017.
21: */
23: #include <slepc/private/epsimpl.h>
24: #include "krylovschur.h"
25: #include <slepcblaslapack.h>
27: static PetscErrorCode EPSTwoSidedRQUpdate1(EPS eps,Mat M,PetscInt nv)
28: {
30: PetscScalar *T,*S,*A,*w,*pM,beta;
31: Vec u;
32: PetscInt ld,ncv=eps->ncv,i,l,nnv;
33: PetscBLASInt info,n_,ncv_,*p,one=1;
36: DSGetLeadingDimension(eps->ds,&ld);
37: PetscMalloc3(nv,&p,ncv*ncv,&A,ncv,&w);
38: BVGetActiveColumns(eps->V,&l,&nnv);
39: BVSetActiveColumns(eps->V,0,nv);
40: BVSetActiveColumns(eps->W,0,nv);
41: BVGetColumn(eps->V,nv,&u);
42: BVDotVec(eps->W,u,w);
43: BVRestoreColumn(eps->V,nv,&u);
44: MatDenseGetArray(M,&pM);
45: PetscArraycpy(A,pM,ncv*ncv);
46: PetscBLASIntCast(nv,&n_);
47: PetscBLASIntCast(ncv,&ncv_);
48: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,A,&ncv_,p,&info));
49: SlepcCheckLapackInfo("getrf",info);
50: PetscLogFlops(2.0*n_*n_*n_/3.0);
51: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&n_,&one,A,&ncv_,p,w,&ncv_,&info));
52: SlepcCheckLapackInfo("getrs",info);
53: PetscLogFlops(2.0*n_*n_-n_);
54: BVMultColumn(eps->V,-1.0,1.0,nv,w);
55: DSGetArray(eps->ds,DS_MAT_A,&S);
56: beta = S[(nv-1)*ld+nv];
57: for (i=0;i<nv;i++) S[(nv-1)*ld+i] += beta*w[i];
58: DSRestoreArray(eps->ds,DS_MAT_A,&S);
59: BVGetColumn(eps->W,nv,&u);
60: BVDotVec(eps->V,u,w);
61: BVRestoreColumn(eps->W,nv,&u);
62: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n_,&one,A,&ncv_,p,w,&ncv_,&info));
63: BVMultColumn(eps->W,-1.0,1.0,nv,w);
64: DSGetArray(eps->dsts,DS_MAT_A,&T);
65: beta = T[(nv-1)*ld+nv];
66: for (i=0;i<nv;i++) T[(nv-1)*ld+i] += beta*w[i];
67: DSRestoreArray(eps->dsts,DS_MAT_A,&T);
68: PetscFree3(p,A,w);
69: BVSetActiveColumns(eps->V,l,nnv);
70: BVSetActiveColumns(eps->W,l,nnv);
71: return(0);
72: }
74: static PetscErrorCode EPSTwoSidedRQUpdate2(EPS eps,Mat M,PetscInt k)
75: {
77: PetscScalar *Q,*pM,*w,zero=0.0,sone=1.0,*c,*A;
78: PetscBLASInt n_,ncv_,ld_;
79: PetscReal norm;
80: PetscInt l,nv,ncv=eps->ncv,ld,i,j;
83: DSGetLeadingDimension(eps->ds,&ld);
84: BVGetActiveColumns(eps->V,&l,&nv);
85: BVSetActiveColumns(eps->V,0,nv);
86: BVSetActiveColumns(eps->W,0,nv);
87: PetscMalloc2(ncv*ncv,&w,ncv,&c);
88: /* u = u - V*V'*u */
89: BVOrthogonalizeColumn(eps->V,k,c,&norm,NULL);
90: BVScaleColumn(eps->V,k,1.0/norm);
91: DSGetArray(eps->ds,DS_MAT_A,&A);
92: /* H = H + V'*u*b' */
93: for (j=l;j<k;j++) {
94: for (i=0;i<k;i++) A[i+j*ld] += c[i]*A[k+j*ld];
95: A[k+j*ld] *= norm;
96: }
97: DSRestoreArray(eps->ds,DS_MAT_A,&A);
98: BVOrthogonalizeColumn(eps->W,k,c,&norm,NULL);
99: BVScaleColumn(eps->W,k,1.0/norm);
100: DSGetArray(eps->dsts,DS_MAT_A,&A);
101: /* H = H + V'*u*b' */
102: for (j=l;j<k;j++) {
103: for (i=0;i<k;i++) A[i+j*ld] += c[i]*A[k+j*ld];
104: A[k+j*ld] *= norm;
105: }
106: DSRestoreArray(eps->dsts,DS_MAT_A,&A);
108: /* M = Q'*M*Q */
109: MatDenseGetArray(M,&pM);
110: PetscBLASIntCast(ncv,&ncv_);
111: PetscBLASIntCast(nv,&n_);
112: PetscBLASIntCast(ld,&ld_);
113: DSGetArray(eps->ds,DS_MAT_Q,&Q);
114: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,pM,&ncv_,Q,&ld_,&zero,w,&ncv_));
115: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
116: DSGetArray(eps->dsts,DS_MAT_Q,&Q);
117: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,Q,&ld_,w,&ncv_,&zero,pM,&ncv_));
118: DSRestoreArray(eps->dsts,DS_MAT_Q,&Q);
119: PetscFree2(w,c);
120: BVSetActiveColumns(eps->V,l,nv);
121: BVSetActiveColumns(eps->W,l,nv);
122: return(0);
123: }
125: PetscErrorCode EPSSolve_KrylovSchur_TwoSided(EPS eps)
126: {
127: PetscErrorCode ierr;
128: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data;
129: Mat M,U,Op,OpHT;
130: PetscReal norm,norm2,beta,betat,s,t;
131: PetscScalar *pM,*S,*T,*eigr,*eigi,*Q;
132: PetscInt ld,l,nv,nvt,ncv=eps->ncv,i,j,k,nconv,*p,cont,*idx,*idx2,id=0;
133: PetscBool breakdownt,breakdown,breakdownl;
134: #if defined(PETSC_USE_COMPLEX)
135: Mat A;
136: #endif
139: DSGetLeadingDimension(eps->ds,&ld);
140: EPSGetStartVector(eps,0,NULL);
141: EPSGetLeftStartVector(eps,0,NULL);
142: l = 0;
143: PetscMalloc6(ncv*ncv,&pM,ncv,&eigr,ncv,&eigi,ncv,&idx,ncv,&idx2,ncv,&p);
144: MatCreateSeqDense(PETSC_COMM_SELF,eps->ncv,eps->ncv,pM,&M);
146: STGetOperator(eps->st,&Op);
147: MatCreateHermitianTranspose(Op,&OpHT);
149: /* Restart loop */
150: while (eps->reason == EPS_CONVERGED_ITERATING) {
151: eps->its++;
153: /* Compute an nv-step Arnoldi factorization for Op */
154: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
155: DSGetArray(eps->ds,DS_MAT_A,&S);
156: BVMatArnoldi(eps->V,Op,S,ld,eps->nconv+l,&nv,&beta,&breakdown);
157: DSRestoreArray(eps->ds,DS_MAT_A,&S);
159: /* Compute an nv-step Arnoldi factorization for Op' */
160: nvt = nv;
161: DSGetArray(eps->dsts,DS_MAT_A,&T);
162: BVMatArnoldi(eps->W,OpHT,T,ld,eps->nconv+l,&nvt,&betat,&breakdownt);
163: DSRestoreArray(eps->dsts,DS_MAT_A,&T);
165: /* Make sure both factorizations have the same length */
166: nv = PetscMin(nv,nvt);
167: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
168: DSSetDimensions(eps->dsts,nv,0,eps->nconv,eps->nconv+l);
169: if (l==0) {
170: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
171: DSSetState(eps->dsts,DS_STATE_INTERMEDIATE);
172: } else {
173: DSSetState(eps->ds,DS_STATE_RAW);
174: DSSetState(eps->dsts,DS_STATE_RAW);
175: }
176: breakdown = (breakdown || breakdownt)? PETSC_TRUE: PETSC_FALSE;
178: /* Update M, modify Rayleigh quotients S and T */
179: BVSetActiveColumns(eps->V,eps->nconv+l,nv);
180: BVSetActiveColumns(eps->W,eps->nconv+l,nv);
181: BVMatProject(eps->V,NULL,eps->W,M);
183: EPSTwoSidedRQUpdate1(eps,M,nv);
185: /* Solve projected problem */
186: DSSolve(eps->ds,eps->eigr,eps->eigi);
187: DSSort(eps->ds,eps->eigr,eps->eigi,NULL,NULL,NULL);
188: DSSynchronize(eps->ds,eps->eigr,eps->eigi);
189: DSSolve(eps->dsts,eigr,eigi);
190: #if defined(PETSC_USE_COMPLEX)
191: DSGetMat(eps->dsts,DS_MAT_A,&A);
192: MatConjugate(A);
193: DSRestoreMat(eps->dsts,DS_MAT_A,&A);
194: DSGetMat(eps->dsts,DS_MAT_Q,&U);
195: MatConjugate(U);
196: DSRestoreMat(eps->dsts,DS_MAT_Q,&U);
197: for (i=0;i<nv;i++) eigr[i] = PetscConj(eigr[i]);
198: #endif
199: DSSort(eps->dsts,eigr,eigi,NULL,NULL,NULL);
200: /* check correct eigenvalue correspondence */
201: cont = 0;
202: for (i=0;i<nv;i++) {
203: if (SlepcAbsEigenvalue(eigr[i]-eps->eigr[i],eigi[i]-eps->eigi[i])>PETSC_SQRT_MACHINE_EPSILON) {idx2[cont] = i; idx[cont++] = i;}
204: p[i] = -1;
205: }
206: if (cont) {
207: for (i=0;i<cont;i++) {
208: t = PETSC_MAX_REAL;
209: for (j=0;j<cont;j++) if (idx2[j]!=-1 && (s=SlepcAbsEigenvalue(eigr[idx[j]]-eps->eigr[idx[i]],eigi[idx[j]]-eps->eigi[idx[i]]))<t) { id = j; t = s; }
210: p[idx[i]] = idx[id];
211: idx2[id] = -1;
212: }
213: for (i=0;i<nv;i++) if (p[i]==-1) p[i] = i;
214: DSSortWithPermutation(eps->dsts,p,eigr,eigi);
215: }
216: #if defined(PETSC_USE_COMPLEX)
217: DSGetMat(eps->dsts,DS_MAT_A,&A);
218: MatConjugate(A);
219: DSRestoreMat(eps->dsts,DS_MAT_A,&A);
220: DSGetMat(eps->dsts,DS_MAT_Q,&U);
221: MatConjugate(U);
222: DSRestoreMat(eps->dsts,DS_MAT_Q,&U);
223: #endif
224: DSSynchronize(eps->dsts,eigr,eigi);
226: /* Check convergence */
227: BVNormColumn(eps->V,nv,NORM_2,&norm);
228: BVNormColumn(eps->W,nv,NORM_2,&norm2);
229: EPSKrylovConvergence(eps,PETSC_FALSE,eps->nconv,nv-eps->nconv,beta*norm,betat*norm2,1.0,&k);
230: (*eps->stopping)(eps,eps->its,eps->max_it,k,eps->nev,&eps->reason,eps->stoppingctx);
231: nconv = k;
233: /* Update l */
234: if (eps->reason != EPS_CONVERGED_ITERATING || breakdown || k==nv) l = 0;
235: else {
236: l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
237: #if !defined(PETSC_USE_COMPLEX)
238: DSGetArray(eps->ds,DS_MAT_A,&S);
239: if (S[k+l+(k+l-1)*ld] != 0.0) {
240: if (k+l<nv-1) l = l+1;
241: else l = l-1;
242: }
243: DSRestoreArray(eps->ds,DS_MAT_A,&S);
244: #endif
245: }
246: if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
248: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
249: BVSetActiveColumns(eps->V,eps->nconv,nv);
250: BVSetActiveColumns(eps->W,eps->nconv,nv);
251: DSGetMat(eps->ds,DS_MAT_Q,&U);
252: BVMultInPlace(eps->V,U,eps->nconv,k+l);
253: MatDestroy(&U);
254: DSGetMat(eps->dsts,DS_MAT_Q,&U);
255: BVMultInPlace(eps->W,U,eps->nconv,k+l);
256: MatDestroy(&U);
257: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
258: BVCopyColumn(eps->V,nv,k+l);
259: BVCopyColumn(eps->W,nv,k+l);
260: }
262: if (eps->reason == EPS_CONVERGED_ITERATING) {
263: if (breakdown || k==nv) {
264: /* Start a new Arnoldi factorization */
265: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
266: if (k<eps->nev) {
267: EPSGetStartVector(eps,k,&breakdown);
268: EPSGetLeftStartVector(eps,k,&breakdownl);
269: if (breakdown || breakdownl) {
270: eps->reason = EPS_DIVERGED_BREAKDOWN;
271: PetscInfo(eps,"Unable to generate more start vectors\n");
272: }
273: }
274: } else {
275: /* Prepare the Rayleigh quotient for restart */
276: DSGetArray(eps->ds,DS_MAT_A,&S);
277: DSGetArray(eps->ds,DS_MAT_Q,&Q);
278: for (i=k;i<k+l;i++) S[k+l+i*ld] = Q[nv-1+i*ld]*beta;
279: DSRestoreArray(eps->ds,DS_MAT_A,&S);
280: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
281: DSGetArray(eps->dsts,DS_MAT_A,&S);
282: DSGetArray(eps->dsts,DS_MAT_Q,&Q);
283: for (i=k;i<k+l;i++) S[k+l+i*ld] = Q[nv-1+i*ld]*betat;
284: DSRestoreArray(eps->dsts,DS_MAT_A,&S);
285: DSRestoreArray(eps->dsts,DS_MAT_Q,&Q);
286: }
287: EPSTwoSidedRQUpdate2(eps,M,k+l);
288: }
289: eps->nconv = k;
290: EPSMonitor(eps,eps->its,nconv,eps->eigr,eps->eigi,eps->errest,nv);
291: }
293: STRestoreOperator(eps->st,&Op);
294: MatDestroy(&OpHT);
296: /* truncate Schur decomposition and change the state to raw so that
297: DSVectors() computes eigenvectors from scratch */
298: DSSetDimensions(eps->ds,eps->nconv,0,0,0);
299: DSSetState(eps->ds,DS_STATE_RAW);
300: DSSetDimensions(eps->dsts,eps->nconv,0,0,0);
301: DSSetState(eps->dsts,DS_STATE_RAW);
302: PetscFree6(pM,eigr,eigi,idx,idx2,p);
303: MatDestroy(&M);
304: return(0);
305: }