Actual source code: bvimpl.h
slepc-3.8.3 2018-04-03
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #if !defined(_BVIMPL)
12: #define _BVIMPL
14: #include <slepcbv.h>
15: #include <slepc/private/slepcimpl.h>
17: PETSC_EXTERN PetscBool BVRegisterAllCalled;
18: PETSC_EXTERN PetscErrorCode BVRegisterAll(void);
20: PETSC_EXTERN PetscLogEvent BV_Create,BV_Copy,BV_Mult,BV_MultVec,BV_MultInPlace,BV_Dot,BV_DotVec,BV_Orthogonalize,BV_OrthogonalizeVec,BV_Scale,BV_Norm,BV_NormVec,BV_SetRandom,BV_MatMult,BV_MatMultVec,BV_MatProject;
22: typedef struct _BVOps *BVOps;
24: struct _BVOps {
25: PetscErrorCode (*mult)(BV,PetscScalar,PetscScalar,BV,Mat);
26: PetscErrorCode (*multvec)(BV,PetscScalar,PetscScalar,Vec,PetscScalar*);
27: PetscErrorCode (*multinplace)(BV,Mat,PetscInt,PetscInt);
28: PetscErrorCode (*multinplacetrans)(BV,Mat,PetscInt,PetscInt);
29: PetscErrorCode (*dot)(BV,BV,Mat);
30: PetscErrorCode (*dotvec)(BV,Vec,PetscScalar*);
31: PetscErrorCode (*dotvec_local)(BV,Vec,PetscScalar*);
32: PetscErrorCode (*dotvec_begin)(BV,Vec,PetscScalar*);
33: PetscErrorCode (*dotvec_end)(BV,Vec,PetscScalar*);
34: PetscErrorCode (*scale)(BV,PetscInt,PetscScalar);
35: PetscErrorCode (*norm)(BV,PetscInt,NormType,PetscReal*);
36: PetscErrorCode (*norm_local)(BV,PetscInt,NormType,PetscReal*);
37: PetscErrorCode (*norm_begin)(BV,PetscInt,NormType,PetscReal*);
38: PetscErrorCode (*norm_end)(BV,PetscInt,NormType,PetscReal*);
39: PetscErrorCode (*matmult)(BV,Mat,BV);
40: PetscErrorCode (*copy)(BV,BV);
41: PetscErrorCode (*resize)(BV,PetscInt,PetscBool);
42: PetscErrorCode (*getcolumn)(BV,PetscInt,Vec*);
43: PetscErrorCode (*restorecolumn)(BV,PetscInt,Vec*);
44: PetscErrorCode (*getarray)(BV,PetscScalar**);
45: PetscErrorCode (*restorearray)(BV,PetscScalar**);
46: PetscErrorCode (*getarrayread)(BV,const PetscScalar**);
47: PetscErrorCode (*restorearrayread)(BV,const PetscScalar**);
48: PetscErrorCode (*duplicate)(BV,BV*);
49: PetscErrorCode (*create)(BV);
50: PetscErrorCode (*setfromoptions)(PetscOptionItems*,BV);
51: PetscErrorCode (*view)(BV,PetscViewer);
52: PetscErrorCode (*destroy)(BV);
53: };
55: struct _p_BV {
56: PETSCHEADER(struct _BVOps);
57: /*------------------------- User parameters --------------------------*/
58: Vec t; /* template vector */
59: PetscInt n,N; /* dimensions of vectors (local, global) */
60: PetscInt m; /* number of vectors */
61: PetscInt l; /* number of leading columns */
62: PetscInt k; /* number of active columns */
63: PetscInt nc; /* number of constraints */
64: BVOrthogType orthog_type; /* the method of vector orthogonalization */
65: BVOrthogRefineType orthog_ref; /* refinement method */
66: PetscReal orthog_eta; /* refinement threshold */
67: BVOrthogBlockType orthog_block; /* the method of block orthogonalization */
68: Mat matrix; /* inner product matrix */
69: PetscBool indef; /* matrix is indefinite */
70: BVMatMultType vmm; /* version of matmult operation */
72: /*---------------------- Cached data and workspace -------------------*/
73: Vec Bx; /* result of matrix times a vector x */
74: Vec buffer; /* buffer vector used in orthogonalization */
75: PetscObjectId xid; /* object id of vector x */
76: PetscObjectState xstate; /* state of vector x */
77: Vec cv[2]; /* column vectors obtained with BVGetColumn() */
78: PetscInt ci[2]; /* column indices of obtained vectors */
79: PetscObjectState st[2]; /* state of obtained vectors */
80: PetscObjectId id[2]; /* object id of obtained vectors */
81: PetscScalar *h,*c; /* orthogonalization coefficients */
82: Vec omega; /* signature matrix values for indefinite case */
83: Mat B,C; /* auxiliary dense matrices for matmult operation */
84: PetscObjectId Aid; /* object id of matrix A of matmult operation */
85: PetscBool defersfo; /* deferred call to setfromoptions */
86: BV cached; /* cached BV to store result of matrix times BV */
87: PetscObjectState bvstate; /* state of BV when BVApplyMatrixBV() was called */
88: PetscRandom rand; /* random number generator */
89: PetscBool rrandom; /* reproducible random vectors */
90: Mat Acreate; /* matrix given at BVCreateFromMat() */
91: PetscBool cuda; /* true if GPU must be used in SVEC */
92: PetscScalar *work;
93: PetscInt lwork;
94: void *data;
95: };
97: /*
98: BV_SafeSqrt - Computes the square root of a scalar value alpha, which is
99: assumed to be z'*B*z. The result is
100: if definite inner product: res = sqrt(alpha)
101: if indefinite inner product: res = sgn(alpha)*sqrt(abs(alpha))
102: */
103: PETSC_STATIC_INLINE PetscErrorCode BV_SafeSqrt(BV bv,PetscScalar alpha,PetscReal *res)
104: {
106: PetscReal absal,realp;
109: absal = PetscAbsScalar(alpha);
110: realp = PetscRealPart(alpha);
111: if (absal<PETSC_MACHINE_EPSILON) {
112: PetscInfo(bv,"Zero norm, either the vector is zero or a semi-inner product is being used\n");
113: }
114: #if defined(PETSC_USE_COMPLEX)
115: if (PetscAbsReal(PetscImaginaryPart(alpha))>PETSC_MACHINE_EPSILON && PetscAbsReal(PetscImaginaryPart(alpha))/absal>100*PETSC_MACHINE_EPSILON) SETERRQ1(PetscObjectComm((PetscObject)bv),1,"The inner product is not well defined: nonzero imaginary part %g",PetscImaginaryPart(alpha));
116: #endif
117: if (bv->indef) {
118: *res = (realp<0.0)? -PetscSqrtReal(-realp): PetscSqrtReal(realp);
119: } else {
120: if (realp<-10*PETSC_MACHINE_EPSILON) SETERRQ(PetscObjectComm((PetscObject)bv),1,"The inner product is not well defined: indefinite matrix");
121: *res = (realp<0.0)? 0.0: PetscSqrtReal(realp);
122: }
123: return(0);
124: }
126: /*
127: BV_IPMatMult - Multiply a vector x by the inner-product matrix, cache the
128: result in Bx.
129: */
130: PETSC_STATIC_INLINE PetscErrorCode BV_IPMatMult(BV bv,Vec x)
131: {
135: if (((PetscObject)x)->id != bv->xid || ((PetscObject)x)->state != bv->xstate) {
136: MatMult(bv->matrix,x,bv->Bx);
137: bv->xid = ((PetscObject)x)->id;
138: bv->xstate = ((PetscObject)x)->state;
139: }
140: return(0);
141: }
143: /*
144: BV_IPMatMultBV - Multiply BV by the inner-product matrix, cache the
145: result internally in bv->cached.
146: */
147: PETSC_STATIC_INLINE PetscErrorCode BV_IPMatMultBV(BV bv)
148: {
152: BVGetCachedBV(bv,&bv->cached);
153: if (((PetscObject)bv)->state != bv->bvstate || bv->l != bv->cached->l || bv->k != bv->cached->k) {
154: BVSetActiveColumns(bv->cached,bv->l,bv->k);
155: if (bv->matrix) {
156: BVMatMult(bv,bv->matrix,bv->cached);
157: } else {
158: BVCopy(bv,bv->cached);
159: }
160: bv->bvstate = ((PetscObject)bv)->state;
161: }
162: return(0);
163: }
165: /*
166: BV_AllocateCoeffs - Allocate orthogonalization coefficients if not done already.
167: */
168: PETSC_STATIC_INLINE PetscErrorCode BV_AllocateCoeffs(BV bv)
169: {
173: if (!bv->h) {
174: PetscMalloc2(bv->nc+bv->m,&bv->h,bv->nc+bv->m,&bv->c);
175: PetscLogObjectMemory((PetscObject)bv,2*bv->m*sizeof(PetscScalar));
176: }
177: return(0);
178: }
180: /*
181: BV_AllocateSignature - Allocate signature coefficients if not done already.
182: */
183: PETSC_STATIC_INLINE PetscErrorCode BV_AllocateSignature(BV bv)
184: {
188: if (bv->indef && !bv->omega) {
189: if (bv->cuda) {
190: #if defined(PETSC_HAVE_VECCUDA)
191: VecCreateSeqCUDA(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega);
192: #else
193: SETERRQ(PetscObjectComm((PetscObject)bv),1,"Something wrong happened");
194: #endif
195: } else {
196: VecCreateSeq(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega);
197: }
198: PetscLogObjectParent((PetscObject)bv,(PetscObject)bv->omega);
199: VecSet(bv->omega,1.0);
200: }
201: return(0);
202: }
204: /*
205: BV_AllocateMatMult - Allocate auxiliary matrices required for BVMatMult if not available.
206: */
207: PETSC_STATIC_INLINE PetscErrorCode BV_AllocateMatMult(BV bv,Mat A,PetscInt m)
208: {
210: PetscObjectId Aid;
211: PetscBool create=PETSC_FALSE;
212: PetscInt cols;
215: if (!bv->B) create=PETSC_TRUE;
216: else {
217: MatGetSize(bv->B,NULL,&cols);
218: PetscObjectGetId((PetscObject)A,&Aid);
219: if (cols!=m || bv->Aid!=Aid) {
220: MatDestroy(&bv->B);
221: MatDestroy(&bv->C);
222: create=PETSC_TRUE;
223: }
224: }
225: if (create) {
226: MatCreateDense(PetscObjectComm((PetscObject)bv),bv->n,PETSC_DECIDE,bv->N,m,NULL,&bv->B);
227: PetscLogObjectParent((PetscObject)bv,(PetscObject)bv->B);
228: MatAssemblyBegin(bv->B,MAT_FINAL_ASSEMBLY);
229: MatAssemblyEnd(bv->B,MAT_FINAL_ASSEMBLY);
230: }
231: return(0);
232: }
234: /*
235: BVAvailableVec: First (0) or second (1) vector available for
236: getcolumn operation (or -1 if both vectors already fetched).
237: */
238: #define BVAvailableVec (((bv->ci[0]==-bv->nc-1)? 0: (bv->ci[1]==-bv->nc-1)? 1: -1))
240: /*
241: Macros to test valid BV arguments
242: */
243: #if !defined(PETSC_USE_DEBUG)
245: #define BVCheckSizes(h,arg) do {} while (0)
247: #else
249: #define BVCheckSizes(h,arg) \
250: do { \
251: if (!h->m) SETERRQ1(PetscObjectComm((PetscObject)h),PETSC_ERR_ARG_WRONGSTATE,"BV sizes have not been defined: Parameter #%d",arg); \
252: } while (0)
254: #endif
256: PETSC_INTERN PetscErrorCode BVView_Vecs(BV,PetscViewer);
258: PETSC_INTERN PetscErrorCode BVAllocateWork_Private(BV,PetscInt);
260: PETSC_INTERN PetscErrorCode BVMult_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,const PetscScalar*,PetscScalar,PetscScalar*);
261: PETSC_INTERN PetscErrorCode BVMultVec_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,const PetscScalar*,PetscScalar,PetscScalar*);
262: PETSC_INTERN PetscErrorCode BVMultInPlace_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,const PetscScalar*,PetscBool);
263: PETSC_INTERN PetscErrorCode BVMultInPlace_Vecs_Private(BV,PetscInt,PetscInt,PetscInt,Vec*,const PetscScalar*,PetscBool);
264: PETSC_INTERN PetscErrorCode BVAXPY_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscScalar,PetscScalar*);
265: PETSC_INTERN PetscErrorCode BVDot_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,const PetscScalar*,const PetscScalar*,PetscScalar*,PetscBool);
266: PETSC_INTERN PetscErrorCode BVDotVec_BLAS_Private(BV,PetscInt,PetscInt,const PetscScalar*,const PetscScalar*,PetscScalar*,PetscBool);
267: PETSC_INTERN PetscErrorCode BVScale_BLAS_Private(BV,PetscInt,PetscScalar*,PetscScalar);
268: PETSC_INTERN PetscErrorCode BVNorm_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,NormType,PetscReal*,PetscBool);
269: PETSC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_Private(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar*);
271: #if defined(PETSC_HAVE_VECCUDA)
272: #include <petsccuda.h>
273: #include <cublas_v2.h>
275: #define WaitForGPU() PetscCUDASynchronize ? cudaThreadSynchronize() : 0
277: /* complex single */
278: #if defined(PETSC_USE_COMPLEX)
279: #if defined(PETSC_USE_REAL_SINGLE)
280: #define cublasXgemm(a,b,c,d,e,f,g,h,i,j,k,l,m,n) cublasCgemm((a),(b),(c),(d),(e),(f),(cuComplex*)(g),(cuComplex*)(h),(i),(cuComplex*)(j),(k),(cuComplex*)(l),(cuComplex*)(m),(n))
281: #define cublasXgemv(a,b,c,d,e,f,g,h,i,j,k,l) cublasCgemv((a),(b),(c),(d),(cuComplex*)(e),(cuComplex*)(f),(g),(cuComplex*)(h),(i),(cuComplex*)(j),(cuComplex*)(k),(l))
282: #define cublasXscal(a,b,c,d,e) cublasCscal(a,b,(const cuComplex*)(c),(cuComplex*)(d),e)
283: #define cublasXnrm2(a,b,c,d,e) cublasScnrm2(a,b,(const cuComplex*)(c),d,e)
284: #define cublasXaxpy(a,b,c,d,e,f,g) cublasCaxpy((a),(b),(cuComplex*)(c),(cuComplex*)(d),(e),(cuComplex*)(f),(g))
285: #define cublasXdotc(a,b,c,d,e,f,g) cublasCdotc((a),(b),(const cuComplex *)(c),(d),(const cuComplex *)(e),(f),(cuComplex *)(g))
286: #else /* complex double */
287: #define cublasXgemm(a,b,c,d,e,f,g,h,i,j,k,l,m,n) cublasZgemm((a),(b),(c),(d),(e),(f),(cuDoubleComplex*)(g),(cuDoubleComplex*)(h),(i),(cuDoubleComplex*)(j),(k),(cuDoubleComplex*)(l),(cuDoubleComplex*)(m),(n))
288: #define cublasXgemv(a,b,c,d,e,f,g,h,i,j,k,l) cublasZgemv((a),(b),(c),(d),(cuDoubleComplex*)(e),(cuDoubleComplex*)(f),(g),(cuDoubleComplex*)(h),(i),(cuDoubleComplex*)(j),(cuDoubleComplex*)(k),(l))
289: #define cublasXscal(a,b,c,d,e) cublasZscal(a,b,(const cuDoubleComplex*)(c),(cuDoubleComplex*)(d),e)
290: #define cublasXnrm2(a,b,c,d,e) cublasDznrm2(a,b,(const cuDoubleComplex*)(c),d,e)
291: #define cublasXaxpy(a,b,c,d,e,f,g) cublasZaxpy((a),(b),(cuDoubleComplex*)(c),(cuDoubleComplex*)(d),(e),(cuDoubleComplex*)(f),(g))
292: #define cublasXdotc(a,b,c,d,e,f,g) cublasZdotc((a),(b),(const cuDoubleComplex *)(c),(d),(const cuDoubleComplex *)(e),(f),(cuDoubleComplex *)(g))
293: #endif
294: #else /* real single */
295: #if defined(PETSC_USE_REAL_SINGLE)
296: #define cublasXgemm cublasSgemm
297: #define cublasXgemv cublasSgemv
298: #define cublasXscal cublasSscal
299: #define cublasXnrm2 cublasSnrm2
300: #define cublasXaxpy cublasSaxpy
301: #define cublasXdotc cublasSdot
302: #else /* real double */
303: #define cublasXgemm cublasDgemm
304: #define cublasXgemv cublasDgemv
305: #define cublasXscal cublasDscal
306: #define cublasXnrm2 cublasDnrm2
307: #define cublasXaxpy cublasDaxpy
308: #define cublasXdotc cublasDdot
309: #endif
310: #endif
312: PETSC_INTERN PetscErrorCode BV_CleanCoefficients_CUDA(BV,PetscInt,PetscScalar*);
313: PETSC_INTERN PetscErrorCode BV_AddCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
314: PETSC_INTERN PetscErrorCode BV_SetValue_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar);
315: PETSC_INTERN PetscErrorCode BV_SquareSum_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
316: PETSC_INTERN PetscErrorCode BV_ApplySignature_CUDA(BV,PetscInt,PetscScalar*,PetscBool);
317: PETSC_INTERN PetscErrorCode BV_SquareRoot_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
318: PETSC_INTERN PetscErrorCode BV_StoreCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
320: #endif /* PETSC_HAVE_VECCUDA */
322: #endif