Actual source code: spooles.c
2: /*
3: Provides an interface to the Spooles serial sparse solver
4: */
5: #include <../src/mat/impls/aij/seq/aij.h>
6: #include <../src/mat/impls/sbaij/seq/sbaij.h>
7: #include <../src/mat/impls/aij/seq/spooles/spooles.h>
11: PetscErrorCode MatDestroy_SeqAIJSpooles(Mat A)
12: {
13: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
17: if (lu && lu->CleanUpSpooles) {
18: FrontMtx_free(lu->frontmtx);
19: IV_free(lu->newToOldIV);
20: IV_free(lu->oldToNewIV);
21: InpMtx_free(lu->mtxA);
22: ETree_free(lu->frontETree);
23: IVL_free(lu->symbfacIVL);
24: SubMtxManager_free(lu->mtxmanager);
25: Graph_free(lu->graph);
26: }
27: PetscFree(A->spptr);
28: MatDestroy_SeqAIJ(A);
29: return(0);
30: }
34: PetscErrorCode MatSolve_SeqSpooles(Mat A,Vec b,Vec x)
35: {
36: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
37: PetscScalar *array;
38: DenseMtx *mtxY, *mtxX ;
39: PetscErrorCode ierr;
40: PetscInt irow,neqns=A->cmap->n,nrow=A->rmap->n,*iv;
41: #if defined(PETSC_USE_COMPLEX)
42: double x_real,x_imag;
43: #else
44: double *entX;
45: #endif
48: mtxY = DenseMtx_new();
49: DenseMtx_init(mtxY, lu->options.typeflag, 0, 0, nrow, 1, 1, nrow); /* column major */
50: VecGetArray(b,&array);
52: if (lu->options.useQR) { /* copy b to mtxY */
53: for ( irow = 0 ; irow < nrow; irow++ )
54: #if !defined(PETSC_USE_COMPLEX)
55: DenseMtx_setRealEntry(mtxY, irow, 0, *array++);
56: #else
57: DenseMtx_setComplexEntry(mtxY, irow, 0, PetscRealPart(array[irow]), PetscImaginaryPart(array[irow]));
58: #endif
59: } else { /* copy permuted b to mtxY */
60: iv = IV_entries(lu->oldToNewIV);
61: for ( irow = 0 ; irow < nrow; irow++ )
62: #if !defined(PETSC_USE_COMPLEX)
63: DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++);
64: #else
65: DenseMtx_setComplexEntry(mtxY,*iv++,0,PetscRealPart(array[irow]),PetscImaginaryPart(array[irow]));
66: #endif
67: }
68: VecRestoreArray(b,&array);
70: mtxX = DenseMtx_new();
71: DenseMtx_init(mtxX, lu->options.typeflag, 0, 0, neqns, 1, 1, neqns);
72: if (lu->options.useQR) {
73: FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager,
74: lu->cpus, lu->options.msglvl, lu->options.msgFile);
75: } else {
76: FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager,
77: lu->cpus, lu->options.msglvl, lu->options.msgFile);
78: }
79: if ( lu->options.msglvl > 2 ) {
80: int err;
81: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n right hand side matrix after permutation");
82: DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile);
83: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution matrix in new ordering");
84: DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile);
85: err = fflush(lu->options.msgFile);
86: if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
87: }
89: /* permute solution into original ordering, then copy to x */
90: DenseMtx_permuteRows(mtxX, lu->newToOldIV);
91: VecGetArray(x,&array);
93: #if !defined(PETSC_USE_COMPLEX)
94: entX = DenseMtx_entries(mtxX);
95: DVcopy(neqns, array, entX);
96: #else
97: for (irow=0; irow<nrow; irow++){
98: DenseMtx_complexEntry(mtxX,irow,0,&x_real,&x_imag);
99: array[irow] = x_real+x_imag*PETSC_i;
100: }
101: #endif
103: VecRestoreArray(x,&array);
104:
105: /* free memory */
106: DenseMtx_free(mtxX);
107: DenseMtx_free(mtxY);
108: return(0);
109: }
113: PetscErrorCode MatFactorNumeric_SeqSpooles(Mat F,Mat A,const MatFactorInfo *info)
114: {
115: Mat_Spooles *lu = (Mat_Spooles*)(F)->spptr;
116: ChvManager *chvmanager ;
117: Chv *rootchv ;
118: IVL *adjIVL;
119: PetscErrorCode ierr;
120: PetscInt nz,nrow=A->rmap->n,irow,nedges,neqns=A->cmap->n,*ai,*aj,i,*diag=0,fierr;
121: PetscScalar *av;
122: double cputotal,facops;
123: #if defined(PETSC_USE_COMPLEX)
124: PetscInt nz_row,*aj_tmp;
125: PetscScalar *av_tmp;
126: #else
127: PetscInt *ivec1,*ivec2,j;
128: double *dvec;
129: #endif
130: PetscBool isSeqAIJ,isMPIAIJ;
131:
133: if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
134: (F)->ops->solve = MatSolve_SeqSpooles;
135: (F)->assembled = PETSC_TRUE;
136:
137: /* set Spooles options */
138: SetSpoolesOptions(A, &lu->options);
140: lu->mtxA = InpMtx_new();
141: }
143: /* copy A to Spooles' InpMtx object */
144: PetscTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
145: PetscTypeCompare((PetscObject)A,MATSEQAIJ,&isMPIAIJ);
146: if (isSeqAIJ){
147: Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data;
148: ai=mat->i; aj=mat->j; av=mat->a;
149: if (lu->options.symflag == SPOOLES_NONSYMMETRIC) {
150: nz=mat->nz;
151: } else { /* SPOOLES_SYMMETRIC || SPOOLES_HERMITIAN */
152: nz=(mat->nz + A->rmap->n)/2;
153: diag=mat->diag;
154: }
155: } else { /* A is SBAIJ */
156: Mat_SeqSBAIJ *mat = (Mat_SeqSBAIJ*)A->data;
157: ai=mat->i; aj=mat->j; av=mat->a;
158: nz=mat->nz;
159: }
160: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
161:
162: #if defined(PETSC_USE_COMPLEX)
163: for (irow=0; irow<nrow; irow++) {
164: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !(isSeqAIJ || isMPIAIJ)){
165: nz_row = ai[irow+1] - ai[irow];
166: aj_tmp = aj + ai[irow];
167: av_tmp = av + ai[irow];
168: } else {
169: nz_row = ai[irow+1] - diag[irow];
170: aj_tmp = aj + diag[irow];
171: av_tmp = av + diag[irow];
172: }
173: for (i=0; i<nz_row; i++){
174: InpMtx_inputComplexEntry(lu->mtxA, irow, *aj_tmp++,PetscRealPart(*av_tmp),PetscImaginaryPart(*av_tmp));
175: av_tmp++;
176: }
177: }
178: #else
179: ivec1 = InpMtx_ivec1(lu->mtxA);
180: ivec2 = InpMtx_ivec2(lu->mtxA);
181: dvec = InpMtx_dvec(lu->mtxA);
182: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC || !isSeqAIJ){
183: for (irow = 0; irow < nrow; irow++){
184: for (i = ai[irow]; i<ai[irow+1]; i++) ivec1[i] = irow;
185: }
186: IVcopy(nz, ivec2, aj);
187: DVcopy(nz, dvec, av);
188: } else {
189: nz = 0;
190: for (irow = 0; irow < nrow; irow++){
191: for (j = diag[irow]; j<ai[irow+1]; j++) {
192: ivec1[nz] = irow;
193: ivec2[nz] = aj[j];
194: dvec[nz] = av[j];
195: nz++;
196: }
197: }
198: }
199: InpMtx_inputRealTriples(lu->mtxA, nz, ivec1, ivec2, dvec);
200: #endif
202: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
203: if ( lu->options.msglvl > 0 ) {
204: int err;
205: printf("\n\n input matrix");
206: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix");
207: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
208: err = fflush(lu->options.msgFile);
209: if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
210: }
212: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
213: /*---------------------------------------------------
214: find a low-fill ordering
215: (1) create the Graph object
216: (2) order the graph
217: -------------------------------------------------------*/
218: if (lu->options.useQR){
219: adjIVL = InpMtx_adjForATA(lu->mtxA);
220: } else {
221: adjIVL = InpMtx_fullAdjacency(lu->mtxA);
222: }
223: nedges = IVL_tsize(adjIVL);
225: lu->graph = Graph_new();
226: Graph_init2(lu->graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL);
227: if ( lu->options.msglvl > 2 ) {
228: int err;
230: if (lu->options.useQR){
231: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of A^T A");
232: } else {
233: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
234: }
235: Graph_writeForHumanEye(lu->graph, lu->options.msgFile);
236: err = fflush(lu->options.msgFile);
237: if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
238: }
240: switch (lu->options.ordering) {
241: case 0:
242: lu->frontETree = orderViaBestOfNDandMS(lu->graph,
243: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
244: lu->options.seed, lu->options.msglvl, lu->options.msgFile); break;
245: case 1:
246: lu->frontETree = orderViaMMD(lu->graph,lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
247: case 2:
248: lu->frontETree = orderViaMS(lu->graph, lu->options.maxdomainsize,
249: lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
250: case 3:
251: lu->frontETree = orderViaND(lu->graph, lu->options.maxdomainsize,
252: lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
253: default:
254: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
255: }
257: if ( lu->options.msglvl > 0 ) {
258: int err;
260: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
261: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
262: err = fflush(lu->options.msgFile);
263: if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
264: }
265:
266: /* get the permutation, permute the front tree */
267: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
268: lu->oldToNew = IV_entries(lu->oldToNewIV);
269: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);
270: if (!lu->options.useQR) ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
272: /* permute the matrix */
273: if (lu->options.useQR){
274: InpMtx_permute(lu->mtxA, NULL, lu->oldToNew);
275: } else {
276: InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew);
277: if ( lu->options.symflag == SPOOLES_SYMMETRIC) {
278: InpMtx_mapToUpperTriangle(lu->mtxA);
279: }
280: #if defined(PETSC_USE_COMPLEX)
281: if ( lu->options.symflag == SPOOLES_HERMITIAN ) {
282: InpMtx_mapToUpperTriangleH(lu->mtxA);
283: }
284: #endif
285: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
286: }
287: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
289: /* get symbolic factorization */
290: if (lu->options.useQR){
291: lu->symbfacIVL = SymbFac_initFromGraph(lu->frontETree, lu->graph);
292: IVL_overwrite(lu->symbfacIVL, lu->oldToNewIV);
293: IVL_sortUp(lu->symbfacIVL);
294: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
295: } else {
296: lu->symbfacIVL = SymbFac_initFromInpMtx(lu->frontETree, lu->mtxA);
297: }
298: if ( lu->options.msglvl > 2 ) {
299: int err;
301: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n old-to-new permutation vector");
302: IV_writeForHumanEye(lu->oldToNewIV, lu->options.msgFile);
303: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n new-to-old permutation vector");
304: IV_writeForHumanEye(lu->newToOldIV, lu->options.msgFile);
305: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree after permutation");
306: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
307: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");
308: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
309: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n symbolic factorization");
310: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
311: err = fflush(lu->options.msgFile);
312: if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
313: }
315: lu->frontmtx = FrontMtx_new();
316: lu->mtxmanager = SubMtxManager_new();
317: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
319: } else { /* new num factorization using previously computed symbolic factor */
321: if (lu->options.pivotingflag) { /* different FrontMtx is required */
322: FrontMtx_free(lu->frontmtx);
323: lu->frontmtx = FrontMtx_new();
324: } else {
325: FrontMtx_clearData (lu->frontmtx);
326: }
328: SubMtxManager_free(lu->mtxmanager);
329: lu->mtxmanager = SubMtxManager_new();
330: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
332: /* permute mtxA */
333: if (lu->options.useQR){
334: InpMtx_permute(lu->mtxA, NULL, lu->oldToNew);
335: } else {
336: InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew);
337: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
338: InpMtx_mapToUpperTriangle(lu->mtxA);
339: }
340: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
341: }
342: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
343: if ( lu->options.msglvl > 2 ) {
344: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n input matrix after permutation");
345: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
346: }
347: } /* end of if( lu->flg == DIFFERENT_NONZERO_PATTERN) */
348:
349: if (lu->options.useQR){
350: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag,
351: SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS,
352: SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL,
353: lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
354: } else {
355: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
356: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, 0, NULL,
357: lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
358: }
360: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */
361: if ( lu->options.patchAndGoFlag == 1 ) {
362: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
363: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
364: lu->options.storeids, lu->options.storevalues);
365: } else if ( lu->options.patchAndGoFlag == 2 ) {
366: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
367: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
368: lu->options.storeids, lu->options.storevalues);
369: }
370: }
372: /* numerical factorization */
373: chvmanager = ChvManager_new();
374: ChvManager_init(chvmanager, NO_LOCK, 1);
375: DVfill(10, lu->cpus, 0.0);
376: if (lu->options.useQR){
377: facops = 0.0 ;
378: FrontMtx_QR_factor(lu->frontmtx, lu->mtxA, chvmanager,
379: lu->cpus, &facops, lu->options.msglvl, lu->options.msgFile);
380: if ( lu->options.msglvl > 1 ) {
381: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");
382: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n facops = %9.2f", facops);
383: }
384: } else {
385: IVfill(20, lu->stats, 0);
386: rootchv = FrontMtx_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, 0.0,
387: chvmanager, &fierr, lu->cpus,lu->stats,lu->options.msglvl,lu->options.msgFile);
388: if (rootchv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"\n matrix found to be singular");
389: if (fierr >= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"\n error encountered at front %D", fierr);
390:
391: if(lu->options.FrontMtxInfo){
392: PetscPrintf(PETSC_COMM_SELF,"\n %8d pivots, %8d pivot tests, %8d delayed rows and columns\n",lu->stats[0], lu->stats[1], lu->stats[2]);
393: cputotal = lu->cpus[8] ;
394: if ( cputotal > 0.0 ) {
395: PetscPrintf(PETSC_COMM_SELF,
396: "\n cpus cpus/totaltime"
397: "\n initialize fronts %8.3f %6.2f"
398: "\n load original entries %8.3f %6.2f"
399: "\n update fronts %8.3f %6.2f"
400: "\n assemble postponed data %8.3f %6.2f"
401: "\n factor fronts %8.3f %6.2f"
402: "\n extract postponed data %8.3f %6.2f"
403: "\n store factor entries %8.3f %6.2f"
404: "\n miscellaneous %8.3f %6.2f"
405: "\n total time %8.3f \n",
406: lu->cpus[0], 100.*lu->cpus[0]/cputotal,
407: lu->cpus[1], 100.*lu->cpus[1]/cputotal,
408: lu->cpus[2], 100.*lu->cpus[2]/cputotal,
409: lu->cpus[3], 100.*lu->cpus[3]/cputotal,
410: lu->cpus[4], 100.*lu->cpus[4]/cputotal,
411: lu->cpus[5], 100.*lu->cpus[5]/cputotal,
412: lu->cpus[6], 100.*lu->cpus[6]/cputotal,
413: lu->cpus[7], 100.*lu->cpus[7]/cputotal, cputotal);
414: }
415: }
416: }
417: ChvManager_free(chvmanager);
419: if ( lu->options.msglvl > 0 ) {
420: int err;
422: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix");
423: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
424: err = fflush(lu->options.msgFile);
425: if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
426: }
428: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) { /* || SPOOLES_HERMITIAN ? */
429: if ( lu->options.patchAndGoFlag == 1 ) {
430: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
431: if (lu->options.msglvl > 0 ){
432: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
433: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
434: }
435: }
436: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
437: } else if ( lu->options.patchAndGoFlag == 2 ) {
438: if (lu->options.msglvl > 0 ){
439: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
440: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
441: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
442: }
443: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
444: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
445: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
446: }
447: }
448: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
449: }
450: }
452: /* post-process the factorization */
453: FrontMtx_postProcess(lu->frontmtx, lu->options.msglvl, lu->options.msgFile);
454: if ( lu->options.msglvl > 2 ) {
455: int err;
457: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n factor matrix after post-processing");
458: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
459: err = fflush(lu->options.msgFile);
460: if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
461: }
463: lu->flg = SAME_NONZERO_PATTERN;
464: lu->CleanUpSpooles = PETSC_TRUE;
465: return(0);
466: }