Actual source code: superlu_dist.c
/*
        Provides an interface to the SuperLU_DIST_2.2 sparse solver
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#if defined(PETSC_HAVE_STDLIB_H) /* This is to get around a weird problem with SuperLU on cray */
#include <stdlib.h>
#endif

#if defined(PETSC_USE_64BIT_INDICES)
/* ugly SuperLU_Dist variable telling it to use long long int */
#define _LONGINT
#endif

#if defined(PETSC_USE_COMPLEX)
#include <superlu_zdefs.h>
#else
#include <superlu_ddefs.h>
#endif

typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
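
/* GLOBAL: the entire matrix is gathered (replicated) on every process and handed to the
   p[dz]gssvx_ABglobal() drivers; DISTRIBUTED: each process passes only its local rows
   (SLU_NR_loc format) to the p[dz]gssvx() drivers */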

typedef struct {
  int_t                nprow,npcol,*row,*col;
  gridinfo_t           grid;
  superlu_options_t    options;
  SuperMatrix          A_sup;
  ScalePermstruct_t    ScalePermstruct;
  LUstruct_t           LUstruct;
  int                  StatPrint;
  SuperLU_MatInputMode MatInputMode;
  SOLVEstruct_t        SOLVEstruct;
  fact_t               FactPattern;
  MPI_Comm             comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex        *val;
#else
  double               *val;
#endif
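  /* These two flags record whether lu->SOLVEstruct was last set up by MatSolve() or by
     MatMatSolve(), so its communication pattern can be rebuilt when switching between them */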
  PetscBool            matsolve_iscalled,matmatsolve_iscalled;
  PetscBool            CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
} Mat_SuperLU_DIST;

extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);

PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscBool        flg;

  if (lu && lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    MPI_Comm_free(&(lu->comm_superlu));
  }
  PetscFree(A->spptr);

  PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
  if (flg) {
    MatDestroy_SeqAIJ(A);
  } else {
    MatDestroy_MPIAIJ(A);
  }
  return(0);
}

PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */

  MPI_Comm_size(((PetscObject)A)->comm,&size);
  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* global mat input, convert b to x_seq */
    VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
    ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
    VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
    ISDestroy(&iden);

    VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
    VecGetArray(x_seq,&bptr);
  } else { /* size==1 || distributed mat input */
    if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
      /* see comments in MatMatSolve() */
#if defined(PETSC_USE_COMPLEX)
      zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
      dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      lu->options.SolveInitialized = NO;
    }
    VecCopy(b_mpi,x);
    VecGetArray(x,&bptr);
  }

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PStatInit(&stat); /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,
                     &lu->grid,&lu->LUstruct,berr,&stat,&info);
#else
    pdgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,M,nrhs,
                     &lu->grid,&lu->LUstruct,berr,&stat,&info);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
            &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
#else
    pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
            &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx/pzgssvx failed, info: %d\n",info);

  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* convert seq x to mpi x */
    VecRestoreArray(x_seq,&bptr);
    VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
    VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
    VecScatterDestroy(&scat);
    VecDestroy(&x_seq);
  } else {
    VecRestoreArray(x,&bptr);
    lu->matsolve_iscalled    = PETSC_TRUE;
    lu->matmatsolve_iscalled = PETSC_FALSE;
  }
  return(0);
}

PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         M=A->rmap->N,m=A->rmap->n,nrhs;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool        flg;

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  PetscTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
  if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be a MATDENSE matrix");
  PetscTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
  if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be a MATDENSE matrix");

  MPI_Comm_size(((PetscObject)A)->comm,&size);
  if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
  /* size==1 or distributed mat input */
  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* The communication pattern of SOLVEstruct was probably set up by MatSolve() rather
       than for MatMatSolve(), so destroy it and create a new SOLVEstruct.
       Otherwise the solve may corrupt memory or produce an incorrect solution.
       See src/mat/examples/tests/ex125.c */
#if defined(PETSC_USE_COMPLEX)
    zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
    dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
    lu->options.SolveInitialized = NO;
  }
  MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);

  MatGetSize(B_mpi,PETSC_NULL,&nrhs);

  PStatInit(&stat); /* Initialize the statistics variables. */
  MatGetArray(X,&bptr);
  if (lu->MatInputMode == GLOBAL) { /* size == 1 */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,
                     &lu->grid, &lu->LUstruct, berr, &stat, &info);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs,
                     &lu->grid, &lu->LUstruct, berr, &stat, &info);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
            &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
#else
    pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
            &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx/pzgssvx failed, info: %d\n",info);
  MatRestoreArray(X,&bptr);

  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  return(0);
}

PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size,rank;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  MPI_Comm_size(((PetscObject)A)->comm,&size);
  MPI_Comm_rank(((PetscObject)A)->comm,&rank);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    MPI_Barrier(((PetscObject)A)->comm);
    PetscGetTime(&time0);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
      MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
      ISDestroy(&isrow);

      A_seq = *tseq;
      PetscFree(tseq);
      aa = (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscBool flg;
      PetscTypeCompare((PetscObject)A,MATMPIAIJ,&flg);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa = (Mat_SeqAIJ*)A->data;
    }

    /* Convert the PETSc compressed-row (NR) matrix to SuperLU_DIST compressed-column (NC) format.
       Note: the memory for lu->val, lu->col and lu->row is allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) { /* successive numeric factorization, sparsity pattern is reused. */
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else { /* lu->FactPattern == SamePattern */
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col,&lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val,&lu->col,&lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa = (Mat_SeqAIJ*)(mat->A)->data;
    bb = (Mat_SeqAIJ*)(mat->B)->data;
    ai = aa->i; aj = aa->j;
    bi = bb->i; bj = bb->j;
#if defined(PETSC_USE_COMPLEX)
    av = (doublecomplex*)aa->a;
    bv = (doublecomplex*)bb->a;
#else
    av = aa->a;
    bv = bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
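
    /* Merge the diagonal block A (local columns, shifted by rstart) and the off-diagonal
       block B (columns mapped to global indices through garray) of the MPIAIJ matrix into
       one local CSR triple (lu->row, lu->col, lu->val) whose global column indices ascend
       within each row: B entries left of the diagonal block, then the A entries, then the
       remaining B entries */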
    nz = 0;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj    = aj + ai[i]; /* ptr to the beginning of this row */
      bjj    = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB         = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
398: #if defined(PETSC_USE_COMPLEX)
399: zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
400: lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
401: #else
402: dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
403: lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
404: #endif
405: }
  if (lu->options.PrintStat) {
    PetscGetTime(&time);
    time0 = time - time0;
  }

  /* Factor the matrix. */
  PStatInit(&stat); /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                     &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                     &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx_ABglobal/pzgssvx_ABglobal fails, info: %d\n",sinfo);
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1) {
    MatDestroy(&A_seq);
  }

  if (lu->options.PrintStat) {
    if (size > 1) {
      MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
      MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
      MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
      time = time/size; /* average time */
      if (!rank) {
        PetscPrintf(PETSC_COMM_SELF, "  Mat conversion (PETSc->SuperLU_DIST) time (max/min/avg):\n    %g / %g / %g\n",time_max,time_min,time);
      }
    } else {
      PetscPrintf(PETSC_COMM_SELF, "  Mat conversion (PETSc->SuperLU_DIST) time:\n    %g\n",time0);
    }
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1) {
    F_diag = ((Mat_MPIAIJ*)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this function only */
  return(0);
}

/* Note: the PETSc r and c permutations are ignored */
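/* This symbolic phase only creates the SuperLU_DIST process grid and the
   ScalePermstruct/LUstruct containers; the sparsity analysis itself is performed inside
   p[dz]gssvx() on the first call to MatLUFactorNumeric_SuperLU_DIST(), driven by options.Fact */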
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->spptr;
  PetscInt         M=A->rmap->N,N=A->cmap->N;

  /* Initialize the SuperLU process grid. */
  superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

  /* Initialize ScalePermstruct and LUstruct. */
  ScalePermstructInit(M, N, &lu->ScalePermstruct);
  LUstructInit(M, N, &lu->LUstruct);

  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  return(0);
}

PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
{
  *type = MATSOLVERSUPERLU_DIST;
  return(0);
}

PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  Mat               B;
  Mat_SuperLU_DIST  *lu;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
  PetscMPIInt       size;
  superlu_options_t options;
  PetscBool         flg;
  const char        *colperm[]     = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"};
  const char        *rowperm[]     = {"LargeDiag","NATURAL"};
  const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

  /* Create the factorization matrix */
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
  MatSetType(B,((PetscObject)A)->type_name);
  MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
  MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

  B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  B->ops->view             = MatView_SuperLU_DIST;
  B->ops->destroy          = MatDestroy_SuperLU_DIST;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);
  B->factortype = MAT_FACTOR_LU;

  PetscNewLog(B,Mat_SuperLU_DIST,&lu);
  B->spptr = lu;

  /* Set the default input options:
       options.Fact              = DOFACT;
       options.Equil             = YES;
       options.ParSymbFact       = NO;
       options.ColPerm           = METIS_AT_PLUS_A;
       options.RowPerm           = LargeDiag;
       options.ReplaceTinyPivot  = YES;
       options.IterRefine        = DOUBLE;
       options.Trans             = NOTRANS;
       options.SolveInitialized  = NO; - holds the communication pattern used by MatSolve() and MatMatSolve()
       options.RefineInitialized = NO;
       options.PrintStat         = YES;
  */
  set_default_options_dist(&options);

  MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
  MPI_Comm_size(((PetscObject)A)->comm,&size);
  /* Default number of process columns and rows */
  lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + PetscSqrtReal((PetscReal)size)));
  if (!lu->npcol) lu->npcol = 1;
  while (lu->npcol > 0) {
    lu->nprow = PetscMPIIntCast(size/lu->npcol);
    if (size == lu->nprow * lu->npcol) break;
    lu->npcol--;
  }
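  /* The loop above selects the process grid closest to square, shrinking npcol from
     sqrt(size) until nprow*npcol exactly equals the number of MPI processes */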

  PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
  PetscOptionsInt("-mat_superlu_dist_r","Number of rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
  PetscOptionsInt("-mat_superlu_dist_c","Number of columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
  if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);

  lu->MatInputMode = DISTRIBUTED;
  PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);
  if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

  PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
  if (!flg) {
    options.Equil = NO;
  }

  PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,2,rowperm[0],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      options.RowPerm = LargeDiag;
      break;
    case 1:
      options.RowPerm = NOROWPERM;
      break;
    }
  }

  PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      options.ColPerm = NATURAL;
      break;
    case 1:
      options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 2:
      options.ColPerm = MMD_ATA;
      break;
    case 3:
      options.ColPerm = METIS_AT_PLUS_A;
      break;
    case 4:
      options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
    }
  }

  PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
  if (!flg) {
    options.ReplaceTinyPivot = NO;
  }

  options.ParSymbFact = NO;
  PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
  if (flg) {
#if defined(PETSC_HAVE_PARMETIS)
    options.ParSymbFact = YES;
    options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of the user's ordering setting */
#else
    PetscPrintf(PETSC_COMM_SELF,"parsymbfact needs ParMETIS\n");
#endif
  }

  lu->FactPattern = SamePattern_SameRowPerm;
  PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[1],&indx,&flg);
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    }
  }

  options.IterRefine = NOREFINE;
  PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
  if (flg) {
    options.IterRefine = DOUBLE;
  }

  if (PetscLogPrintInfo) {
    options.PrintStat = YES;
  } else {
    options.PrintStat = NO;
  }
  PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",
                   (PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,0);
  PetscOptionsEnd();

  lu->options              = options;
  lu->options.Fact         = DOFACT;
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  *F = B;
  return(0);
}

PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  MatGetFactor_aij_superlu_dist(A,ftype,F);
  return(0);
}

PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*)A->spptr;
  superlu_options_t options;
  PetscErrorCode    ierr;

  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

  options = lu->options;
  PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
  PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);
  PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
  PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);
  PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscBools[options.IterRefine == DOUBLE]);
  PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL" : "LargeDiag");

  switch (options.ColPerm) {
  case NATURAL:
    PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
    break;
  case MMD_AT_PLUS_A:
    PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
    break;
  case MMD_ATA:
    PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
    break;
  case METIS_AT_PLUS_A:
    PetscViewerASCIIPrintf(viewer,"  Column permutation METIS_AT_PLUS_A\n");
    break;
  case PARMETIS:
    PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);

  if (lu->FactPattern == SamePattern) {
    PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
  } else {
    PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
  }
  return(0);
}

PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      MatFactorInfo_SuperLU_DIST(A,viewer);
    }
  }
  return(0);
}

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Works with AIJ matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern_SameRowPerm> (choose one of) SamePattern SamePattern_SameRowPerm
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information
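
  Notes: A minimal usage sketch (error checking omitted; A, b and x stand for the
  application's assembled matrix and vectors). The solver is usually selected at runtime
  with -pc_type lu -pc_factor_mat_solver_package superlu_dist, or programmatically
  through the factor interface:
.vb
    Mat           F;
    IS            perm,iperm;
    MatFactorInfo info;
    MatFactorInfoInitialize(&info);
    MatGetOrdering(A,MATORDERINGNATURAL,&perm,&iperm); /* the r and c permutations are ignored by this package */
    MatGetFactor(A,MATSOLVERSUPERLU_DIST,MAT_FACTOR_LU,&F);
    MatLUFactorSymbolic(F,A,perm,iperm,&info);
    MatLUFactorNumeric(F,A,&info);
    MatSolve(F,b,x);
    MatDestroy(&F);
.ve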

  Level: beginner

.seealso: PCLU, PCFactorSetMatSolverPackage(), MatSolverPackage
M*/