Actual source code: mumps.c
/*
   Provides an interface to the MUMPS sparse solver
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

#if defined(PETSC_USE_COMPLEX)
#include <zmumps_c.h>
#else
#include <dmumps_c.h>
#endif

#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* macros so that indices match the MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
  MatStructure   matstruc;
  PetscMPIInt    myid,size;
  PetscInt       *irn,*jcn,nz,sym,nSolve;
  PetscScalar    *val;
  MPI_Comm       comm_mumps;
  VecScatter     scat_rhs, scat_sol;
  PetscBool      isAIJ,CleanUpMUMPS;
  Vec            b_seq,x_seq;
  PetscErrorCode (*Destroy)(Mat);
  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;
/* MatConvertToTriples_A_B */
/* Convert a PETSc matrix to triples: row[nz], col[nz], val[nz] */
/*
  input:
    A     - matrix in aij, baij or sbaij (bs=1) format
    shift - 0: C-style output triples; 1: Fortran-style output triples
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)
*/
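/*
   Worked example (added for illustration, not part of the original source):
   with shift=1, the 2x2 sequential AIJ matrix

       [ 1.0  2.0 ]
       [ 0.0  3.0 ]

   is converted to the Fortran-style triples

       r = {1, 1, 2},  c = {1, 2, 2},  v = {1.0, 2.0, 3.0}

   which is exactly the coordinate (COO) format MUMPS expects in irn/jcn/a.
*/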
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  *v=aa->a;
  if (reuse == MAT_INITIAL_MATRIX){
    nz = aa->nz; ai = aa->i; aj = aa->j;
    *nnz = nz;
    PetscMalloc(2*nz*sizeof(PetscInt), &row);
    col = row + nz;

    nz = 0;
    for(i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for(j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  return(0);
}
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs=A->rmap->bs,bs2=aa->bs2,M=A->rmap->N/bs;
  PetscInt       nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX){
    ai = aa->i; aj = aa->j;
    nz = bs2*aa->nz;
    *nnz = nz;
    PetscMalloc(2*nz*sizeof(PetscInt), &row);
    col = row + nz;

    for(i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for(k=0; k<rnz; k++) {
        for(j=0; j<bs; j++) {
          for(m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  return(0);
}
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;

  if (reuse == MAT_INITIAL_MATRIX){
    nz = aa->nz; ai = aa->i; aj = aa->j; *v = aa->a;
    *nnz = nz;
    PetscMalloc(2*nz*sizeof(PetscInt), &row);
    col = row + nz;

    nz = 0;
    for(i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for(j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  return(0);
}
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data; /* A is SeqAIJ; only its upper triangular part is extracted */

  ai=aa->i; aj=aa->j; av=aa->a;
  adiag=aa->diag;
  if (reuse == MAT_INITIAL_MATRIX){
    nz = M + (aa->nz-M)/2; /* nonzeros of the upper triangular part, diagonal included */
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    nz = 0;
    for(i=0; i<M; i++) {
      rnz = ai[i+1] - adiag[i];
      ajj = aj + adiag[i];
      v1  = av + adiag[i];
      for(j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    for(i=0; i<M; i++) {
      rnz = ai[i+1] - adiag[i];
      ajj = aj + adiag[i];
      v1  = av + adiag[i];
      for(j=0; j<rnz; j++) {
        val[nz++] = v1[j];
      }
    }
  }
  return(0);
}
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb=(Mat_SeqBAIJ*)(mat->B)->data;

  ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
  garray = mat->garray;
  av=aa->a; bv=bb->a;

  if (reuse == MAT_INITIAL_MATRIX){
    nz = aa->nz + bb->nz;
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++){
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for(j=0; j < countB; j++){
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  return(0);
}
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb=(Mat_SeqAIJ*)(mat->B)->data;

  ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
  garray = mat->garray;
  av=aa->a; bv=bb->a;

  if (reuse == MAT_INITIAL_MATRIX){
    nz = aa->nz + bb->nz;
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++){
      if (reuse == MAT_INITIAL_MATRIX){
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for(j=0; j < countB; j++){
      if (reuse == MAT_INITIAL_MATRIX){
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  return(0);
}
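/*
   Background note (added for clarity): a PETSc MPIAIJ matrix stores on each
   process a "diagonal" block mat->A, whose columns are owned by this process,
   and an "off-diagonal" block mat->B, whose compressed local column indices
   are mapped to global columns through mat->garray.  That is why the A-part
   loop above computes global columns as rstart + ajj[j] + shift, while the
   B-part loop uses garray[bjj[j]] + shift.
*/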
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa=(Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb=(Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs = A->rmap->bs,bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for ( i=0; i<mbs; i++ ) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++){
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX){
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for(k=0; k<countB; k++){
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX){
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  return(0);
}
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*adiag,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb=(Mat_SeqAIJ*)(mat->B)->data;

  ai=aa->i; aj=aa->j; adiag=aa->diag;
  bi=bb->i; bj=bb->j; garray = mat->garray;
  av=aa->a; bv=bb->a;
  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0;    /* num of upper triangular entries in mat->B */
    for(i=0; i<m; i++){
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++){
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    *nnz = nz;
    PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);
    col = row + nz;
    val = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++){
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for(j=0; j < countB; j++){
      if (garray[bjj[j]] > rstart) { /* keep only entries in the upper triangular part */
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  return(0);
}
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;

  if (lu && lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    PetscFree2(lu->id.sol_loc,lu->id.isol_loc);
    VecScatterDestroy(&lu->scat_rhs);
    VecDestroy(&lu->b_seq);
    VecScatterDestroy(&lu->scat_sol);
    VecDestroy(&lu->x_seq);
    PetscFree(lu->id.perm_in);
    PetscFree(lu->irn);
    lu->id.job=JOB_END;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    MPI_Comm_free(&(lu->comm_mumps));
  }
  if (lu && lu->Destroy) {
    (lu->Destroy)(A);
  }
  PetscFree(A->spptr);

  /* clear composed functions */
  PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",PETSC_NULL);
  PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMumpsSetIcntl_C","",PETSC_NULL);
  return(0);
}
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS   *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar *array;
  Vec         b_seq;
  IS          is_iden,is_petsc;
  PetscInt    i;

  lu->id.nrhs = 1;
  b_seq = lu->b_seq;
  if (lu->size > 1){
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    VecScatterBegin(lu->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(lu->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);
    if (!lu->myid) {VecGetArray(b_seq,&array);}
  } else { /* size == 1 */
    VecCopy(b,x);
    VecGetArray(x,&array);
  }
  if (!lu->myid) { /* define rhs on the host */
    lu->id.nrhs = 1;
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = JOB_SOLVE;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (!lu->nSolve){ /* create scatter scat_sol */
      ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden); /* from */
      for (i=0; i<lu->id.lsol_loc; i++){
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,PETSC_COPY_VALUES,&is_petsc); /* to */
      VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);
      ISDestroy(&is_iden);
      ISDestroy(&is_petsc);
    }
    VecScatterBegin(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
  }
  lu->nSolve++;
  return(0);
}
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;

  lu->id.ICNTL(9) = 0; /* ICNTL(9)!=1 makes MUMPS solve A^T x = b; restored to 1 (solve A x = b) below */
  MatSolve_MUMPS(A,b,x);
  lu->id.ICNTL(9) = 1;
  return(0);
}
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscBool flg;

  PetscTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
  if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
  PetscTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
  if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_MUMPS() is not implemented yet");
  return(0);
}
#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: 0
   npos:  (global dimension of F) - nneg
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS   *lu =(Mat_MUMPS*)F->spptr;
  PetscMPIInt size;

  MPI_Comm_size(((PetscObject)F)->comm,&size);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && lu->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.ICNTL(13));
  if (nneg){
    if (!lu->myid){
      *nneg = lu->id.INFOG(12);
    }
    MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);
  }
  if (nzero) *nzero = 0;
  if (npos)  *npos = F->rmap->N - (*nneg);
  return(0);
}
#endif /* !defined(PETSC_USE_COMPLEX) */
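/*
   Usage sketch (added for illustration; F is assumed to be a MUMPS Cholesky
   factor obtained from MatGetFactor()): the inertia is queried through the
   generic PETSc interface,

       PetscInt nneg,nzero,npos;
       MatGetInertia(F,&nneg,&nzero,&npos);

   In parallel this requires running with -mat_mumps_icntl_13 1, as enforced
   by the check above.
*/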
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu =(Mat_MUMPS*)(F)->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Mat            F_diag;
  PetscBool      isMPIAIJ;

  reuse = MAT_REUSE_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* numerical factorization phase */
  /*-------------------------------*/
  lu->id.job = JOB_FACTNUMERIC;
  if(!lu->id.ICNTL(18)) { /* ICNTL(18)=0: centralized assembled matrix input */
    if (!lu->myid) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a = (mumps_double_complex*)lu->val;
#else
      lu->id.a = lu->val;
#endif
    }
  } else {
#if defined(PETSC_USE_COMPLEX)
    lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
    lu->id.a_loc = lu->val;
#endif
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    if (lu->id.INFO(1) == -13) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
    else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
  }
  if (!lu->myid && lu->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.ICNTL(16));

  if (lu->size > 1){
    PetscTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);
    if(isMPIAIJ) {
      F_diag = ((Mat_MPIAIJ*)(F)->data)->A;
    } else {
      F_diag = ((Mat_MPISBAIJ*)(F)->data)->A;
    }
    F_diag->assembled = PETSC_TRUE;
    if (lu->nSolve){
      VecScatterDestroy(&lu->scat_sol);
      PetscFree2(lu->id.sol_loc,lu->id.isol_loc);
      VecDestroy(&lu->x_seq);
    }
  }
  (F)->assembled   = PETSC_TRUE;
  lu->matstruc     = SAME_NONZERO_PATTERN;
  lu->CleanUpMUMPS = PETSC_TRUE;
  lu->nSolve       = 0;

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      PetscMalloc2(lsol_loc,PetscScalar,&sol_loc,lsol_loc,PetscInt,&lu->id.isol_loc);
      lu->id.lsol_loc = lsol_loc;
#if defined(PETSC_USE_COMPLEX)
      lu->id.sol_loc = (mumps_double_complex*)sol_loc;
#else
      lu->id.sol_loc = sol_loc;
#endif
      VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);
    }
  }
  return(0);
}
PetscErrorCode PetscSetMUMPSOptions(Mat F, Mat A)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       icntl;
  PetscBool      flg;

  PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"MUMPS Options","Mat");
  if (lu->size == 1){
    lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */
  } else {
    lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */
  }

  icntl=-1;
  lu->id.ICNTL(4) = 0; /* level of printing; overwrite mumps default ICNTL(4)=2 */
  PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);
  if ((flg && icntl > 0) || PetscLogPrintInfo) {
    lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
  } else { /* no output */
    lu->id.ICNTL(1) = 0; /* error messages, default=6 */
    lu->id.ICNTL(2) = 0; /* output stream for diagnostic printing, statistics, and warnings, default=0 */
    lu->id.ICNTL(3) = 0; /* output stream for global information, default=6 */
  }
  PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): column permutation and/or scaling to get a zero-free diagonal (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);
  icntl=-1;
  PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): sequential matrix ordering (0 to 7), 3 = Scotch, 5 = Metis","None",lu->id.ICNTL(7),&icntl,&flg);
  if (flg) {
    if (icntl == 1 && lu->size > 1){
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order to be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
    } else {
      lu->id.ICNTL(7) = icntl;
    }
  }

  PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 7 or 77)","None",lu->id.ICNTL(8),&lu->id.ICNTL(8),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to the linear system solved (via -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control: defines the ordering strategy with scaling constraints (0 to 3)","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control: with or without ScaLAPACK","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): Schur complement","None",lu->id.ICNTL(19),&lu->id.ICNTL(19),PETSC_NULL);

  PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core facility (0 or 1)","None",lu->id.ICNTL(22),&lu->id.ICNTL(22),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",lu->id.ICNTL(23),&lu->id.ICNTL(23),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",lu->id.ICNTL(24),&lu->id.ICNTL(24),PETSC_NULL);
  if (lu->id.ICNTL(24)){
    lu->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computation of a null space basis","None",lu->id.ICNTL(25),&lu->id.ICNTL(25),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): Schur options for the right-hand side or solution vector","None",lu->id.ICNTL(26),&lu->id.ICNTL(26),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): experimental parameter","None",lu->id.ICNTL(27),&lu->id.ICNTL(27),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",lu->id.ICNTL(28),&lu->id.ICNTL(28),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering, 1 = ptscotch, 2 = parmetis","None",lu->id.ICNTL(29),&lu->id.ICNTL(29),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",lu->id.ICNTL(30),&lu->id.ICNTL(30),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): factors can be discarded in the solve phase","None",lu->id.ICNTL(31),&lu->id.ICNTL(31),PETSC_NULL);
  PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",lu->id.ICNTL(33),&lu->id.ICNTL(33),PETSC_NULL);

  PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);
  PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",lu->id.CNTL(5),&lu->id.CNTL(5),PETSC_NULL);
  PetscOptionsEnd();
  return(0);
}
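/*
   Example (illustrative; assumes an application executable ./ex1 that solves
   a linear system with KSP): every option registered above can be set at
   runtime, e.g.

       mpiexec -n 4 ./ex1 -ksp_type preonly -pc_type lu \
               -pc_factor_mat_solver_package mumps \
               -mat_mumps_icntl_7 5 -mat_mumps_icntl_14 30

   which selects MUMPS LU with the Metis ordering (ICNTL(7)=5) and a 30
   percent workspace increase (ICNTL(14)=30).
*/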
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  MPI_Comm_rank(((PetscObject)A)->comm,&mumps->myid);
  MPI_Comm_size(((PetscObject)A)->comm,&mumps->size);
  MPI_Comm_dup(((PetscObject)A)->comm,&(mumps->comm_mumps));
  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->comm_mumps);

  mumps->id.job = JOB_INIT;
  mumps->id.par = 1; /* host participates in factorization and solve */
  mumps->id.sym = mumps->sym;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&mumps->id);
#else
  dmumps_c(&mumps->id);
#endif

  mumps->CleanUpMUMPS = PETSC_FALSE;
  mumps->scat_rhs     = PETSC_NULL;
  mumps->scat_sol     = PETSC_NULL;
  mumps->nSolve       = 0;
  return(0);
}
/* Note the PETSc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options */
  PetscSetMUMPSOptions(F,A);

  reuse = MAT_INITIAL_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* analysis phase */
  /*----------------*/
  lu->id.job = JOB_FACTSYMBOLIC;
  lu->id.n   = M;
  switch (lu->id.ICNTL(18)){
  case 0: /* centralized assembled matrix input */
    if (!lu->myid) {
      lu->id.nz =lu->nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
      if (lu->id.ICNTL(6)>1){
#if defined(PETSC_USE_COMPLEX)
        lu->id.a = (mumps_double_complex*)lu->val;
#else
        lu->id.a = lu->val;
#endif
      }
      if (lu->id.ICNTL(7) == 1){ /* use user-provided matrix ordering */
        if (!lu->myid) {
          const PetscInt *idx;
          PetscInt       i,*perm_in;
          PetscMalloc(M*sizeof(PetscInt),&perm_in);
          ISGetIndices(r,&idx);
          lu->id.perm_in = perm_in;
          for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */
          ISRestoreIndices(r,&idx);
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    lu->id.nz_loc = lu->nz;
    lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
    if (lu->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
      lu->id.a_loc = lu->val;
#endif
    }
    /* MUMPS only supports a centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    if (!lu->myid){
      VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);
    } else {
      VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
    }
    VecCreate(((PetscObject)A)->comm,&b);
    VecSetSizes(b,A->rmap->n,PETSC_DECIDE);
    VecSetFromOptions(b);

    VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
    ISDestroy(&is_iden);
    VecDestroy(&b);
    break;
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  F->ops->matsolve        = MatMatSolve_MUMPS;
  return(0);
}
/* Note the PETSc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options */
  PetscSetMUMPSOptions(F,A);

  reuse = MAT_INITIAL_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* analysis phase */
  /*----------------*/
  lu->id.job = JOB_FACTSYMBOLIC;
  lu->id.n   = M;
  switch (lu->id.ICNTL(18)){
  case 0: /* centralized assembled matrix input */
    if (!lu->myid) {
      lu->id.nz =lu->nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
      if (lu->id.ICNTL(6)>1){
#if defined(PETSC_USE_COMPLEX)
        lu->id.a = (mumps_double_complex*)lu->val;
#else
        lu->id.a = lu->val;
#endif
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    lu->id.nz_loc = lu->nz;
    lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
    if (lu->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
      lu->id.a_loc = lu->val;
#endif
    }
    /* MUMPS only supports a centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    if (!lu->myid){
      VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);
    } else {
      VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
    }
    VecCreate(((PetscObject)A)->comm,&b);
    VecSetSizes(b,A->rmap->n,PETSC_DECIDE);
    VecSetFromOptions(b);

    VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
    ISDestroy(&is_iden);
    VecDestroy(&b);
    break;
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  return(0);
}
/* Note the PETSc r permutation and factor info are ignored */
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *lu = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  MatReuse       reuse;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  lu->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options */
  PetscSetMUMPSOptions(F,A);

  reuse = MAT_INITIAL_MATRIX;
  (*lu->ConvertToTriples)(A, 1, reuse, &lu->nz, &lu->irn, &lu->jcn, &lu->val);

  /* analysis phase */
  /*----------------*/
  lu->id.job = JOB_FACTSYMBOLIC;
  lu->id.n   = M;
  switch (lu->id.ICNTL(18)){
  case 0: /* centralized assembled matrix input */
    if (!lu->myid) {
      lu->id.nz =lu->nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
      if (lu->id.ICNTL(6)>1){
#if defined(PETSC_USE_COMPLEX)
        lu->id.a = (mumps_double_complex*)lu->val;
#else
        lu->id.a = lu->val;
#endif
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    lu->id.nz_loc = lu->nz;
    lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
    if (lu->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
      lu->id.a_loc = lu->val;
#endif
    }
    /* MUMPS only supports a centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    if (!lu->myid){
      VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);
    } else {
      VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
      ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
    }
    VecCreate(((PetscObject)A)->comm,&b);
    VecSetSizes(b,A->rmap->n,PETSC_DECIDE);
    VecSetFromOptions(b);

    VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
    ISDestroy(&is_iden);
    VecDestroy(&b);
    break;
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* for a symmetric factor, solve and solvetranspose coincide */
#if !defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#else
  F->ops->getinertia = PETSC_NULL;
#endif
  return(0);
}
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *lu=(Mat_MUMPS*)A->spptr;

  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) return(0);

  PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO){
      PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
      PetscViewerASCIIPrintf(viewer,"  SYM (matrix type): %d \n",lu->id.sym);
      PetscViewerASCIIPrintf(viewer,"  PAR (host participation): %d \n",lu->id.par);
      PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error): %d \n",lu->id.ICNTL(1));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d \n",lu->id.ICNTL(2));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info): %d \n",lu->id.ICNTL(3));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering): %d \n",lu->id.ICNTL(7));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy): %d \n",lu->id.ICNTL(8));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));
      if (lu->id.ICNTL(11)>0) {
        PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));
        PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));
        PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));
        PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));
        PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));
        PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11) (condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));
      }
      PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));
      /* ICNTL(15-17) not used */
      PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info): %d \n",lu->id.ICNTL(19));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern): %d \n",lu->id.ICNTL(20));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct): %d \n",lu->id.ICNTL(21));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility): %d \n",lu->id.ICNTL(22));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally): %d \n",lu->id.ICNTL(23));

      PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows): %d \n",lu->id.ICNTL(24));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis): %d \n",lu->id.ICNTL(25));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for rhs or solution): %d \n",lu->id.ICNTL(26));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (experimental parameter): %d \n",lu->id.ICNTL(27));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering): %d \n",lu->id.ICNTL(28));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering): %d \n",lu->id.ICNTL(29));

      PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)): %d \n",lu->id.ICNTL(30));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors are discarded in the solve phase): %d \n",lu->id.ICNTL(31));
      PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant): %d \n",lu->id.ICNTL(33));

      PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));
      PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));
      PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));
      PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting): %g \n",lu->id.CNTL(4));
      PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots): %g \n",lu->id.CNTL(5));

      /* information local to each processor */
      PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");
      PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
      PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",lu->myid,lu->id.RINFO(1));
      PetscViewerFlush(viewer);
      PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",lu->myid,lu->id.RINFO(2));
      PetscViewerFlush(viewer);
      PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",lu->myid,lu->id.RINFO(3));
      PetscViewerFlush(viewer);

      PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",lu->myid,lu->id.INFO(15));
      PetscViewerFlush(viewer);

      PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",lu->myid,lu->id.INFO(16));
      PetscViewerFlush(viewer);

      PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");
      PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",lu->myid,lu->id.INFO(23));
      PetscViewerFlush(viewer);
      PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);

      if (!lu->myid){ /* information from the host */
        PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));
        PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));
        PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));
        PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",lu->id.RINFOG(12),lu->id.RINFOG(13),lu->id.INFOG(34));

        PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));
        PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));
        PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));
        PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));
        PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d \n",lu->id.INFOG(7));
        PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));
        PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));
        PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",lu->id.INFOG(10));
        PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));
        PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));
        PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));
        PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d \n",lu->id.INFOG(14));
        PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));
        PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));
        PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));
        PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));
        PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));
        PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));
        PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(21));
        PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization: sum over all processors): %d \n",lu->id.INFOG(22));
        PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));
        PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));
        PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));
      }
    }
  }
  return(0);
}
PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->spptr;

  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return(0);
}
/* -------------------------------------------------------------------------------------------*/
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr;

  lu->id.ICNTL(icntl) = ival;
  return(0);
}
/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
.  icntl - index of the MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References: MUMPS Users' Guide

.seealso: MatGetFactor()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
  return(0);
}
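/*
   Usage sketch (added for illustration; assumes a KSP ksp and PC pc already
   exist, and that PCFactorSetUpMatSolverPackage() is available to trigger
   creation of the factor matrix):

       Mat F;
       PCSetType(pc,PCLU);
       PCFactorSetMatSolverPackage(pc,MATSOLVERMUMPS);
       PCFactorSetUpMatSolverPackage(pc);
       PCFactorGetMatrix(pc,&F);
       MatMumpsSetIcntl(F,7,5);    (e.g. ICNTL(7)=5: Metis ordering)
*/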
/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  Works with MATAIJ and MATSBAIJ matrices

  Options Database Keys:
+ -mat_mumps_icntl_4 <0,...,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A x=b or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_cntl_1 <delta> - relative pivoting threshold
. -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
- -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

  Level: beginner

.seealso: PCFactorSetMatSolverPackage(), MatSolverPackage

M*/
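/*
   A minimal sketch of using MATSOLVERMUMPS directly through MatGetFactor()
   (illustrative; A, b, x and the index sets perm, iperm are assumed to exist;
   note that the r and c permutations are ignored by this interface):

       Mat           F;
       MatFactorInfo finfo;
       MatFactorInfoInitialize(&finfo);
       MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
       MatLUFactorSymbolic(F,A,perm,iperm,&finfo);
       MatLUFactorNumeric(F,A,&finfo);
       MatSolve(F,b,x);
       MatDestroy(&F);
*/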
PetscErrorCode MatFactorGetSolverPackage_mumps(Mat A,const MatSolverPackage *type)
{
  *type = MATSOLVERMUMPS;
  return(0);
}
/* MatGetFactor for Seq and MPI AIJ matrices */
PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat       B;
  Mat_MUMPS *mumps;
  PetscBool isSeqAIJ;

  /* Create the factorization matrix */
  PetscTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
  MatSetType(B,((PetscObject)A)->type_name);
  if (isSeqAIJ) {
    MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
  } else {
    MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
  }

  PetscNewLog(B,Mat_MUMPS,&mumps);
  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mumps",MatFactorGetSolverPackage_mumps);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMumpsSetIcntl_C","MatMumpsSetIcntl_MUMPS",MatMumpsSetIcntl_MUMPS);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else          mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else          mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
  }

  mumps->isAIJ    = PETSC_TRUE;
  mumps->Destroy  = B->ops->destroy;
  B->ops->destroy = MatDestroy_MUMPS;
  B->spptr        = (void*)mumps;
  PetscInitializeMUMPS(A,mumps);

  *F = B;
  return(0);
}
/* MatGetFactor for Seq and MPI SBAIJ matrices */
PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat       B;
  Mat_MUMPS *mumps;
  PetscBool isSeqSBAIJ;

  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  if (A->rmap->bs > 1) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead");
  PetscTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
  /* Create the factorization matrix */
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
  MatSetType(B,((PetscObject)A)->type_name);
  PetscNewLog(B,Mat_MUMPS,&mumps);
  if (isSeqSBAIJ) {
    MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mumps",MatFactorGetSolverPackage_mumps);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMumpsSetIcntl_C","MatMumpsSetIcntl_MUMPS",MatMumpsSetIcntl_MUMPS);
  B->factortype = MAT_FACTOR_CHOLESKY;
  if (A->spd_set && A->spd) mumps->sym = 1;
  else                      mumps->sym = 2;

  mumps->isAIJ    = PETSC_FALSE;
  mumps->Destroy  = B->ops->destroy;
  B->ops->destroy = MatDestroy_MUMPS;
  B->spptr        = (void*)mumps;
  PetscInitializeMUMPS(A,mumps);

  *F = B;
  return(0);
}
/* MatGetFactor for Seq and MPI BAIJ matrices */
PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat       B;
  Mat_MUMPS *mumps;
  PetscBool isSeqBAIJ;

  /* Create the factorization matrix */
  PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
  MatCreate(((PetscObject)A)->comm,&B);
  MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
  MatSetType(B,((PetscObject)A)->type_name);
  if (isSeqBAIJ) {
    MatSeqBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL);
  } else {
    MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
  }

  PetscNewLog(B,Mat_MUMPS,&mumps);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else           mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
  }

  B->ops->view = MatView_MUMPS;
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mumps",MatFactorGetSolverPackage_mumps);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMumpsSetIcntl_C","MatMumpsSetIcntl_MUMPS",MatMumpsSetIcntl_MUMPS);

  mumps->isAIJ    = PETSC_TRUE;
  mumps->Destroy  = B->ops->destroy;
  B->ops->destroy = MatDestroy_MUMPS;
  B->spptr        = (void*)mumps;
  PetscInitializeMUMPS(A,mumps);

  *F = B;
  return(0);
}