Actual source code: mpispooles.c

  2: /* 
  3:    Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
  4: */

  6: #include <../src/mat/impls/aij/seq/aij.h>
  7: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  8: #include <../src/mat/impls/baij/seq/baij.h>
  9: #include <../src/mat/impls/aij/mpi/mpiaij.h>
 10: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
 11: #include <../src/mat/impls/aij/seq/spooles/spooles.h>


 18: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
 19: {
 20:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;

 24:   if (lu && lu->CleanUpSpooles) {
 25:     FrontMtx_free(lu->frontmtx);
 26:     IV_free(lu->newToOldIV);
 27:     IV_free(lu->oldToNewIV);
 28:     IV_free(lu->vtxmapIV);
 29:     InpMtx_free(lu->mtxA);
 30:     ETree_free(lu->frontETree);
 31:     IVL_free(lu->symbfacIVL);
 32:     SubMtxManager_free(lu->mtxmanager);
 33:     DenseMtx_free(lu->mtxX);
 34:     DenseMtx_free(lu->mtxY);
 35:     MPI_Comm_free(&(lu->comm_spooles));
 36:     VecDestroy(&lu->vec_spooles);
 37:     ISDestroy(&lu->iden);
 38:     ISDestroy(&lu->is_petsc);
 39:     VecScatterDestroy(&lu->scat);
 40:   }
 41:   PetscFree(A->spptr);
 42:   MatDestroy_MPIAIJ(A);
 43:   return(0);
 44: }

 48: PetscErrorCode MatSolve_MPISpooles(Mat A,Vec b,Vec x)
 49: {
 50:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;
 52:   int           size,rank,m=A->rmap->n,irow,*rowindY;
 53:   PetscScalar   *array;
 54:   DenseMtx      *newY ;
 55:   SubMtxManager *solvemanager ;
 56: #if defined(PETSC_USE_COMPLEX)
 57:   double x_real,x_imag;
 58: #endif

 61:   MPI_Comm_size(((PetscObject)A)->comm,&size);
 62:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);
 63: 
 64:   /* copy b into spooles' rhs mtxY */
 65:   DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
 66:   VecGetArray(b,&array);

 68:   DenseMtx_rowIndices(lu->mtxY, &m, &rowindY);  /* get m, rowind */
 69:   for ( irow = 0 ; irow < m ; irow++ ) {
 70:     rowindY[irow] = irow + lu->rstart;           /* global rowind */
 71: #if !defined(PETSC_USE_COMPLEX)
 72:     DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
 73: #else
 74:     DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
 75:     array++;
 76: #endif
 77:   }
 78:   VecRestoreArray(b,&array);
 79: 
 80:   if ( lu->options.msglvl > 2 ) {
 81:     int err;
 82:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
 83:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
 84:     err = fflush(lu->options.msgFile);
 85:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
 86:   }
 87: 
 88:   /* permute and redistribute Y if necessary */
 89:   DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
 90:   if ( lu->options.msglvl > 2 ) {
 91:     int err;
 92:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
 93:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
 94:     err = fflush(lu->options.msgFile);
 95:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
 96:   }

 98:   MPI_Barrier(((PetscObject)A)->comm); /* for initializing firsttag, because the num. of tags used
 99:                                    by FrontMtx_MPI_split() is unknown */
100:   lu->firsttag = 0;
101:   newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
102:                                 lu->options.msgFile, lu->firsttag, lu->comm_spooles);
103:   DenseMtx_free(lu->mtxY);
104:   lu->mtxY = newY ;
105:   lu->firsttag += size ;
106:   if ( lu->options.msglvl > 2 ) {
107:     int err;
108:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
109:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
110:     err = fflush(lu->options.msgFile);
111:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
112:   }

114:   if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
115:     /*   pivoting has taken place, redistribute the right hand side
116:          to match the final rows and columns in the fronts             */
117:     IV *rowmapIV ;
118:     rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
119:                                     lu->options.msgFile, lu->comm_spooles);
120:     newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
121:                                    lu->options.msgFile, lu->firsttag, lu->comm_spooles);
122:     DenseMtx_free(lu->mtxY);
123:     lu->mtxY = newY ;
124:     IV_free(rowmapIV);
125:     lu->firsttag += size;
126:   }
127:   if ( lu->options.msglvl > 2 ) {
128:     int err;
129:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
130:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
131:     err = fflush(lu->options.msgFile);
132:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
133:   }

135:   if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
136: 
137:   /* solve the linear system */
138:   solvemanager = SubMtxManager_new();
139:   SubMtxManager_init(solvemanager, NO_LOCK, 0);
140:   FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
141:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
142:   SubMtxManager_free(solvemanager);
143:   if ( lu->options.msglvl > 2 ) {
144:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
145:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
146:   }

148:   /* permute the solution into the original ordering */
149:   DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
150:   if ( lu->options.msglvl > 2 ) {
151:     int err;
152:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
153:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
154:     err = fflush(lu->options.msgFile);
155:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
156:   }
157: 
158:   /* scatter local solution mtxX into mpi vector x */
159:   if( !lu->scat ){ /* create followings once for each numfactorization */
160:     /* vec_spooles <- mtxX */
161: #if !defined(PETSC_USE_COMPLEX) 
162:     VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
163: #else    
164:     VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
165: #endif 
166:     ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
167:     ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,PETSC_COPY_VALUES,&lu->is_petsc);
168:     VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
169:   }
170: #if defined(PETSC_USE_COMPLEX)
171:     VecGetArray(lu->vec_spooles,&array);
172:     for (irow = 0; irow < lu->nmycol; irow++){
173:       DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
174:       array[irow] = x_real+x_imag*PETSC_i;
175:     }
176:     VecRestoreArray(lu->vec_spooles,&array);
177: #endif 
178:   VecScatterBegin(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
179:   VecScatterEnd(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
180:   return(0);
181: }

185: PetscErrorCode MatFactorNumeric_MPISpooles(Mat F,Mat A,const MatFactorInfo *info)
186: {
187:   Mat_Spooles     *lu = (Mat_Spooles*)(F)->spptr;
188:   PetscErrorCode  ierr;
189:   int             rank,size,lookahead=0,sierr;
190:   ChvManager      *chvmanager ;
191:   Chv             *rootchv ;
192:   Graph           *graph ;
193:   IVL             *adjIVL;
194:   DV              *cumopsDV ;
195:   double          droptol=0.0,*opcounts,minops,cutoff;
196: #if !defined(PETSC_USE_COMPLEX)
197:   double          *val;
198: #endif
199:   InpMtx          *newA ;
200:   PetscScalar     *av, *bv;
201:   PetscInt        *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
202:                   i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
203:   PetscInt        M=A->rmap->N,m=A->rmap->n,root,nedges,tagbound,lasttag;
204:   Mat             F_diag;
205: 
207:   MPI_Comm_size(((PetscObject)A)->comm,&size);
208:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);

210:   if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
211:     /* get input parameters */
212:     SetSpoolesOptions(A, &lu->options);

214:     (F)->assembled    = PETSC_TRUE;
215:     if ((F)->factortype == MAT_FACTOR_LU){
216:       F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
217:     } else {
218:       F_diag = ((Mat_MPISBAIJ *)(F)->data)->A;
219:     }
220:     F_diag->assembled  = PETSC_TRUE;

222:     /* to be used by MatSolve() */
223:     lu->mtxY = DenseMtx_new();
224:     lu->mtxX = DenseMtx_new();
225:     lu->scat = PETSC_NULL;

227:     IVzero(20, lu->stats);
228:     DVzero(20, lu->cpus);

230:     lu->mtxA = InpMtx_new();
231:   }
232: 
233:   /* copy A to Spooles' InpMtx object */
234:   if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
235:     Mat_MPIAIJ  *mat =  (Mat_MPIAIJ*)A->data;
236:     Mat_SeqAIJ  *aa=(Mat_SeqAIJ*)(mat->A)->data;
237:     Mat_SeqAIJ  *bb=(Mat_SeqAIJ*)(mat->B)->data;
238:     ai=aa->i; aj=aa->j; av=aa->a;
239:     bi=bb->i; bj=bb->j; bv=bb->a;
240:     lu->rstart = A->rmap->rstart;
241:     nz         = aa->nz + bb->nz;
242:     garray     = mat->garray;
243:   } else {         /* SPOOLES_SYMMETRIC  */
244:     Mat_MPISBAIJ  *mat = (Mat_MPISBAIJ*)A->data;
245:     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
246:     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
247:     ai=aa->i; aj=aa->j; av=aa->a;
248:     bi=bb->i; bj=bb->j; bv=bb->a;
249:     lu->rstart = A->rmap->rstart;
250:     nz         = aa->nz + bb->nz;
251:     garray     = mat->garray;
252:   }
253: 
254:   InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
255:   row   = InpMtx_ivec1(lu->mtxA);
256:   col   = InpMtx_ivec2(lu->mtxA);
257: #if !defined(PETSC_USE_COMPLEX)
258:   val   = InpMtx_dvec(lu->mtxA);
259: #endif

261:   jj = 0; irow = lu->rstart;
262:   for ( i=0; i<m; i++ ) {
263:     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
264:     countA = ai[i+1] - ai[i];
265:     countB = bi[i+1] - bi[i];
266:     bjj = bj + bi[i];
267:     jB = 0;
268: 
269:     if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
270:       /* B part, smaller col index */
271:       colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
272:       for (j=0; j<countB; j++){
273:         jcol = garray[bjj[j]];
274:         if (jcol > colA_start) {
275:           jB = j;
276:           break;
277:         }
278:         row[jj] = irow; col[jj] = jcol;
279: #if !defined(PETSC_USE_COMPLEX)
280:         val[jj++] = *bv++;
281: #else
282:         InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
283:         bv++; jj++;
284: #endif
285:         if (j==countB-1) jB = countB;
286:       }
287:     }
288:     /* A part */
289:     for (j=0; j<countA; j++){
290:       row[jj] = irow; col[jj] = lu->rstart + ajj[j];
291: #if !defined(PETSC_USE_COMPLEX)
292:       val[jj++] = *av++;
293: #else
294:       InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
295:       av++; jj++;
296: #endif
297:     }
298:     /* B part, larger col index */
299:     for (j=jB; j<countB; j++){
300:       row[jj] = irow; col[jj] = garray[bjj[j]];
301: #if !defined(PETSC_USE_COMPLEX)
302:       val[jj++] = *bv++;
303: #else
304:      InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
305:      bv++; jj++;
306: #endif
307:     }
308:     irow++;
309:   }
310: #if !defined(PETSC_USE_COMPLEX)
311:   InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
312: #endif
313:   InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
314:   if ( lu->options.msglvl > 0 ) {
315:     int err;
316:     printf("[%d] input matrix\n",rank);
317:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
318:     InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
319:     err = fflush(lu->options.msgFile);
320:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
321:   }

323:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
324:     /*
325:       find a low-fill ordering
326:       (1) create the Graph object
327:       (2) order the graph using multiple minimum degree
328:       (3) find out who has the best ordering w.r.t. op count,
329:           and broadcast that front tree object
330:     */
331:     graph = Graph_new();
332:     adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
333:               lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
334:     nedges = IVL_tsize(adjIVL);
335:     Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
336:     if ( lu->options.msglvl > 2 ) {
337:       int err;
338:       err = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
339:       Graph_writeForHumanEye(graph, lu->options.msgFile);
340:       fflush(lu->options.msgFile);
341:       if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
342:     }

344:     switch (lu->options.ordering) {
345:     case 0:
346:       lu->frontETree = orderViaBestOfNDandMS(graph,
347:                      lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
348:                      lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
349:     case 1:
350:       lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
351:     case 2:
352:       lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
353:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
354:     case 3:
355:       lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
356:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
357:     default:
358:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
359:     }

361:     Graph_free(graph);
362:     if ( lu->options.msglvl > 2 ) {
363:       int err;
364:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
365:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
366:       err = fflush(lu->options.msgFile);
367:       if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
368:     }

370:     opcounts = DVinit(size, 0.0);
371:     opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
372:     MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
373:               (void*) opcounts, 1, MPI_DOUBLE, ((PetscObject)A)->comm);
374:     minops = DVmin(size, opcounts, &root);
375:     DVfree(opcounts);
376: 
377:     lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
378:                              lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
379:     if ( lu->options.msglvl > 2 ) {
380:       int err;
381:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
382:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
383:       err = fflush(lu->options.msgFile);
384:       if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
385:     }
386: 
387:     /* get the permutations, permute the front tree, permute the matrix */
388:     lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
389:     lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);

391:     ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);

393:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
394: 
395:     if (  lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);

397:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
398:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);

400:     /* generate the owners map IV object and the map from vertices to owners */
401:     cutoff   = 1./(2*size);
402:     cumopsDV = DV_new();
403:     DV_init(cumopsDV, size, NULL);
404:     lu->ownersIV = ETree_ddMap(lu->frontETree,
405:                        lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
406:     DV_free(cumopsDV);
407:     lu->vtxmapIV = IV_new();
408:     IV_init(lu->vtxmapIV, M, NULL);
409:     IVgather(M, IV_entries(lu->vtxmapIV),
410:              IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
411:     if ( lu->options.msglvl > 2 ) {
412:       int err;

414:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
415:       IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
416:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
417:       IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
418:       err = fflush(lu->options.msgFile);
419:       if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
420:     }

422:     /* redistribute the matrix */
423:     lu->firsttag = 0 ;
424:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
425:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
426:     lu->firsttag += size ;

428:     InpMtx_free(lu->mtxA);
429:     lu->mtxA = newA ;
430:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
431:     if ( lu->options.msglvl > 2 ) {
432:       int err;
433:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
434:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
435:       err = fflush(lu->options.msgFile);
436:       if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
437:     }
438: 
439:     /* compute the symbolic factorization */
440:     lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
441:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
442:     lu->firsttag += lu->frontETree->nfront ;
443:     if ( lu->options.msglvl > 2 ) {
444:       int err;
445:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
446:       IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
447:       err = fflush(lu->options.msgFile);
448:       if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
449:     }

451:     lu->mtxmanager = SubMtxManager_new();
452:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
453:     lu->frontmtx = FrontMtx_new();

455:   } else { /* new num factorization using previously computed symbolic factor */
456:     /* different FrontMtx is required */
457:     FrontMtx_free(lu->frontmtx);
458:     lu->frontmtx   = FrontMtx_new();

460:     SubMtxManager_free(lu->mtxmanager);
461:     lu->mtxmanager = SubMtxManager_new();
462:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);

464:     /* permute mtxA */
465:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
466:     if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
467: 
468:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
469:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);

471:     /* redistribute the matrix */
472:     MPI_Barrier(((PetscObject)A)->comm);
473:     lu->firsttag = 0;
474:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
475:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
476:     lu->firsttag += size ;

478:     InpMtx_free(lu->mtxA);
479:     lu->mtxA = newA ;
480:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
481:     if ( lu->options.msglvl > 2 ) {
482:       int err;
483:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
484:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
485:       err = fflush(lu->options.msgFile);
486:       if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
487:     }
488:   } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */

490:   FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
491:               FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
492:               lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);

494:   if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
495:     if (lu->options.patchAndGoFlag == 1 || lu->options.patchAndGoFlag == 2 ){
496:       if (lu->frontmtx->patchinfo) PatchAndGoInfo_free(lu->frontmtx->patchinfo);
497:       lu->frontmtx->patchinfo = PatchAndGoInfo_new();
498:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, lu->options.patchAndGoFlag, lu->options.toosmall, lu->options.fudge,
499:                        lu->options.storeids, lu->options.storevalues);
500:     }
501:   }

503:   /* numerical factorization */
504:   chvmanager = ChvManager_new();
505:   ChvManager_init(chvmanager, NO_LOCK, 0);

507:   tagbound = maxTagMPI(lu->comm_spooles);
508:   lasttag  = lu->firsttag + 3*lu->frontETree->nfront + 2;
509:   if ( lasttag > tagbound ) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",lu->firsttag, lasttag, tagbound);
510:   rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
511:                      chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
512:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
513:   ChvManager_free(chvmanager);
514:   lu->firsttag = lasttag;
515:   if ( lu->options.msglvl > 2 ) {
516:     int err;
517:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
518:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
519:     err = fflush(lu->options.msgFile);
520:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
521:   }

523:   if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
524:     if ( lu->options.patchAndGoFlag == 1 ) {
525:       if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
526:         if (lu->options.msglvl > 0 ){
527:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
528:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
529:         }
530:       }
531:       PatchAndGoInfo_free(lu->frontmtx->patchinfo);
532:     } else if ( lu->options.patchAndGoFlag == 2 ) {
533:       if (lu->options.msglvl > 0 ){
534:         if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
535:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
536:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
537:         }
538:         if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
539:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
540:           DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
541:         }
542:       }
543:       PatchAndGoInfo_free(lu->frontmtx->patchinfo);
544:     }
545:   }
546:   if ( sierr >= 0 ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
547: 
548:   /*  post-process the factorization and split 
549:       the factor matrices into submatrices */
550:   lasttag  = lu->firsttag + 5*size;
551:   if ( lasttag > tagbound ) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",lu->firsttag, lasttag, tagbound);
552:   FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
553:                          lu->options.msgFile, lu->firsttag, lu->comm_spooles);
554:   lu->firsttag += 5*size ;
555:   if ( lu->options.msglvl > 2 ) {
556:     int err;
557:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
558:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
559:     err = fflush(lu->options.msgFile);
560:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
561:   }
562: 
563:   /* create the solve map object */
564:   if (lu->solvemap) SolveMap_free(lu->solvemap);
565:   lu->solvemap = SolveMap_new();
566:   SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
567:                FrontMtx_upperBlockIVL(lu->frontmtx),
568:                FrontMtx_lowerBlockIVL(lu->frontmtx),
569:                size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
570:                lu->options.seed, lu->options.msglvl, lu->options.msgFile);
571:   if ( lu->options.msglvl > 2 ) {
572:     int err;
573:     SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
574:     err = fflush(lu->options.msgFile);
575:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
576:   }

578:   /* redistribute the submatrices of the factors */
579:   FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
580:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
581:   if ( lu->options.msglvl > 2 ) {
582:     int err;
583:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
584:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
585:     err = fflush(lu->options.msgFile);
586:     if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
587:   }

589:   /* create a solution DenseMtx object */
590:   lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
591:                                          lu->options.msglvl, lu->options.msgFile);
592:   lu->nmycol = IV_size(lu->ownedColumnsIV);
593:   if ( lu->nmycol > 0) {
594:     DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
595:     /* get pointers rowindX and entX */
596:     DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
597:     lu->entX = DenseMtx_entries(lu->mtxX);
598:   } else { /* lu->nmycol == 0 */
599:     lu->entX    = 0;
600:     lu->rowindX = 0;
601:   }

603:   VecDestroy(&lu->vec_spooles);
604:   ISDestroy(&lu->iden);
605:   ISDestroy(&lu->is_petsc);
606:   VecScatterDestroy(&lu->scat);
607:   lu->scat      = PETSC_NULL;
608:   lu->flg       = SAME_NONZERO_PATTERN;
609:   F->ops->solve = MatSolve_MPISpooles;

611:   lu->CleanUpSpooles = PETSC_TRUE;
612:   return(0);
613: }