Actual source code: aijfact.c

  2: #include <../src/mat/impls/aij/seq/aij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petscbt.h>
  5: #include <../src/mat/utils/freespace.h>

 10: /*
 11:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

 13:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 14: */
 15: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
 16: {
 17:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
 18:   PetscErrorCode    ierr;
 19:   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
 20:   const PetscInt    *ai = a->i, *aj = a->j;
 21:   const PetscScalar *aa = a->a;
 22:   PetscBool         *done;
 23:   PetscReal         best,past = 0,future;

 26:   /* pick initial row */
 27:   best = -1;
 28:   for (i=0; i<n; i++) {
 29:     future = 0.0;
 30:     for (j=ai[i]; j<ai[i+1]; j++) {
 31:       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
 32:     }
 33:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 34:     if (past/future > best) {
 35:       best = past/future;
 36:       current = i;
 37:     }
 38:   }

 40:   PetscMalloc(n*sizeof(PetscBool),&done);
 41:   PetscMemzero(done,n*sizeof(PetscBool));
 42:   PetscMalloc(n*sizeof(PetscInt),&order);
 43:   order[0] = current;
 44:   for (i=0; i<n-1; i++) {
 45:     done[current] = PETSC_TRUE;
 46:     best          = -1;
 47:     /* loop over all neighbors of current pivot */
 48:     for (j=ai[current]; j<ai[current+1]; j++) {
 49:       jj = aj[j];
 50:       if (done[jj]) continue;
 51:       /* loop over columns of potential next row computing weights for below and above diagonal */
 52:       past = future = 0.0;
 53:       for (k=ai[jj]; k<ai[jj+1]; k++) {
 54:         kk = aj[k];
 55:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 56:         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
 57:       }
 58:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 59:       if (past/future > best) {
 60:         best = past/future;
 61:         newcurrent = jj;
 62:       }
 63:     }
 64:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 65:       best = -1;
 66:       for (k=0; k<n; k++) {
 67:         if (done[k]) continue;
 68:         future = 0.0;
 69:         past   = 0.0;
 70:         for (j=ai[k]; j<ai[k+1]; j++) {
 71:           kk = aj[j];
 72:           if (done[kk]) past += PetscAbsScalar(aa[j]);
 73:           else if (kk != k) future  += PetscAbsScalar(aa[j]);
 74:         }
 75:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 76:         if (past/future > best) {
 77:           best = past/future;
 78:           newcurrent = k;
 79:         }
 80:       }
 81:     }
 82:     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
 83:     current = newcurrent;
 84:     order[i+1] = current;
 85:   }
 86:   ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
 87:   *icol = *irow;
 88:   PetscObjectReference((PetscObject)*irow);
 89:   PetscFree(done);
 90:   PetscFree(order);
 91:   return(0);
 92: }

 98: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
 99: {
101:   *flg = PETSC_TRUE;
102:   return(0);
103: }

109: PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110: {
111:   PetscInt           n = A->rmap->n;
112:   PetscErrorCode     ierr;

115:   MatCreate(((PetscObject)A)->comm,B);
116:   MatSetSizes(*B,n,n,n,n);
117:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118:     MatSetType(*B,MATSEQAIJ);
119:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
121:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
122:     MatSetType(*B,MATSEQSBAIJ);
123:     MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
124:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
125:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
127:   (*B)->factortype = ftype;
128:   return(0);
129: }

134: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
135: {
136:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
137:   IS                 isicol;
138:   PetscErrorCode     ierr;
139:   const PetscInt     *r,*ic;
140:   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
141:   PetscInt           *bi,*bj,*ajtmp;
142:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
143:   PetscReal          f;
144:   PetscInt           nlnk,*lnk,k,**bi_ptr;
145:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
146:   PetscBT            lnkbt;
147: 
149:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
150:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
151:   ISGetIndices(isrow,&r);
152:   ISGetIndices(isicol,&ic);

154:   /* get new row pointers */
155:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
156:   bi[0] = 0;

158:   /* bdiag is location of diagonal in factor */
159:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
160:   bdiag[0] = 0;

162:   /* linked list for storing column indices of the active row */
163:   nlnk = n + 1;
164:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

166:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

168:   /* initial FreeSpace size is f*(ai[n]+1) */
169:   f = info->fill;
170:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
171:   current_space = free_space;

173:   for (i=0; i<n; i++) {
174:     /* copy previous fill into linked list */
175:     nzi = 0;
176:     nnz = ai[r[i]+1] - ai[r[i]];
177:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
178:     ajtmp = aj + ai[r[i]];
179:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
180:     nzi += nlnk;

182:     /* add pivot rows into linked list */
183:     row = lnk[n];
184:     while (row < i) {
185:       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
186:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
187:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
188:       nzi += nlnk;
189:       row  = lnk[row];
190:     }
191:     bi[i+1] = bi[i] + nzi;
192:     im[i]   = nzi;

194:     /* mark bdiag */
195:     nzbd = 0;
196:     nnz  = nzi;
197:     k    = lnk[n];
198:     while (nnz-- && k < i){
199:       nzbd++;
200:       k = lnk[k];
201:     }
202:     bdiag[i] = bi[i] + nzbd;

204:     /* if free space is not available, make more free space */
205:     if (current_space->local_remaining<nzi) {
206:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
207:       PetscFreeSpaceGet(nnz,&current_space);
208:       reallocs++;
209:     }

211:     /* copy data into free space, then initialize lnk */
212:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
213:     bi_ptr[i] = current_space->array;
214:     current_space->array           += nzi;
215:     current_space->local_used      += nzi;
216:     current_space->local_remaining -= nzi;
217:   }
218: #if defined(PETSC_USE_INFO)
219:   if (ai[n] != 0) {
220:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
222:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
223:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
224:     PetscInfo(A,"for best performance.\n");
225:   } else {
226:     PetscInfo(A,"Empty matrix\n");
227:   }
228: #endif

230:   ISRestoreIndices(isrow,&r);
231:   ISRestoreIndices(isicol,&ic);

233:   /* destroy list of free space and other temporary array(s) */
234:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
235:   PetscFreeSpaceContiguous(&free_space,bj);
236:   PetscLLDestroy(lnk,lnkbt);
237:   PetscFree2(bi_ptr,im);

239:   /* put together the new matrix */
240:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
241:   PetscLogObjectParent(B,isicol);
242:   b    = (Mat_SeqAIJ*)(B)->data;
243:   b->free_a       = PETSC_TRUE;
244:   b->free_ij      = PETSC_TRUE;
245:   b->singlemalloc = PETSC_FALSE;
246:   PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
247:   b->j          = bj;
248:   b->i          = bi;
249:   b->diag       = bdiag;
250:   b->ilen       = 0;
251:   b->imax       = 0;
252:   b->row        = isrow;
253:   b->col        = iscol;
254:   PetscObjectReference((PetscObject)isrow);
255:   PetscObjectReference((PetscObject)iscol);
256:   b->icol       = isicol;
257:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

259:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
260:   PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
261:   b->maxnz = b->nz = bi[n] ;

263:   (B)->factortype            = MAT_FACTOR_LU;
264:   (B)->info.factor_mallocs   = reallocs;
265:   (B)->info.fill_ratio_given = f;

267:   if (ai[n]) {
268:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
269:   } else {
270:     (B)->info.fill_ratio_needed = 0.0;
271:   }
272:   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
273:   if (a->inode.size) {
274:     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
275:   }
276:   return(0);
277: }

281: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
282: {
283:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
284:   IS                 isicol;
285:   PetscErrorCode     ierr;
286:   const PetscInt     *r,*ic;
287:   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
288:   PetscInt           *bi,*bj,*ajtmp;
289:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
290:   PetscReal          f;
291:   PetscInt           nlnk,*lnk,k,**bi_ptr;
292:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
293:   PetscBT            lnkbt;
294:   PetscBool          olddatastruct=PETSC_FALSE;

297:   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
298:   PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);
299:   if(olddatastruct){
300:     MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);
301:     return(0);
302:   }
303: 
304:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
305:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
306:   ISGetIndices(isrow,&r);
307:   ISGetIndices(isicol,&ic);

309:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
310:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
311:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
312:   bi[0] = bdiag[0] = 0;

314:   /* linked list for storing column indices of the active row */
315:   nlnk = n + 1;
316:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

318:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

320:   /* initial FreeSpace size is f*(ai[n]+1) */
321:   f = info->fill;
322:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
323:   current_space = free_space;

325:   for (i=0; i<n; i++) {
326:     /* copy previous fill into linked list */
327:     nzi = 0;
328:     nnz = ai[r[i]+1] - ai[r[i]];
329:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
330:     ajtmp = aj + ai[r[i]];
331:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
332:     nzi += nlnk;

334:     /* add pivot rows into linked list */
335:     row = lnk[n];
336:     while (row < i){
337:       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
338:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
339:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
340:       nzi  += nlnk;
341:       row   = lnk[row];
342:     }
343:     bi[i+1] = bi[i] + nzi;
344:     im[i]   = nzi;

346:     /* mark bdiag */
347:     nzbd = 0;
348:     nnz  = nzi;
349:     k    = lnk[n];
350:     while (nnz-- && k < i){
351:       nzbd++;
352:       k = lnk[k];
353:     }
354:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

356:     /* if free space is not available, make more free space */
357:     if (current_space->local_remaining<nzi) {
358:       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
359:       PetscFreeSpaceGet(nnz,&current_space);
360:       reallocs++;
361:     }

363:     /* copy data into free space, then initialize lnk */
364:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
365:     bi_ptr[i] = current_space->array;
366:     current_space->array           += nzi;
367:     current_space->local_used      += nzi;
368:     current_space->local_remaining -= nzi;
369:   }

371:   ISRestoreIndices(isrow,&r);
372:   ISRestoreIndices(isicol,&ic);

374:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
375:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
376:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
377:   PetscLLDestroy(lnk,lnkbt);
378:   PetscFree2(bi_ptr,im);

380:   /* put together the new matrix */
381:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
382:   PetscLogObjectParent(B,isicol);
383:   b    = (Mat_SeqAIJ*)(B)->data;
384:   b->free_a       = PETSC_TRUE;
385:   b->free_ij      = PETSC_TRUE;
386:   b->singlemalloc = PETSC_FALSE;
387:   PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
388:   b->j          = bj;
389:   b->i          = bi;
390:   b->diag       = bdiag;
391:   b->ilen       = 0;
392:   b->imax       = 0;
393:   b->row        = isrow;
394:   b->col        = iscol;
395:   PetscObjectReference((PetscObject)isrow);
396:   PetscObjectReference((PetscObject)iscol);
397:   b->icol       = isicol;
398:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

400:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
401:   PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
402:   b->maxnz = b->nz = bdiag[0]+1;
403:   B->factortype            = MAT_FACTOR_LU;
404:   B->info.factor_mallocs   = reallocs;
405:   B->info.fill_ratio_given = f;

407:   if (ai[n]) {
408:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
409:   } else {
410:     B->info.fill_ratio_needed = 0.0;
411:   }
412: #if defined(PETSC_USE_INFO)
413:   if (ai[n] != 0) {
414:     PetscReal af = B->info.fill_ratio_needed;
415:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
416:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
417:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
418:     PetscInfo(A,"for best performance.\n");
419:   } else {
420:     PetscInfo(A,"Empty matrix\n");
421:   }
422: #endif
423:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
424:   if (a->inode.size) {
425:     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
426:   }
427:   return(0);
428: }

430: /*
431:     Trouble in factorization, should we dump the original matrix?
432: */
435: PetscErrorCode MatFactorDumpMatrix(Mat A)
436: {
438:   PetscBool      flg = PETSC_FALSE;

441:   PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);
442:   if (flg) {
443:     PetscViewer viewer;
444:     char        filename[PETSC_MAX_PATH_LEN];

446:     PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
447:     PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
448:     MatView(A,viewer);
449:     PetscViewerDestroy(&viewer);
450:   }
451:   return(0);
452: }

456: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
457: {
458:   Mat              C=B;
459:   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
460:   IS               isrow = b->row,isicol = b->icol;
461:   PetscErrorCode   ierr;
462:   const PetscInt   *r,*ic,*ics;
463:   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
464:   PetscInt         i,j,k,nz,nzL,row,*pj;
465:   const PetscInt   *ajtmp,*bjtmp;
466:   MatScalar        *rtmp,*pc,multiplier,*pv;
467:   const  MatScalar *aa=a->a,*v;
468:   PetscBool        row_identity,col_identity;
469:   FactorShiftCtx   sctx;
470:   const PetscInt   *ddiag;
471:   PetscReal        rs;
472:   MatScalar        d;

475:   /* MatPivotSetUp(): initialize shift context sctx */
476:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

478:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
479:     ddiag          = a->diag;
480:     sctx.shift_top = info->zeropivot;
481:     for (i=0; i<n; i++) {
482:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
483:       d  = (aa)[ddiag[i]];
484:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
485:       v  = aa+ai[i];
486:       nz = ai[i+1] - ai[i];
487:       for (j=0; j<nz; j++)
488:         rs += PetscAbsScalar(v[j]);
489:       if (rs>sctx.shift_top) sctx.shift_top = rs;
490:     }
491:     sctx.shift_top   *= 1.1;
492:     sctx.nshift_max   = 5;
493:     sctx.shift_lo     = 0.;
494:     sctx.shift_hi     = 1.;
495:   }

497:   ISGetIndices(isrow,&r);
498:   ISGetIndices(isicol,&ic);
499:   PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
500:   ics  = ic;

502:   do {
503:     sctx.newshift = PETSC_FALSE;
504:     for (i=0; i<n; i++){
505:       /* zero rtmp */
506:       /* L part */
507:       nz    = bi[i+1] - bi[i];
508:       bjtmp = bj + bi[i];
509:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

511:       /* U part */
512:       nz = bdiag[i]-bdiag[i+1];
513:       bjtmp = bj + bdiag[i+1]+1;
514:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
515: 
516:       /* load in initial (unfactored row) */
517:       nz    = ai[r[i]+1] - ai[r[i]];
518:       ajtmp = aj + ai[r[i]];
519:       v     = aa + ai[r[i]];
520:       for (j=0; j<nz; j++) {
521:         rtmp[ics[ajtmp[j]]] = v[j];
522:       }
523:       /* ZeropivotApply() */
524:       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
525: 
526:       /* elimination */
527:       bjtmp = bj + bi[i];
528:       row   = *bjtmp++;
529:       nzL   = bi[i+1] - bi[i];
530:       for(k=0; k < nzL;k++) {
531:         pc = rtmp + row;
532:         if (*pc != 0.0) {
533:           pv         = b->a + bdiag[row];
534:           multiplier = *pc * (*pv);
535:           *pc        = multiplier;
536:           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
537:           pv = b->a + bdiag[row+1]+1;
538:           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
539:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
540:           PetscLogFlops(2.0*nz);
541:         }
542:         row = *bjtmp++;
543:       }

545:       /* finished row so stick it into b->a */
546:       rs = 0.0;
547:       /* L part */
548:       pv   = b->a + bi[i] ;
549:       pj   = b->j + bi[i] ;
550:       nz   = bi[i+1] - bi[i];
551:       for (j=0; j<nz; j++) {
552:         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
553:       }

555:       /* U part */
556:       pv = b->a + bdiag[i+1]+1;
557:       pj = b->j + bdiag[i+1]+1;
558:       nz = bdiag[i] - bdiag[i+1]-1;
559:       for (j=0; j<nz; j++) {
560:         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
561:       }

563:       sctx.rs  = rs;
564:       sctx.pv  = rtmp[i];
565:       MatPivotCheck(A,info,&sctx,i);
566:       if(sctx.newshift) break; /* break for-loop */
567:       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

569:       /* Mark diagonal and invert diagonal for simplier triangular solves */
570:       pv  = b->a + bdiag[i];
571:       *pv = 1.0/rtmp[i];

573:     } /* endof for (i=0; i<n; i++){ */

575:     /* MatPivotRefine() */
576:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
577:       /* 
578:        * if no shift in this attempt & shifting & started shifting & can refine,
579:        * then try lower shift
580:        */
581:       sctx.shift_hi       = sctx.shift_fraction;
582:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
583:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
584:       sctx.newshift       = PETSC_TRUE;
585:       sctx.nshift++;
586:     }
587:   } while (sctx.newshift);

589:   PetscFree(rtmp);
590:   ISRestoreIndices(isicol,&ic);
591:   ISRestoreIndices(isrow,&r);
592: 
593:   ISIdentity(isrow,&row_identity);
594:   ISIdentity(isicol,&col_identity);
595:   if (row_identity && col_identity) {
596:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
597:   } else {
598:     C->ops->solve = MatSolve_SeqAIJ;
599:   }
600:   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
601:   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
602:   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
603:   C->ops->matsolve           = MatMatSolve_SeqAIJ;
604:   C->assembled    = PETSC_TRUE;
605:   C->preallocated = PETSC_TRUE;
606:   PetscLogFlops(C->cmap->n);

608:   /* MatShiftView(A,info,&sctx) */
609:   if (sctx.nshift){
610:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
611:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
612:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
613:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
614:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
615:       PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
616:     }
617:   }
618:   Mat_CheckInode_FactorLU(C,PETSC_FALSE);
619:   return(0);
620: }

624: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
625: {
626:   Mat             C=B;
627:   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
628:   IS              isrow = b->row,isicol = b->icol;
629:   PetscErrorCode  ierr;
630:   const PetscInt   *r,*ic,*ics;
631:   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
632:   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
633:   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
634:   MatScalar       *pv,*rtmp,*pc,multiplier,d;
635:   const MatScalar *v,*aa=a->a;
636:   PetscReal       rs=0.0;
637:   FactorShiftCtx  sctx;
638:   const PetscInt  *ddiag;
639:   PetscBool       row_identity, col_identity;

642:   /* MatPivotSetUp(): initialize shift context sctx */
643:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

645:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
646:     ddiag          = a->diag;
647:     sctx.shift_top = info->zeropivot;
648:     for (i=0; i<n; i++) {
649:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
650:       d  = (aa)[ddiag[i]];
651:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
652:       v  = aa+ai[i];
653:       nz = ai[i+1] - ai[i];
654:       for (j=0; j<nz; j++)
655:         rs += PetscAbsScalar(v[j]);
656:       if (rs>sctx.shift_top) sctx.shift_top = rs;
657:     }
658:     sctx.shift_top   *= 1.1;
659:     sctx.nshift_max   = 5;
660:     sctx.shift_lo     = 0.;
661:     sctx.shift_hi     = 1.;
662:   }

664:   ISGetIndices(isrow,&r);
665:   ISGetIndices(isicol,&ic);
666:   PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
667:   ics  = ic;

669:   do {
670:     sctx.newshift = PETSC_FALSE;
671:     for (i=0; i<n; i++){
672:       nz    = bi[i+1] - bi[i];
673:       bjtmp = bj + bi[i];
674:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

676:       /* load in initial (unfactored row) */
677:       nz    = ai[r[i]+1] - ai[r[i]];
678:       ajtmp = aj + ai[r[i]];
679:       v     = aa + ai[r[i]];
680:       for (j=0; j<nz; j++) {
681:         rtmp[ics[ajtmp[j]]] = v[j];
682:       }
683:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

685:       row = *bjtmp++;
686:       while  (row < i) {
687:         pc = rtmp + row;
688:         if (*pc != 0.0) {
689:           pv         = b->a + diag_offset[row];
690:           pj         = b->j + diag_offset[row] + 1;
691:           multiplier = *pc / *pv++;
692:           *pc        = multiplier;
693:           nz         = bi[row+1] - diag_offset[row] - 1;
694:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
695:           PetscLogFlops(2.0*nz);
696:         }
697:         row = *bjtmp++;
698:       }
699:       /* finished row so stick it into b->a */
700:       pv   = b->a + bi[i] ;
701:       pj   = b->j + bi[i] ;
702:       nz   = bi[i+1] - bi[i];
703:       diag = diag_offset[i] - bi[i];
704:       rs   = 0.0;
705:       for (j=0; j<nz; j++) {
706:         pv[j] = rtmp[pj[j]];
707:         rs   += PetscAbsScalar(pv[j]);
708:       }
709:       rs   -= PetscAbsScalar(pv[diag]);

711:       sctx.rs  = rs;
712:       sctx.pv  = pv[diag];
713:       MatPivotCheck(A,info,&sctx,i);
714:       if (sctx.newshift) break;
715:       pv[diag] = sctx.pv;
716:     }

718:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
719:       /*
720:        * if no shift in this attempt & shifting & started shifting & can refine,
721:        * then try lower shift
722:        */
723:       sctx.shift_hi       = sctx.shift_fraction;
724:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
725:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
726:       sctx.newshift       = PETSC_TRUE;
727:       sctx.nshift++;
728:     }
729:   } while (sctx.newshift);

731:   /* invert diagonal entries for simplier triangular solves */
732:   for (i=0; i<n; i++) {
733:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
734:   }
735:   PetscFree(rtmp);
736:   ISRestoreIndices(isicol,&ic);
737:   ISRestoreIndices(isrow,&r);

739:   ISIdentity(isrow,&row_identity);
740:   ISIdentity(isicol,&col_identity);
741:   if (row_identity && col_identity) {
742:     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
743:   } else {
744:     C->ops->solve   = MatSolve_SeqAIJ_inplace;
745:   }
746:   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
747:   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
748:   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
749:   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
750:   C->assembled    = PETSC_TRUE;
751:   C->preallocated = PETSC_TRUE;
752:   PetscLogFlops(C->cmap->n);
753:   if (sctx.nshift){
754:      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
755:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
756:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
757:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
758:     }
759:   }
760:   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
761:   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
762:   Mat_CheckInode(C,PETSC_FALSE);
763:   return(0);
764: }

766: /* 
767:    This routine implements inplace ILU(0) with row or/and column permutations. 
768:    Input: 
769:      A - original matrix
770:    Output;
771:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 
772:          a->j (col index) is permuted by the inverse of colperm, then sorted
773:          a->a reordered accordingly with a->j
774:          a->diag (ptr to diagonal elements) is updated.
775: */
778: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
779: {
780:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
781:   IS             isrow = a->row,isicol = a->icol;
783:   const PetscInt *r,*ic,*ics;
784:   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
785:   PetscInt       *ajtmp,nz,row;
786:   PetscInt       *diag = a->diag,nbdiag,*pj;
787:   PetscScalar    *rtmp,*pc,multiplier,d;
788:   MatScalar      *pv,*v;
789:   PetscReal      rs;
790:   FactorShiftCtx sctx;
791:   const  MatScalar *aa=a->a,*vtmp;

794:   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");

796:   /* MatPivotSetUp(): initialize shift context sctx */
797:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

799:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
800:     const PetscInt   *ddiag = a->diag;
801:     sctx.shift_top = info->zeropivot;
802:     for (i=0; i<n; i++) {
803:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
804:       d  = (aa)[ddiag[i]];
805:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
806:       vtmp  = aa+ai[i];
807:       nz = ai[i+1] - ai[i];
808:       for (j=0; j<nz; j++)
809:         rs += PetscAbsScalar(vtmp[j]);
810:       if (rs>sctx.shift_top) sctx.shift_top = rs;
811:     }
812:     sctx.shift_top   *= 1.1;
813:     sctx.nshift_max   = 5;
814:     sctx.shift_lo     = 0.;
815:     sctx.shift_hi     = 1.;
816:   }

818:   ISGetIndices(isrow,&r);
819:   ISGetIndices(isicol,&ic);
820:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
821:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
822:   ics = ic;

824: #if defined(MV)
825:   sctx.shift_top      = 0.;
826:   sctx.nshift_max     = 0;
827:   sctx.shift_lo       = 0.;
828:   sctx.shift_hi       = 0.;
829:   sctx.shift_fraction = 0.;

831:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
832:     sctx.shift_top = 0.;
833:     for (i=0; i<n; i++) {
834:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
835:       d  = (a->a)[diag[i]];
836:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
837:       v  = a->a+ai[i];
838:       nz = ai[i+1] - ai[i];
839:       for (j=0; j<nz; j++)
840:         rs += PetscAbsScalar(v[j]);
841:       if (rs>sctx.shift_top) sctx.shift_top = rs;
842:     }
843:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
844:     sctx.shift_top    *= 1.1;
845:     sctx.nshift_max   = 5;
846:     sctx.shift_lo     = 0.;
847:     sctx.shift_hi     = 1.;
848:   }

850:   sctx.shift_amount = 0.;
851:   sctx.nshift       = 0;
852: #endif

854:   do {
855:     sctx.newshift = PETSC_FALSE;
856:     for (i=0; i<n; i++){
857:       /* load in initial unfactored row */
858:       nz    = ai[r[i]+1] - ai[r[i]];
859:       ajtmp = aj + ai[r[i]];
860:       v     = a->a + ai[r[i]];
861:       /* sort permuted ajtmp and values v accordingly */
862:       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
863:       PetscSortIntWithScalarArray(nz,ajtmp,v);

865:       diag[r[i]] = ai[r[i]];
866:       for (j=0; j<nz; j++) {
867:         rtmp[ajtmp[j]] = v[j];
868:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
869:       }
870:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

872:       row = *ajtmp++;
873:       while  (row < i) {
874:         pc = rtmp + row;
875:         if (*pc != 0.0) {
876:           pv         = a->a + diag[r[row]];
877:           pj         = aj + diag[r[row]] + 1;

879:           multiplier = *pc / *pv++;
880:           *pc        = multiplier;
881:           nz         = ai[r[row]+1] - diag[r[row]] - 1;
882:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
883:           PetscLogFlops(2.0*nz);
884:         }
885:         row = *ajtmp++;
886:       }
887:       /* finished row so overwrite it onto a->a */
888:       pv   = a->a + ai[r[i]] ;
889:       pj   = aj + ai[r[i]] ;
890:       nz   = ai[r[i]+1] - ai[r[i]];
891:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
892: 
893:       rs   = 0.0;
894:       for (j=0; j<nz; j++) {
895:         pv[j] = rtmp[pj[j]];
896:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
897:       }

899:       sctx.rs  = rs;
900:       sctx.pv  = pv[nbdiag];
901:       MatPivotCheck(A,info,&sctx,i);
902:       if (sctx.newshift) break;
903:       pv[nbdiag] = sctx.pv;
904:     }

906:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
907:       /*
908:        * if no shift in this attempt & shifting & started shifting & can refine,
909:        * then try lower shift
910:        */
911:       sctx.shift_hi        = sctx.shift_fraction;
912:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
913:       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
914:       sctx.newshift         = PETSC_TRUE;
915:       sctx.nshift++;
916:     }
917:   } while (sctx.newshift);

919:   /* invert diagonal entries for simplier triangular solves */
920:   for (i=0; i<n; i++) {
921:     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
922:   }

924:   PetscFree(rtmp);
925:   ISRestoreIndices(isicol,&ic);
926:   ISRestoreIndices(isrow,&r);
927:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
928:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
929:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
930:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
931:   A->assembled = PETSC_TRUE;
932:   A->preallocated = PETSC_TRUE;
933:   PetscLogFlops(A->cmap->n);
934:   if (sctx.nshift){
935:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
936:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
937:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
938:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
939:     }
940:   }
941:   return(0);
942: }

944: /* ----------------------------------------------------------- */
947: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
948: {
950:   Mat            C;

953:   MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
954:   MatLUFactorSymbolic(C,A,row,col,info);
955:   MatLUFactorNumeric(C,A,info);
956:   A->ops->solve            = C->ops->solve;
957:   A->ops->solvetranspose   = C->ops->solvetranspose;
958:   MatHeaderMerge(A,C);
959:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
960:   return(0);
961: }
962: /* ----------------------------------------------------------- */


967: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
968: {
969:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
970:   IS                iscol = a->col,isrow = a->row;
971:   PetscErrorCode    ierr;
972:   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
973:   PetscInt          nz;
974:   const PetscInt    *rout,*cout,*r,*c;
975:   PetscScalar       *x,*tmp,*tmps,sum;
976:   const PetscScalar *b;
977:   const MatScalar   *aa = a->a,*v;
978: 
980:   if (!n) return(0);

982:   VecGetArrayRead(bb,&b);
983:   VecGetArray(xx,&x);
984:   tmp  = a->solve_work;

986:   ISGetIndices(isrow,&rout); r = rout;
987:   ISGetIndices(iscol,&cout); c = cout + (n-1);

989:   /* forward solve the lower triangular */
990:   tmp[0] = b[*r++];
991:   tmps   = tmp;
992:   for (i=1; i<n; i++) {
993:     v   = aa + ai[i] ;
994:     vi  = aj + ai[i] ;
995:     nz  = a->diag[i] - ai[i];
996:     sum = b[*r++];
997:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
998:     tmp[i] = sum;
999:   }

1001:   /* backward solve the upper triangular */
1002:   for (i=n-1; i>=0; i--){
1003:     v   = aa + a->diag[i] + 1;
1004:     vi  = aj + a->diag[i] + 1;
1005:     nz  = ai[i+1] - a->diag[i] - 1;
1006:     sum = tmp[i];
1007:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1008:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1009:   }

1011:   ISRestoreIndices(isrow,&rout);
1012:   ISRestoreIndices(iscol,&cout);
1013:   VecRestoreArrayRead(bb,&b);
1014:   VecRestoreArray(xx,&x);
1015:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1016:   return(0);
1017: }

1021: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1022: {
1023:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1024:   IS              iscol = a->col,isrow = a->row;
1025:   PetscErrorCode  ierr;
1026:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1027:   PetscInt        nz,neq;
1028:   const PetscInt  *rout,*cout,*r,*c;
1029:   PetscScalar     *x,*b,*tmp,*tmps,sum;
1030:   const MatScalar *aa = a->a,*v;
1031:   PetscBool       bisdense,xisdense;

1034:   if (!n) return(0);

1036:   PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1037:   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1038:   PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1039:   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");

1041:   MatGetArray(B,&b);
1042:   MatGetArray(X,&x);
1043: 
1044:   tmp  = a->solve_work;
1045:   ISGetIndices(isrow,&rout); r = rout;
1046:   ISGetIndices(iscol,&cout); c = cout;

1048:   for (neq=0; neq<B->cmap->n; neq++){
1049:     /* forward solve the lower triangular */
1050:     tmp[0] = b[r[0]];
1051:     tmps   = tmp;
1052:     for (i=1; i<n; i++) {
1053:       v   = aa + ai[i] ;
1054:       vi  = aj + ai[i] ;
1055:       nz  = a->diag[i] - ai[i];
1056:       sum = b[r[i]];
1057:       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1058:       tmp[i] = sum;
1059:     }
1060:     /* backward solve the upper triangular */
1061:     for (i=n-1; i>=0; i--){
1062:       v   = aa + a->diag[i] + 1;
1063:       vi  = aj + a->diag[i] + 1;
1064:       nz  = ai[i+1] - a->diag[i] - 1;
1065:       sum = tmp[i];
1066:       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1067:       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1068:     }

1070:     b += n;
1071:     x += n;
1072:   }
1073:   ISRestoreIndices(isrow,&rout);
1074:   ISRestoreIndices(iscol,&cout);
1075:   MatRestoreArray(B,&b);
1076:   MatRestoreArray(X,&x);
1077:   PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1078:   return(0);
1079: }

1083: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1084: {
1085:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1086:   IS              iscol = a->col,isrow = a->row;
1087:   PetscErrorCode  ierr;
1088:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1089:   PetscInt        nz,neq;
1090:   const PetscInt  *rout,*cout,*r,*c;
1091:   PetscScalar     *x,*b,*tmp,sum;
1092:   const MatScalar *aa = a->a,*v;
1093:   PetscBool       bisdense,xisdense;

1096:   if (!n) return(0);

1098:   PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1099:   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1100:   PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1101:   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");

1103:   MatGetArray(B,&b);
1104:   MatGetArray(X,&x);
1105: 
1106:   tmp  = a->solve_work;
1107:   ISGetIndices(isrow,&rout); r = rout;
1108:   ISGetIndices(iscol,&cout); c = cout;

1110:   for (neq=0; neq<B->cmap->n; neq++){
1111:     /* forward solve the lower triangular */
1112:     tmp[0] = b[r[0]];
1113:     v      = aa;
1114:     vi     = aj;
1115:     for (i=1; i<n; i++) {
1116:       nz  = ai[i+1] - ai[i];
1117:       sum = b[r[i]];
1118:       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1119:       tmp[i] = sum;
1120:       v += nz; vi += nz;
1121:     }

1123:     /* backward solve the upper triangular */
1124:     for (i=n-1; i>=0; i--){
1125:       v   = aa + adiag[i+1]+1;
1126:       vi  = aj + adiag[i+1]+1;
1127:       nz  = adiag[i]-adiag[i+1]-1;
1128:       sum = tmp[i];
1129:       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1130:       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1131:     }
1132: 
1133:     b += n;
1134:     x += n;
1135:   }
1136:   ISRestoreIndices(isrow,&rout);
1137:   ISRestoreIndices(iscol,&cout);
1138:   MatRestoreArray(B,&b);
1139:   MatRestoreArray(X,&x);
1140:   PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1141:   return(0);
1142: }

1146: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1147: {
1148:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1149:   IS              iscol = a->col,isrow = a->row;
1150:   PetscErrorCode  ierr;
1151:   const PetscInt  *r,*c,*rout,*cout;
1152:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1153:   PetscInt        nz,row;
1154:   PetscScalar     *x,*b,*tmp,*tmps,sum;
1155:   const MatScalar *aa = a->a,*v;

1158:   if (!n) return(0);

1160:   VecGetArray(bb,&b);
1161:   VecGetArray(xx,&x);
1162:   tmp  = a->solve_work;

1164:   ISGetIndices(isrow,&rout); r = rout;
1165:   ISGetIndices(iscol,&cout); c = cout + (n-1);

1167:   /* forward solve the lower triangular */
1168:   tmp[0] = b[*r++];
1169:   tmps   = tmp;
1170:   for (row=1; row<n; row++) {
1171:     i   = rout[row]; /* permuted row */
1172:     v   = aa + ai[i] ;
1173:     vi  = aj + ai[i] ;
1174:     nz  = a->diag[i] - ai[i];
1175:     sum = b[*r++];
1176:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1177:     tmp[row] = sum;
1178:   }

1180:   /* backward solve the upper triangular */
1181:   for (row=n-1; row>=0; row--){
1182:     i   = rout[row]; /* permuted row */
1183:     v   = aa + a->diag[i] + 1;
1184:     vi  = aj + a->diag[i] + 1;
1185:     nz  = ai[i+1] - a->diag[i] - 1;
1186:     sum = tmp[row];
1187:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1188:     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1189:   }

1191:   ISRestoreIndices(isrow,&rout);
1192:   ISRestoreIndices(iscol,&cout);
1193:   VecRestoreArray(bb,&b);
1194:   VecRestoreArray(xx,&x);
1195:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1196:   return(0);
1197: }

1199: /* ----------------------------------------------------------- */
1200: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1203: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1204: {
1205:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1206:   PetscErrorCode    ierr;
1207:   PetscInt          n = A->rmap->n;
1208:   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1209:   PetscScalar       *x;
1210:   const PetscScalar *b;
1211:   const MatScalar   *aa = a->a;
1212: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1213:   PetscInt          adiag_i,i,nz,ai_i;
1214:   const PetscInt    *vi;
1215:   const MatScalar   *v;
1216:   PetscScalar       sum;
1217: #endif

1220:   if (!n) return(0);

1222:   VecGetArrayRead(bb,&b);
1223:   VecGetArray(xx,&x);

1225: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1226:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1227: #else
1228:   /* forward solve the lower triangular */
1229:   x[0] = b[0];
1230:   for (i=1; i<n; i++) {
1231:     ai_i = ai[i];
1232:     v    = aa + ai_i;
1233:     vi   = aj + ai_i;
1234:     nz   = adiag[i] - ai_i;
1235:     sum  = b[i];
1236:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1237:     x[i] = sum;
1238:   }

1240:   /* backward solve the upper triangular */
1241:   for (i=n-1; i>=0; i--){
1242:     adiag_i = adiag[i];
1243:     v       = aa + adiag_i + 1;
1244:     vi      = aj + adiag_i + 1;
1245:     nz      = ai[i+1] - adiag_i - 1;
1246:     sum     = x[i];
1247:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1248:     x[i]    = sum*aa[adiag_i];
1249:   }
1250: #endif
1251:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1252:   VecRestoreArrayRead(bb,&b);
1253:   VecRestoreArray(xx,&x);
1254:   return(0);
1255: }

1259: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1260: {
1261:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1262:   IS                iscol = a->col,isrow = a->row;
1263:   PetscErrorCode    ierr;
1264:   PetscInt          i, n = A->rmap->n,j;
1265:   PetscInt          nz;
1266:   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1267:   PetscScalar       *x,*tmp,sum;
1268:   const PetscScalar *b;
1269:   const MatScalar   *aa = a->a,*v;

1272:   if (yy != xx) {VecCopy(yy,xx);}

1274:   VecGetArrayRead(bb,&b);
1275:   VecGetArray(xx,&x);
1276:   tmp  = a->solve_work;

1278:   ISGetIndices(isrow,&rout); r = rout;
1279:   ISGetIndices(iscol,&cout); c = cout + (n-1);

1281:   /* forward solve the lower triangular */
1282:   tmp[0] = b[*r++];
1283:   for (i=1; i<n; i++) {
1284:     v   = aa + ai[i] ;
1285:     vi  = aj + ai[i] ;
1286:     nz  = a->diag[i] - ai[i];
1287:     sum = b[*r++];
1288:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1289:     tmp[i] = sum;
1290:   }

1292:   /* backward solve the upper triangular */
1293:   for (i=n-1; i>=0; i--){
1294:     v   = aa + a->diag[i] + 1;
1295:     vi  = aj + a->diag[i] + 1;
1296:     nz  = ai[i+1] - a->diag[i] - 1;
1297:     sum = tmp[i];
1298:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1299:     tmp[i] = sum*aa[a->diag[i]];
1300:     x[*c--] += tmp[i];
1301:   }

1303:   ISRestoreIndices(isrow,&rout);
1304:   ISRestoreIndices(iscol,&cout);
1305:   VecRestoreArrayRead(bb,&b);
1306:   VecRestoreArray(xx,&x);
1307:   PetscLogFlops(2.0*a->nz);

1309:   return(0);
1310: }

1314: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1315: {
1316:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1317:   IS                iscol = a->col,isrow = a->row;
1318:   PetscErrorCode    ierr;
1319:   PetscInt          i, n = A->rmap->n,j;
1320:   PetscInt          nz;
1321:   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1322:   PetscScalar       *x,*tmp,sum;
1323:   const PetscScalar *b;
1324:   const MatScalar   *aa = a->a,*v;

1327:   if (yy != xx) {VecCopy(yy,xx);}

1329:   VecGetArrayRead(bb,&b);
1330:   VecGetArray(xx,&x);
1331:   tmp  = a->solve_work;

1333:   ISGetIndices(isrow,&rout); r = rout;
1334:   ISGetIndices(iscol,&cout); c = cout;

1336:   /* forward solve the lower triangular */
1337:   tmp[0] = b[r[0]];
1338:   v      = aa;
1339:   vi     = aj;
1340:   for (i=1; i<n; i++) {
1341:     nz  = ai[i+1] - ai[i];
1342:     sum = b[r[i]];
1343:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1344:     tmp[i] = sum;
1345:     v += nz; vi += nz;
1346:   }

1348:   /* backward solve the upper triangular */
1349:   v  = aa + adiag[n-1];
1350:   vi = aj + adiag[n-1];
1351:   for (i=n-1; i>=0; i--){
1352:     nz  = adiag[i] - adiag[i+1] - 1;
1353:     sum = tmp[i];
1354:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1355:     tmp[i] = sum*v[nz];
1356:     x[c[i]] += tmp[i];
1357:     v += nz+1; vi += nz+1;
1358:   }

1360:   ISRestoreIndices(isrow,&rout);
1361:   ISRestoreIndices(iscol,&cout);
1362:   VecRestoreArrayRead(bb,&b);
1363:   VecRestoreArray(xx,&x);
1364:   PetscLogFlops(2.0*a->nz);

1366:   return(0);
1367: }

1371: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1372: {
1373:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1374:   IS                iscol = a->col,isrow = a->row;
1375:   PetscErrorCode    ierr;
1376:   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1377:   PetscInt          i,n = A->rmap->n,j;
1378:   PetscInt          nz;
1379:   PetscScalar       *x,*tmp,s1;
1380:   const MatScalar   *aa = a->a,*v;
1381:   const PetscScalar *b;

1384:   VecGetArrayRead(bb,&b);
1385:   VecGetArray(xx,&x);
1386:   tmp  = a->solve_work;

1388:   ISGetIndices(isrow,&rout); r = rout;
1389:   ISGetIndices(iscol,&cout); c = cout;

1391:   /* copy the b into temp work space according to permutation */
1392:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1394:   /* forward solve the U^T */
1395:   for (i=0; i<n; i++) {
1396:     v   = aa + diag[i] ;
1397:     vi  = aj + diag[i] + 1;
1398:     nz  = ai[i+1] - diag[i] - 1;
1399:     s1  = tmp[i];
1400:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1401:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1402:     tmp[i] = s1;
1403:   }

1405:   /* backward solve the L^T */
1406:   for (i=n-1; i>=0; i--){
1407:     v   = aa + diag[i] - 1 ;
1408:     vi  = aj + diag[i] - 1 ;
1409:     nz  = diag[i] - ai[i];
1410:     s1  = tmp[i];
1411:     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1412:   }

1414:   /* copy tmp into x according to permutation */
1415:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1417:   ISRestoreIndices(isrow,&rout);
1418:   ISRestoreIndices(iscol,&cout);
1419:   VecRestoreArrayRead(bb,&b);
1420:   VecRestoreArray(xx,&x);

1422:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1423:   return(0);
1424: }

1428: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1429: {
1430:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1431:   IS                iscol = a->col,isrow = a->row;
1432:   PetscErrorCode    ierr;
1433:   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1434:   PetscInt          i,n = A->rmap->n,j;
1435:   PetscInt          nz;
1436:   PetscScalar       *x,*tmp,s1;
1437:   const MatScalar   *aa = a->a,*v;
1438:   const PetscScalar *b;

1441:   VecGetArrayRead(bb,&b);
1442:   VecGetArray(xx,&x);
1443:   tmp  = a->solve_work;

1445:   ISGetIndices(isrow,&rout); r = rout;
1446:   ISGetIndices(iscol,&cout); c = cout;

1448:   /* copy the b into temp work space according to permutation */
1449:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1451:   /* forward solve the U^T */
1452:   for (i=0; i<n; i++) {
1453:     v   = aa + adiag[i+1] + 1;
1454:     vi  = aj + adiag[i+1] + 1;
1455:     nz  = adiag[i] - adiag[i+1] - 1;
1456:     s1  = tmp[i];
1457:     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1458:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1459:     tmp[i] = s1;
1460:   }

1462:   /* backward solve the L^T */
1463:   for (i=n-1; i>=0; i--){
1464:     v   = aa + ai[i];
1465:     vi  = aj + ai[i];
1466:     nz  = ai[i+1] - ai[i];
1467:     s1  = tmp[i];
1468:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1469:   }

1471:   /* copy tmp into x according to permutation */
1472:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1474:   ISRestoreIndices(isrow,&rout);
1475:   ISRestoreIndices(iscol,&cout);
1476:   VecRestoreArrayRead(bb,&b);
1477:   VecRestoreArray(xx,&x);

1479:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1480:   return(0);
1481: }

1485: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1486: {
1487:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1488:   IS                iscol = a->col,isrow = a->row;
1489:   PetscErrorCode    ierr;
1490:   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1491:   PetscInt          i,n = A->rmap->n,j;
1492:   PetscInt          nz;
1493:   PetscScalar       *x,*tmp,s1;
1494:   const MatScalar   *aa = a->a,*v;
1495:   const PetscScalar *b;

1498:   if (zz != xx) {VecCopy(zz,xx);}
1499:   VecGetArrayRead(bb,&b);
1500:   VecGetArray(xx,&x);
1501:   tmp  = a->solve_work;

1503:   ISGetIndices(isrow,&rout); r = rout;
1504:   ISGetIndices(iscol,&cout); c = cout;

1506:   /* copy the b into temp work space according to permutation */
1507:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1509:   /* forward solve the U^T */
1510:   for (i=0; i<n; i++) {
1511:     v   = aa + diag[i] ;
1512:     vi  = aj + diag[i] + 1;
1513:     nz  = ai[i+1] - diag[i] - 1;
1514:     s1  = tmp[i];
1515:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1516:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1517:     tmp[i] = s1;
1518:   }

1520:   /* backward solve the L^T */
1521:   for (i=n-1; i>=0; i--){
1522:     v   = aa + diag[i] - 1 ;
1523:     vi  = aj + diag[i] - 1 ;
1524:     nz  = diag[i] - ai[i];
1525:     s1  = tmp[i];
1526:     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1527:   }

1529:   /* copy tmp into x according to permutation */
1530:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1532:   ISRestoreIndices(isrow,&rout);
1533:   ISRestoreIndices(iscol,&cout);
1534:   VecRestoreArrayRead(bb,&b);
1535:   VecRestoreArray(xx,&x);

1537:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1538:   return(0);
1539: }

1543: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1544: {
1545:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1546:   IS                iscol = a->col,isrow = a->row;
1547:   PetscErrorCode    ierr;
1548:   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1549:   PetscInt          i,n = A->rmap->n,j;
1550:   PetscInt          nz;
1551:   PetscScalar       *x,*tmp,s1;
1552:   const MatScalar   *aa = a->a,*v;
1553:   const PetscScalar *b;

1556:   if (zz != xx) {VecCopy(zz,xx);}
1557:   VecGetArrayRead(bb,&b);
1558:   VecGetArray(xx,&x);
1559:   tmp  = a->solve_work;

1561:   ISGetIndices(isrow,&rout); r = rout;
1562:   ISGetIndices(iscol,&cout); c = cout;

1564:   /* copy the b into temp work space according to permutation */
1565:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1567:   /* forward solve the U^T */
1568:   for (i=0; i<n; i++) {
1569:     v   = aa + adiag[i+1] + 1;
1570:     vi  = aj + adiag[i+1] + 1;
1571:     nz  = adiag[i] - adiag[i+1] - 1;
1572:     s1  = tmp[i];
1573:     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1574:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1575:     tmp[i] = s1;
1576:   }


1579:   /* backward solve the L^T */
1580:   for (i=n-1; i>=0; i--){
1581:     v   = aa + ai[i] ;
1582:     vi  = aj + ai[i];
1583:     nz  = ai[i+1] - ai[i];
1584:     s1  = tmp[i];
1585:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1586:   }

1588:   /* copy tmp into x according to permutation */
1589:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1591:   ISRestoreIndices(isrow,&rout);
1592:   ISRestoreIndices(iscol,&cout);
1593:   VecRestoreArrayRead(bb,&b);
1594:   VecRestoreArray(xx,&x);

1596:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1597:   return(0);
1598: }

1600: /* ----------------------------------------------------------------*/


1604: /* 
1605:    ilu() under revised new data structure.
1606:    Factored arrays bj and ba are stored as
1607:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1609:    bi=fact->i is an array of size n+1, in which 
1610:    bi+
1611:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1612:      bi[n]:  points to L(n-1,n-1)+1
1613:      
1614:   bdiag=fact->diag is an array of size n+1,in which
1615:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1616:      bdiag[n]: points to entry of U(n-1,0)-1

1618:    U(i,:) contains bdiag[i] as its last entry, i.e., 
1619:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1620: */
1623: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1624: {
1625: 
1626:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1627:   PetscErrorCode     ierr;
1628:   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1629:   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1630:   PetscBool          missing;
1631:   IS                 isicol;

1634:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1635:   MatMissingDiagonal(A,&missing,&i);
1636:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1637:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

1639:   MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1640:   b    = (Mat_SeqAIJ*)(fact)->data;

1642:   /* allocate matrix arrays for new data structure */
1643:   PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);
1644:   PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1645:   b->singlemalloc = PETSC_TRUE;
1646:   if (!b->diag){
1647:     PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);
1648:     PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));
1649:   }
1650:   bdiag = b->diag;
1651: 
1652:   if (n > 0) {
1653:     PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1654:   }
1655: 
1656:   /* set bi and bj with new data structure */
1657:   bi = b->i;
1658:   bj = b->j;

1660:   /* L part */
1661:   bi[0] = 0;
1662:   for (i=0; i<n; i++){
1663:     nz = adiag[i] - ai[i];
1664:     bi[i+1] = bi[i] + nz;
1665:     aj = a->j + ai[i];
1666:     for (j=0; j<nz; j++){
1667:       /*   *bj = aj[j]; bj++; */
1668:       bj[k++] = aj[j];
1669:     }
1670:   }
1671: 
1672:   /* U part */
1673:   bdiag[n] = bi[n]-1;
1674:   for (i=n-1; i>=0; i--){
1675:     nz = ai[i+1] - adiag[i] - 1;
1676:     aj = a->j + adiag[i] + 1;
1677:     for (j=0; j<nz; j++){
1678:       /*      *bj = aj[j]; bj++; */
1679:       bj[k++] = aj[j];
1680:     }
1681:     /* diag[i] */
1682:     /*    *bj = i; bj++; */
1683:     bj[k++] = i;
1684:     bdiag[i] = bdiag[i+1] + nz + 1;
1685:   }

1687:   fact->factortype             = MAT_FACTOR_ILU;
1688:   fact->info.factor_mallocs    = 0;
1689:   fact->info.fill_ratio_given  = info->fill;
1690:   fact->info.fill_ratio_needed = 1.0;
1691:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;

1693:   b       = (Mat_SeqAIJ*)(fact)->data;
1694:   b->row  = isrow;
1695:   b->col  = iscol;
1696:   b->icol = isicol;
1697:   PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1698:   PetscObjectReference((PetscObject)isrow);
1699:   PetscObjectReference((PetscObject)iscol);
1700:   return(0);
1701: }

1705: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1706: {
1707:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1708:   IS                 isicol;
1709:   PetscErrorCode     ierr;
1710:   const PetscInt     *r,*ic;
1711:   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1712:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1713:   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1714:   PetscInt           i,levels,diagonal_fill;
1715:   PetscBool          col_identity,row_identity;
1716:   PetscReal          f;
1717:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1718:   PetscBT            lnkbt;
1719:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1720:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1721:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1722: 
1724:   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1725:   /*
1726:   PetscBool          olddatastruct=PETSC_FALSE;
1727:   PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);
1728:   if(olddatastruct){
1729:     MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);
1730:     return(0);
1731:   }
1732:   */
1733: 
1734:   levels = (PetscInt)info->levels;
1735:   ISIdentity(isrow,&row_identity);
1736:   ISIdentity(iscol,&col_identity);

1738:   if (!levels && row_identity && col_identity) {
1739:     /* special case: ilu(0) with natural ordering */
1740:     MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1741:     if (a->inode.size) {
1742:       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1743:     }
1744:     return(0);
1745:   }

1747:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1748:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1749:   ISGetIndices(isrow,&r);
1750:   ISGetIndices(isicol,&ic);

1752:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1753:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1754:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1755:   bi[0] = bdiag[0] = 0;

1757:   PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);

1759:   /* create a linked list for storing column indices of the active row */
1760:   nlnk = n + 1;
1761:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1763:   /* initial FreeSpace size is f*(ai[n]+1) */
1764:   f             = info->fill;
1765:   diagonal_fill = (PetscInt)info->diagonal_fill;
1766:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1767:   current_space = free_space;
1768:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1769:   current_space_lvl = free_space_lvl;
1770: 
1771:   for (i=0; i<n; i++) {
1772:     nzi = 0;
1773:     /* copy current row into linked list */
1774:     nnz  = ai[r[i]+1] - ai[r[i]];
1775:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1776:     cols = aj + ai[r[i]];
1777:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1778:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1779:     nzi += nlnk;

1781:     /* make sure diagonal entry is included */
1782:     if (diagonal_fill && lnk[i] == -1) {
1783:       fm = n;
1784:       while (lnk[fm] < i) fm = lnk[fm];
1785:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1786:       lnk[fm]    = i;
1787:       lnk_lvl[i] = 0;
1788:       nzi++; dcount++;
1789:     }

1791:     /* add pivot rows into the active row */
1792:     nzbd = 0;
1793:     prow = lnk[n];
1794:     while (prow < i) {
1795:       nnz      = bdiag[prow];
1796:       cols     = bj_ptr[prow] + nnz + 1;
1797:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1798:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1799:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1800:       nzi += nlnk;
1801:       prow = lnk[prow];
1802:       nzbd++;
1803:     }
1804:     bdiag[i] = nzbd;
1805:     bi[i+1]  = bi[i] + nzi;

1807:     /* if free space is not available, make more free space */
1808:     if (current_space->local_remaining<nzi) {
1809:       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1810:       PetscFreeSpaceGet(nnz,&current_space);
1811:       PetscFreeSpaceGet(nnz,&current_space_lvl);
1812:       reallocs++;
1813:     }

1815:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1816:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1817:     bj_ptr[i]    = current_space->array;
1818:     bjlvl_ptr[i] = current_space_lvl->array;

1820:     /* make sure the active row i has diagonal entry */
1821:     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);

1823:     current_space->array           += nzi;
1824:     current_space->local_used      += nzi;
1825:     current_space->local_remaining -= nzi;
1826:     current_space_lvl->array           += nzi;
1827:     current_space_lvl->local_used      += nzi;
1828:     current_space_lvl->local_remaining -= nzi;
1829:   }

1831:   ISRestoreIndices(isrow,&r);
1832:   ISRestoreIndices(isicol,&ic);

1834:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1835:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1836:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1837: 
1838:   PetscIncompleteLLDestroy(lnk,lnkbt);
1839:   PetscFreeSpaceDestroy(free_space_lvl);
1840:   PetscFree2(bj_ptr,bjlvl_ptr);

1842: #if defined(PETSC_USE_INFO)
1843:   {
1844:     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1845:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1846:     PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1847:     PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1848:     PetscInfo(A,"for best performance.\n");
1849:     if (diagonal_fill) {
1850:       PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1851:     }
1852:   }
1853: #endif

1855:   /* put together the new matrix */
1856:   MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1857:   PetscLogObjectParent(fact,isicol);
1858:   b = (Mat_SeqAIJ*)(fact)->data;
1859:   b->free_a       = PETSC_TRUE;
1860:   b->free_ij      = PETSC_TRUE;
1861:   b->singlemalloc = PETSC_FALSE;
1862:   PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
1863:   b->j          = bj;
1864:   b->i          = bi;
1865:   b->diag       = bdiag;
1866:   b->ilen       = 0;
1867:   b->imax       = 0;
1868:   b->row        = isrow;
1869:   b->col        = iscol;
1870:   PetscObjectReference((PetscObject)isrow);
1871:   PetscObjectReference((PetscObject)iscol);
1872:   b->icol       = isicol;
1873:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1874:   /* In b structure:  Free imax, ilen, old a, old j.  
1875:      Allocate bdiag, solve_work, new a, new j */
1876:   PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1877:   b->maxnz = b->nz = bdiag[0]+1;
1878:   (fact)->info.factor_mallocs    = reallocs;
1879:   (fact)->info.fill_ratio_given  = f;
1880:   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1881:   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1882:   if (a->inode.size) {
1883:     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1884:   }
1885:   return(0);
1886: }

1890: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1891: {
1892:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1893:   IS                 isicol;
1894:   PetscErrorCode     ierr;
1895:   const PetscInt     *r,*ic;
1896:   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1897:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1898:   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1899:   PetscInt           i,levels,diagonal_fill;
1900:   PetscBool          col_identity,row_identity;
1901:   PetscReal          f;
1902:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1903:   PetscBT            lnkbt;
1904:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1905:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1906:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1907:   PetscBool          missing;
1908: 
1910:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1911:   f             = info->fill;
1912:   levels        = (PetscInt)info->levels;
1913:   diagonal_fill = (PetscInt)info->diagonal_fill;
1914:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

1916:   ISIdentity(isrow,&row_identity);
1917:   ISIdentity(iscol,&col_identity);
1918:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1919:     MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1920:     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1921:     if (a->inode.size) {
1922:       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1923:     }
1924:     fact->factortype = MAT_FACTOR_ILU;
1925:     (fact)->info.factor_mallocs    = 0;
1926:     (fact)->info.fill_ratio_given  = info->fill;
1927:     (fact)->info.fill_ratio_needed = 1.0;
1928:     b               = (Mat_SeqAIJ*)(fact)->data;
1929:     MatMissingDiagonal(A,&missing,&d);
1930:     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1931:     b->row              = isrow;
1932:     b->col              = iscol;
1933:     b->icol             = isicol;
1934:     PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1935:     PetscObjectReference((PetscObject)isrow);
1936:     PetscObjectReference((PetscObject)iscol);
1937:     return(0);
1938:   }

1940:   ISGetIndices(isrow,&r);
1941:   ISGetIndices(isicol,&ic);

1943:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1944:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1945:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1946:   bi[0] = bdiag[0] = 0;

1948:   PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);

1950:   /* create a linked list for storing column indices of the active row */
1951:   nlnk = n + 1;
1952:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1954:   /* initial FreeSpace size is f*(ai[n]+1) */
1955:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1956:   current_space = free_space;
1957:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1958:   current_space_lvl = free_space_lvl;
1959: 
1960:   for (i=0; i<n; i++) {
1961:     nzi = 0;
1962:     /* copy current row into linked list */
1963:     nnz  = ai[r[i]+1] - ai[r[i]];
1964:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1965:     cols = aj + ai[r[i]];
1966:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1967:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1968:     nzi += nlnk;

1970:     /* make sure diagonal entry is included */
1971:     if (diagonal_fill && lnk[i] == -1) {
1972:       fm = n;
1973:       while (lnk[fm] < i) fm = lnk[fm];
1974:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1975:       lnk[fm]    = i;
1976:       lnk_lvl[i] = 0;
1977:       nzi++; dcount++;
1978:     }

1980:     /* add pivot rows into the active row */
1981:     nzbd = 0;
1982:     prow = lnk[n];
1983:     while (prow < i) {
1984:       nnz      = bdiag[prow];
1985:       cols     = bj_ptr[prow] + nnz + 1;
1986:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1987:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1988:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1989:       nzi += nlnk;
1990:       prow = lnk[prow];
1991:       nzbd++;
1992:     }
1993:     bdiag[i] = nzbd;
1994:     bi[i+1]  = bi[i] + nzi;

1996:     /* if free space is not available, make more free space */
1997:     if (current_space->local_remaining<nzi) {
1998:       nnz = nzi*(n - i); /* estimated and max additional space needed */
1999:       PetscFreeSpaceGet(nnz,&current_space);
2000:       PetscFreeSpaceGet(nnz,&current_space_lvl);
2001:       reallocs++;
2002:     }

2004:     /* copy data into free_space and free_space_lvl, then initialize lnk */
2005:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2006:     bj_ptr[i]    = current_space->array;
2007:     bjlvl_ptr[i] = current_space_lvl->array;

2009:     /* make sure the active row i has diagonal entry */
2010:     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);

2012:     current_space->array           += nzi;
2013:     current_space->local_used      += nzi;
2014:     current_space->local_remaining -= nzi;
2015:     current_space_lvl->array           += nzi;
2016:     current_space_lvl->local_used      += nzi;
2017:     current_space_lvl->local_remaining -= nzi;
2018:   }

2020:   ISRestoreIndices(isrow,&r);
2021:   ISRestoreIndices(isicol,&ic);

2023:   /* destroy list of free space and other temporary arrays */
2024:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
2025:   PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2026:   PetscIncompleteLLDestroy(lnk,lnkbt);
2027:   PetscFreeSpaceDestroy(free_space_lvl);
2028:   PetscFree2(bj_ptr,bjlvl_ptr);

2030: #if defined(PETSC_USE_INFO)
2031:   {
2032:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2033:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2034:     PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
2035:     PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
2036:     PetscInfo(A,"for best performance.\n");
2037:     if (diagonal_fill) {
2038:       PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2039:     }
2040:   }
2041: #endif

2043:   /* put together the new matrix */
2044:   MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
2045:   PetscLogObjectParent(fact,isicol);
2046:   b = (Mat_SeqAIJ*)(fact)->data;
2047:   b->free_a       = PETSC_TRUE;
2048:   b->free_ij      = PETSC_TRUE;
2049:   b->singlemalloc = PETSC_FALSE;
2050:   PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);
2051:   b->j          = bj;
2052:   b->i          = bi;
2053:   for (i=0; i<n; i++) bdiag[i] += bi[i];
2054:   b->diag       = bdiag;
2055:   b->ilen       = 0;
2056:   b->imax       = 0;
2057:   b->row        = isrow;
2058:   b->col        = iscol;
2059:   PetscObjectReference((PetscObject)isrow);
2060:   PetscObjectReference((PetscObject)iscol);
2061:   b->icol       = isicol;
2062:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
2063:   /* In b structure:  Free imax, ilen, old a, old j.  
2064:      Allocate bdiag, solve_work, new a, new j */
2065:   PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2066:   b->maxnz             = b->nz = bi[n] ;
2067:   (fact)->info.factor_mallocs    = reallocs;
2068:   (fact)->info.fill_ratio_given  = f;
2069:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2070:   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2071:   if (a->inode.size) {
2072:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2073:   }
2074:   return(0);
2075: }

2079: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2080: {
2081:   Mat            C = B;
2082:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2083:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2084:   IS             ip=b->row,iip = b->icol;
2086:   const PetscInt *rip,*riip;
2087:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2088:   PetscInt       *ai=a->i,*aj=a->j;
2089:   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2090:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2091:   PetscBool      perm_identity;
2092:   FactorShiftCtx sctx;
2093:   PetscReal      rs;
2094:   MatScalar      d,*v;

2097:   /* MatPivotSetUp(): initialize shift context sctx */
2098:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

2100:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2101:     sctx.shift_top = info->zeropivot;
2102:     for (i=0; i<mbs; i++) {
2103:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2104:       d  = (aa)[a->diag[i]];
2105:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2106:       v  = aa+ai[i];
2107:       nz = ai[i+1] - ai[i];
2108:       for (j=0; j<nz; j++)
2109:         rs += PetscAbsScalar(v[j]);
2110:       if (rs>sctx.shift_top) sctx.shift_top = rs;
2111:     }
2112:     sctx.shift_top   *= 1.1;
2113:     sctx.nshift_max   = 5;
2114:     sctx.shift_lo     = 0.;
2115:     sctx.shift_hi     = 1.;
2116:   }

2118:   ISGetIndices(ip,&rip);
2119:   ISGetIndices(iip,&riip);
2120: 
2121:   /* allocate working arrays
2122:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2123:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 
2124:   */
2125:   PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);
2126: 
2127:   do {
2128:     sctx.newshift = PETSC_FALSE;

2130:     for (i=0; i<mbs; i++) c2r[i] = mbs;
2131:     il[0] = 0;
2132: 
2133:     for (k = 0; k<mbs; k++){
2134:       /* zero rtmp */
2135:       nz = bi[k+1] - bi[k];
2136:       bjtmp = bj + bi[k];
2137:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2138: 
2139:       /* load in initial unfactored row */
2140:       bval = ba + bi[k];
2141:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2142:       for (j = jmin; j < jmax; j++){
2143:         col = riip[aj[j]];
2144:         if (col >= k){ /* only take upper triangular entry */
2145:           rtmp[col] = aa[j];
2146:           *bval++   = 0.0; /* for in-place factorization */
2147:         }
2148:       }
2149:       /* shift the diagonal of the matrix: ZeropivotApply() */
2150:       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2151: 
2152:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2153:       dk = rtmp[k];
2154:       i  = c2r[k]; /* first row to be added to k_th row  */

2156:       while (i < k){
2157:         nexti = c2r[i]; /* next row to be added to k_th row */
2158: 
2159:         /* compute multiplier, update diag(k) and U(i,k) */
2160:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2161:         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2162:         dk   += uikdi*ba[ili]; /* update diag[k] */
2163:         ba[ili] = uikdi; /* -U(i,k) */

2165:         /* add multiple of row i to k-th row */
2166:         jmin = ili + 1; jmax = bi[i+1];
2167:         if (jmin < jmax){
2168:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2169:           /* update il and c2r for row i */
2170:           il[i] = jmin;
2171:           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2172:         }
2173:         i = nexti;
2174:       }

2176:       /* copy data into U(k,:) */
2177:       rs   = 0.0;
2178:       jmin = bi[k]; jmax = bi[k+1]-1;
2179:       if (jmin < jmax) {
2180:         for (j=jmin; j<jmax; j++){
2181:           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2182:         }
2183:         /* add the k-th row into il and c2r */
2184:         il[k] = jmin;
2185:         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2186:       }

2188:       /* MatPivotCheck() */
2189:       sctx.rs  = rs;
2190:       sctx.pv  = dk;
2191:       MatPivotCheck(A,info,&sctx,i);
2192:       if(sctx.newshift) break;
2193:       dk = sctx.pv;
2194: 
2195:       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2196:     }
2197:   } while (sctx.newshift);
2198: 
2199:   PetscFree3(rtmp,il,c2r);
2200:   ISRestoreIndices(ip,&rip);
2201:   ISRestoreIndices(iip,&riip);

2203:   ISIdentity(ip,&perm_identity);
2204:   if (perm_identity){
2205:     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2206:     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2207:     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2208:     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2209:   } else {
2210:     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2211:     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2212:     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2213:     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2214:   }

2216:   C->assembled    = PETSC_TRUE;
2217:   C->preallocated = PETSC_TRUE;
2218:   PetscLogFlops(C->rmap->n);

2220:   /* MatPivotView() */
2221:   if (sctx.nshift){
2222:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2223:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
2224:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2225:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2226:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2227:       PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
2228:     }
2229:   }
2230:   return(0);
2231: }

2235: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2236: {
2237:   Mat            C = B;
2238:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2239:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2240:   IS             ip=b->row,iip = b->icol;
2242:   const PetscInt *rip,*riip;
2243:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2244:   PetscInt       *ai=a->i,*aj=a->j;
2245:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2246:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2247:   PetscBool      perm_identity;
2248:   FactorShiftCtx sctx;
2249:   PetscReal      rs;
2250:   MatScalar      d,*v;

2253:   /* MatPivotSetUp(): initialize shift context sctx */
2254:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

2256:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2257:     sctx.shift_top = info->zeropivot;
2258:     for (i=0; i<mbs; i++) {
2259:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2260:       d  = (aa)[a->diag[i]];
2261:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2262:       v  = aa+ai[i];
2263:       nz = ai[i+1] - ai[i];
2264:       for (j=0; j<nz; j++)
2265:         rs += PetscAbsScalar(v[j]);
2266:       if (rs>sctx.shift_top) sctx.shift_top = rs;
2267:     }
2268:     sctx.shift_top   *= 1.1;
2269:     sctx.nshift_max   = 5;
2270:     sctx.shift_lo     = 0.;
2271:     sctx.shift_hi     = 1.;
2272:   }

2274:   ISGetIndices(ip,&rip);
2275:   ISGetIndices(iip,&riip);
2276: 
2277:   /* initialization */
2278:   PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);
2279: 
2280:   do {
2281:     sctx.newshift = PETSC_FALSE;

2283:     for (i=0; i<mbs; i++) jl[i] = mbs;
2284:     il[0] = 0;
2285: 
2286:     for (k = 0; k<mbs; k++){
2287:       /* zero rtmp */
2288:       nz = bi[k+1] - bi[k];
2289:       bjtmp = bj + bi[k];
2290:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

2292:       bval = ba + bi[k];
2293:       /* initialize k-th row by the perm[k]-th row of A */
2294:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2295:       for (j = jmin; j < jmax; j++){
2296:         col = riip[aj[j]];
2297:         if (col >= k){ /* only take upper triangular entry */
2298:           rtmp[col] = aa[j];
2299:           *bval++  = 0.0; /* for in-place factorization */
2300:         }
2301:       }
2302:       /* shift the diagonal of the matrix */
2303:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2305:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2306:       dk = rtmp[k];
2307:       i = jl[k]; /* first row to be added to k_th row  */

2309:       while (i < k){
2310:         nexti = jl[i]; /* next row to be added to k_th row */
2311: 
2312:         /* compute multiplier, update diag(k) and U(i,k) */
2313:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2314:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2315:         dk += uikdi*ba[ili];
2316:         ba[ili] = uikdi; /* -U(i,k) */

2318:         /* add multiple of row i to k-th row */
2319:         jmin = ili + 1; jmax = bi[i+1];
2320:         if (jmin < jmax){
2321:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2322:           /* update il and jl for row i */
2323:           il[i] = jmin;
2324:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2325:         }
2326:         i = nexti;
2327:       }

2329:       /* shift the diagonals when zero pivot is detected */
2330:       /* compute rs=sum of abs(off-diagonal) */
2331:       rs   = 0.0;
2332:       jmin = bi[k]+1;
2333:       nz   = bi[k+1] - jmin;
2334:       bcol = bj + jmin;
2335:       for (j=0; j<nz; j++) {
2336:         rs += PetscAbsScalar(rtmp[bcol[j]]);
2337:       }

2339:       sctx.rs = rs;
2340:       sctx.pv = dk;
2341:       MatPivotCheck(A,info,&sctx,k);
2342:       if (sctx.newshift) break;
2343:       dk = sctx.pv;
2344: 
2345:       /* copy data into U(k,:) */
2346:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2347:       jmin = bi[k]+1; jmax = bi[k+1];
2348:       if (jmin < jmax) {
2349:         for (j=jmin; j<jmax; j++){
2350:           col = bj[j]; ba[j] = rtmp[col];
2351:         }
2352:         /* add the k-th row into il and jl */
2353:         il[k] = jmin;
2354:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2355:       }
2356:     }
2357:   } while (sctx.newshift);

2359:   PetscFree3(rtmp,il,jl);
2360:   ISRestoreIndices(ip,&rip);
2361:   ISRestoreIndices(iip,&riip);

2363:   ISIdentity(ip,&perm_identity);
2364:   if (perm_identity){
2365:     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366:     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367:     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368:     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369:   } else {
2370:     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2371:     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2372:     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2373:     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2374:   }

2376:   C->assembled    = PETSC_TRUE;
2377:   C->preallocated = PETSC_TRUE;
2378:   PetscLogFlops(C->rmap->n);
2379:   if (sctx.nshift){
2380:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2381:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2382:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2383:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2384:     }
2385:   }
2386:   return(0);
2387: }

2389: /* 
2390:    icc() under revised new data structure.
2391:    Factored arrays bj and ba are stored as
2392:      U(0,:),...,U(i,:),U(n-1,:)

2394:    ui=fact->i is an array of size n+1, in which 
2395:    ui+
2396:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2397:      ui[n]:  points to U(n-1,n-1)+1
2398:      
2399:   udiag=fact->diag is an array of size n,in which
2400:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2402:    U(i,:) contains udiag[i] as its last entry, i.e., 
2403:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2404: */

2408: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2409: {
2410:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2411:   Mat_SeqSBAIJ       *b;
2412:   PetscErrorCode     ierr;
2413:   PetscBool          perm_identity,missing;
2414:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2415:   const PetscInt     *rip,*riip;
2416:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2417:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2418:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2419:   PetscReal          fill=info->fill,levels=info->levels;
2420:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2421:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2422:   PetscBT            lnkbt;
2423:   IS                 iperm;
2424: 
2426:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2427:   MatMissingDiagonal(A,&missing,&d);
2428:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2429:   ISIdentity(perm,&perm_identity);
2430:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

2432:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2433:   PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2434:   ui[0] = 0;

2436:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2437:   if (!levels && perm_identity) {
2438:     for (i=0; i<am; i++) {
2439:       ncols    = ai[i+1] - a->diag[i];
2440:       ui[i+1]  = ui[i] + ncols;
2441:       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2442:     }
2443:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2444:     cols = uj;
2445:     for (i=0; i<am; i++) {
2446:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2447:       ncols = ai[i+1] - a->diag[i] -1;
2448:       for (j=0; j<ncols; j++) *cols++ = aj[j];
2449:       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2450:     }
2451:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2452:     ISGetIndices(iperm,&riip);
2453:     ISGetIndices(perm,&rip);

2455:     /* initialization */
2456:     PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);

2458:     /* jl: linked list for storing indices of the pivot rows 
2459:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2460:     PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2461:     for (i=0; i<am; i++){
2462:       jl[i] = am; il[i] = 0;
2463:     }

2465:     /* create and initialize a linked list for storing column indices of the active row k */
2466:     nlnk = am + 1;
2467:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

2469:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2470:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2471:     current_space = free_space;
2472:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2473:     current_space_lvl = free_space_lvl;

2475:     for (k=0; k<am; k++){  /* for each active row k */
2476:       /* initialize lnk by the column indices of row rip[k] of A */
2477:       nzk   = 0;
2478:       ncols = ai[rip[k]+1] - ai[rip[k]];
2479:       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2480:       ncols_upper = 0;
2481:       for (j=0; j<ncols; j++){
2482:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2483:         if (riip[i] >= k){ /* only take upper triangular entry */
2484:           ajtmp[ncols_upper] = i;
2485:           ncols_upper++;
2486:         }
2487:       }
2488:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2489:       nzk += nlnk;

2491:       /* update lnk by computing fill-in for each pivot row to be merged in */
2492:       prow = jl[k]; /* 1st pivot row */
2493: 
2494:       while (prow < k){
2495:         nextprow = jl[prow];
2496: 
2497:         /* merge prow into k-th row */
2498:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2499:         jmax = ui[prow+1];
2500:         ncols = jmax-jmin;
2501:         i     = jmin - ui[prow];
2502:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2503:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2504:         j     = *(uj - 1);
2505:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2506:         nzk += nlnk;

2508:         /* update il and jl for prow */
2509:         if (jmin < jmax){
2510:           il[prow] = jmin;
2511:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2512:         }
2513:         prow = nextprow;
2514:       }

2516:       /* if free space is not available, make more free space */
2517:       if (current_space->local_remaining<nzk) {
2518:         i  = am - k + 1; /* num of unfactored rows */
2519:         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2520:         PetscFreeSpaceGet(i,&current_space);
2521:         PetscFreeSpaceGet(i,&current_space_lvl);
2522:         reallocs++;
2523:       }

2525:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2526:       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2527:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

2529:       /* add the k-th row into il and jl */
2530:       if (nzk > 1){
2531:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2532:         jl[k] = jl[i]; jl[i] = k;
2533:         il[k] = ui[k] + 1;
2534:       }
2535:       uj_ptr[k]     = current_space->array;
2536:       uj_lvl_ptr[k] = current_space_lvl->array;

2538:       current_space->array           += nzk;
2539:       current_space->local_used      += nzk;
2540:       current_space->local_remaining -= nzk;

2542:       current_space_lvl->array           += nzk;
2543:       current_space_lvl->local_used      += nzk;
2544:       current_space_lvl->local_remaining -= nzk;

2546:       ui[k+1] = ui[k] + nzk;
2547:     }

2549:     ISRestoreIndices(perm,&rip);
2550:     ISRestoreIndices(iperm,&riip);
2551:     PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2552:     PetscFree(ajtmp);

2554:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2555:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2556:     PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor  */
2557:     PetscIncompleteLLDestroy(lnk,lnkbt);
2558:     PetscFreeSpaceDestroy(free_space_lvl);

2560:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2562:   /* put together the new matrix in MATSEQSBAIJ format */
2563:   b    = (Mat_SeqSBAIJ*)(fact)->data;
2564:   b->singlemalloc = PETSC_FALSE;
2565:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2566:   b->j    = uj;
2567:   b->i    = ui;
2568:   b->diag = udiag;
2569:   b->free_diag = PETSC_TRUE;
2570:   b->ilen = 0;
2571:   b->imax = 0;
2572:   b->row  = perm;
2573:   b->col  = perm;
2574:   PetscObjectReference((PetscObject)perm);
2575:   PetscObjectReference((PetscObject)perm);
2576:   b->icol = iperm;
2577:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2578:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2579:   PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2580:   b->maxnz   = b->nz = ui[am];
2581:   b->free_a  = PETSC_TRUE;
2582:   b->free_ij = PETSC_TRUE;
2583: 
2584:   fact->info.factor_mallocs   = reallocs;
2585:   fact->info.fill_ratio_given = fill;
2586:   if (ai[am] != 0) {
2587:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2588:     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2589:   } else {
2590:     fact->info.fill_ratio_needed = 0.0;
2591:   }
2592: #if defined(PETSC_USE_INFO)
2593:     if (ai[am] != 0) {
2594:       PetscReal af = fact->info.fill_ratio_needed;
2595:       PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2596:       PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2597:       PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2598:     } else {
2599:       PetscInfo(A,"Empty matrix.\n");
2600:     }
2601: #endif
2602:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2603:   return(0);
2604: }

2608: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2609: {
2610:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2611:   Mat_SeqSBAIJ       *b;
2612:   PetscErrorCode     ierr;
2613:   PetscBool          perm_identity,missing;
2614:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2615:   const PetscInt     *rip,*riip;
2616:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2617:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2618:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2619:   PetscReal          fill=info->fill,levels=info->levels;
2620:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2621:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2622:   PetscBT            lnkbt;
2623:   IS                 iperm;
2624: 
2626:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2627:   MatMissingDiagonal(A,&missing,&d);
2628:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2629:   ISIdentity(perm,&perm_identity);
2630:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

2632:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2633:   PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2634:   ui[0] = 0;

2636:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2637:   if (!levels && perm_identity) {

2639:     for (i=0; i<am; i++) {
2640:       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2641:       udiag[i] = ui[i];
2642:     }
2643:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2644:     cols = uj;
2645:     for (i=0; i<am; i++) {
2646:       aj    = a->j + a->diag[i];
2647:       ncols = ui[i+1] - ui[i];
2648:       for (j=0; j<ncols; j++) *cols++ = *aj++;
2649:     }
2650:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2651:     ISGetIndices(iperm,&riip);
2652:     ISGetIndices(perm,&rip);

2654:     /* initialization */
2655:     PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);

2657:     /* jl: linked list for storing indices of the pivot rows 
2658:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2659:     PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2660:     for (i=0; i<am; i++){
2661:       jl[i] = am; il[i] = 0;
2662:     }

2664:     /* create and initialize a linked list for storing column indices of the active row k */
2665:     nlnk = am + 1;
2666:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

2668:     /* initial FreeSpace size is fill*(ai[am]+1) */
2669:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2670:     current_space = free_space;
2671:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2672:     current_space_lvl = free_space_lvl;

2674:     for (k=0; k<am; k++){  /* for each active row k */
2675:       /* initialize lnk by the column indices of row rip[k] of A */
2676:       nzk   = 0;
2677:       ncols = ai[rip[k]+1] - ai[rip[k]];
2678:       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2679:       ncols_upper = 0;
2680:       for (j=0; j<ncols; j++){
2681:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2682:         if (riip[i] >= k){ /* only take upper triangular entry */
2683:           ajtmp[ncols_upper] = i;
2684:           ncols_upper++;
2685:         }
2686:       }
2687:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2688:       nzk += nlnk;

2690:       /* update lnk by computing fill-in for each pivot row to be merged in */
2691:       prow = jl[k]; /* 1st pivot row */
2692: 
2693:       while (prow < k){
2694:         nextprow = jl[prow];
2695: 
2696:         /* merge prow into k-th row */
2697:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2698:         jmax = ui[prow+1];
2699:         ncols = jmax-jmin;
2700:         i     = jmin - ui[prow];
2701:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2702:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2703:         j     = *(uj - 1);
2704:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2705:         nzk += nlnk;

2707:         /* update il and jl for prow */
2708:         if (jmin < jmax){
2709:           il[prow] = jmin;
2710:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2711:         }
2712:         prow = nextprow;
2713:       }

2715:       /* if free space is not available, make more free space */
2716:       if (current_space->local_remaining<nzk) {
2717:         i = am - k + 1; /* num of unfactored rows */
2718:         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2719:         PetscFreeSpaceGet(i,&current_space);
2720:         PetscFreeSpaceGet(i,&current_space_lvl);
2721:         reallocs++;
2722:       }

2724:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2725:       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2726:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

2728:       /* add the k-th row into il and jl */
2729:       if (nzk > 1){
2730:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2731:         jl[k] = jl[i]; jl[i] = k;
2732:         il[k] = ui[k] + 1;
2733:       }
2734:       uj_ptr[k]     = current_space->array;
2735:       uj_lvl_ptr[k] = current_space_lvl->array;

2737:       current_space->array           += nzk;
2738:       current_space->local_used      += nzk;
2739:       current_space->local_remaining -= nzk;

2741:       current_space_lvl->array           += nzk;
2742:       current_space_lvl->local_used      += nzk;
2743:       current_space_lvl->local_remaining -= nzk;

2745:       ui[k+1] = ui[k] + nzk;
2746:     }

2748: #if defined(PETSC_USE_INFO)
2749:     if (ai[am] != 0) {
2750:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2751:       PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2752:       PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2753:       PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2754:     } else {
2755:       PetscInfo(A,"Empty matrix.\n");
2756:     }
2757: #endif

2759:     ISRestoreIndices(perm,&rip);
2760:     ISRestoreIndices(iperm,&riip);
2761:     PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2762:     PetscFree(ajtmp);

2764:     /* destroy list of free space and other temporary array(s) */
2765:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2766:     PetscFreeSpaceContiguous(&free_space,uj);
2767:     PetscIncompleteLLDestroy(lnk,lnkbt);
2768:     PetscFreeSpaceDestroy(free_space_lvl);

2770:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2772:   /* put together the new matrix in MATSEQSBAIJ format */

2774:   b    = (Mat_SeqSBAIJ*)fact->data;
2775:   b->singlemalloc = PETSC_FALSE;
2776:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2777:   b->j    = uj;
2778:   b->i    = ui;
2779:   b->diag = udiag;
2780:   b->free_diag = PETSC_TRUE;
2781:   b->ilen = 0;
2782:   b->imax = 0;
2783:   b->row  = perm;
2784:   b->col  = perm;
2785:   PetscObjectReference((PetscObject)perm);
2786:   PetscObjectReference((PetscObject)perm);
2787:   b->icol = iperm;
2788:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2789:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2790:   PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2791:   b->maxnz   = b->nz = ui[am];
2792:   b->free_a  = PETSC_TRUE;
2793:   b->free_ij = PETSC_TRUE;
2794: 
2795:   fact->info.factor_mallocs    = reallocs;
2796:   fact->info.fill_ratio_given  = fill;
2797:   if (ai[am] != 0) {
2798:     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2799:   } else {
2800:     fact->info.fill_ratio_needed = 0.0;
2801:   }
2802:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2803:   return(0);
2804: }

2808: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2809: {
2810:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2811:   Mat_SeqSBAIJ       *b;
2812:   PetscErrorCode     ierr;
2813:   PetscBool          perm_identity;
2814:   PetscReal          fill = info->fill;
2815:   const PetscInt     *rip,*riip;
2816:   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2817:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2818:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2819:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2820:   PetscBT            lnkbt;
2821:   IS                 iperm;

2824:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2825:   /* check whether perm is the identity mapping */
2826:   ISIdentity(perm,&perm_identity);
2827:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2828:   ISGetIndices(iperm,&riip);
2829:   ISGetIndices(perm,&rip);

2831:   /* initialization */
2832:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2833:   PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2834:   ui[0] = 0;

2836:   /* jl: linked list for storing indices of the pivot rows 
2837:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2838:   PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2839:   for (i=0; i<am; i++){
2840:     jl[i] = am; il[i] = 0;
2841:   }

2843:   /* create and initialize a linked list for storing column indices of the active row k */
2844:   nlnk = am + 1;
2845:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

2847:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2848:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2849:   current_space = free_space;

2851:   for (k=0; k<am; k++){  /* for each active row k */
2852:     /* initialize lnk by the column indices of row rip[k] of A */
2853:     nzk   = 0;
2854:     ncols = ai[rip[k]+1] - ai[rip[k]];
2855:     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2856:     ncols_upper = 0;
2857:     for (j=0; j<ncols; j++){
2858:       i = riip[*(aj + ai[rip[k]] + j)];
2859:       if (i >= k){ /* only take upper triangular entry */
2860:         cols[ncols_upper] = i;
2861:         ncols_upper++;
2862:       }
2863:     }
2864:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2865:     nzk += nlnk;

2867:     /* update lnk by computing fill-in for each pivot row to be merged in */
2868:     prow = jl[k]; /* 1st pivot row */
2869: 
2870:     while (prow < k){
2871:       nextprow = jl[prow];
2872:       /* merge prow into k-th row */
2873:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2874:       jmax = ui[prow+1];
2875:       ncols = jmax-jmin;
2876:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2877:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2878:       nzk += nlnk;

2880:       /* update il and jl for prow */
2881:       if (jmin < jmax){
2882:         il[prow] = jmin;
2883:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2884:       }
2885:       prow = nextprow;
2886:     }

2888:     /* if free space is not available, make more free space */
2889:     if (current_space->local_remaining<nzk) {
2890:       i  = am - k + 1; /* num of unfactored rows */
2891:       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2892:       PetscFreeSpaceGet(i,&current_space);
2893:       reallocs++;
2894:     }

2896:     /* copy data into free space, then initialize lnk */
2897:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

2899:     /* add the k-th row into il and jl */
2900:     if (nzk > 1){
2901:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2902:       jl[k] = jl[i]; jl[i] = k;
2903:       il[k] = ui[k] + 1;
2904:     }
2905:     ui_ptr[k] = current_space->array;
2906:     current_space->array           += nzk;
2907:     current_space->local_used      += nzk;
2908:     current_space->local_remaining -= nzk;

2910:     ui[k+1] = ui[k] + nzk;
2911:   }

2913:   ISRestoreIndices(perm,&rip);
2914:   ISRestoreIndices(iperm,&riip);
2915:   PetscFree4(ui_ptr,jl,il,cols);

2917:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2918:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2919:   PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2920:   PetscLLDestroy(lnk,lnkbt);

2922:   /* put together the new matrix in MATSEQSBAIJ format */

2924:   b = (Mat_SeqSBAIJ*)fact->data;
2925:   b->singlemalloc = PETSC_FALSE;
2926:   b->free_a       = PETSC_TRUE;
2927:   b->free_ij      = PETSC_TRUE;
2928:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2929:   b->j    = uj;
2930:   b->i    = ui;
2931:   b->diag = udiag;
2932:   b->free_diag = PETSC_TRUE;
2933:   b->ilen = 0;
2934:   b->imax = 0;
2935:   b->row  = perm;
2936:   b->col  = perm;
2937:   PetscObjectReference((PetscObject)perm);
2938:   PetscObjectReference((PetscObject)perm);
2939:   b->icol = iperm;
2940:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2941:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2942:   PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2943:   b->maxnz = b->nz = ui[am];
2944: 
2945:   fact->info.factor_mallocs    = reallocs;
2946:   fact->info.fill_ratio_given  = fill;
2947:   if (ai[am] != 0) {
2948:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2949:     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2950:   } else {
2951:     fact->info.fill_ratio_needed = 0.0;
2952:   }
2953: #if defined(PETSC_USE_INFO)
2954:   if (ai[am] != 0) {
2955:     PetscReal af = fact->info.fill_ratio_needed;
2956:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2957:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2958:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2959:   } else {
2960:      PetscInfo(A,"Empty matrix.\n");
2961:   }
2962: #endif
2963:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2964:   return(0);
2965: }

2969: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2970: {
2971:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2972:   Mat_SeqSBAIJ       *b;
2973:   PetscErrorCode     ierr;
2974:   PetscBool          perm_identity;
2975:   PetscReal          fill = info->fill;
2976:   const PetscInt     *rip,*riip;
2977:   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2978:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2979:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2980:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2981:   PetscBT            lnkbt;
2982:   IS                 iperm;

2985:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2986:   /* check whether perm is the identity mapping */
2987:   ISIdentity(perm,&perm_identity);
2988:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2989:   ISGetIndices(iperm,&riip);
2990:   ISGetIndices(perm,&rip);

2992:   /* initialization */
2993:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2994:   ui[0] = 0;

2996:   /* jl: linked list for storing indices of the pivot rows 
2997:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2998:   PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2999:   for (i=0; i<am; i++){
3000:     jl[i] = am; il[i] = 0;
3001:   }

3003:   /* create and initialize a linked list for storing column indices of the active row k */
3004:   nlnk = am + 1;
3005:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

3007:   /* initial FreeSpace size is fill*(ai[am]+1) */
3008:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3009:   current_space = free_space;

3011:   for (k=0; k<am; k++){  /* for each active row k */
3012:     /* initialize lnk by the column indices of row rip[k] of A */
3013:     nzk   = 0;
3014:     ncols = ai[rip[k]+1] - ai[rip[k]];
3015:     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3016:     ncols_upper = 0;
3017:     for (j=0; j<ncols; j++){
3018:       i = riip[*(aj + ai[rip[k]] + j)];
3019:       if (i >= k){ /* only take upper triangular entry */
3020:         cols[ncols_upper] = i;
3021:         ncols_upper++;
3022:       }
3023:     }
3024:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3025:     nzk += nlnk;

3027:     /* update lnk by computing fill-in for each pivot row to be merged in */
3028:     prow = jl[k]; /* 1st pivot row */
3029: 
3030:     while (prow < k){
3031:       nextprow = jl[prow];
3032:       /* merge prow into k-th row */
3033:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3034:       jmax = ui[prow+1];
3035:       ncols = jmax-jmin;
3036:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3037:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3038:       nzk += nlnk;

3040:       /* update il and jl for prow */
3041:       if (jmin < jmax){
3042:         il[prow] = jmin;
3043:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3044:       }
3045:       prow = nextprow;
3046:     }

3048:     /* if free space is not available, make more free space */
3049:     if (current_space->local_remaining<nzk) {
3050:       i = am - k + 1; /* num of unfactored rows */
3051:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3052:       PetscFreeSpaceGet(i,&current_space);
3053:       reallocs++;
3054:     }

3056:     /* copy data into free space, then initialize lnk */
3057:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

3059:     /* add the k-th row into il and jl */
3060:     if (nzk-1 > 0){
3061:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3062:       jl[k] = jl[i]; jl[i] = k;
3063:       il[k] = ui[k] + 1;
3064:     }
3065:     ui_ptr[k] = current_space->array;
3066:     current_space->array           += nzk;
3067:     current_space->local_used      += nzk;
3068:     current_space->local_remaining -= nzk;

3070:     ui[k+1] = ui[k] + nzk;
3071:   }

3073: #if defined(PETSC_USE_INFO)
3074:   if (ai[am] != 0) {
3075:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3076:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
3077:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
3078:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
3079:   } else {
3080:      PetscInfo(A,"Empty matrix.\n");
3081:   }
3082: #endif

3084:   ISRestoreIndices(perm,&rip);
3085:   ISRestoreIndices(iperm,&riip);
3086:   PetscFree4(ui_ptr,jl,il,cols);

3088:   /* destroy list of free space and other temporary array(s) */
3089:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
3090:   PetscFreeSpaceContiguous(&free_space,uj);
3091:   PetscLLDestroy(lnk,lnkbt);

3093:   /* put together the new matrix in MATSEQSBAIJ format */

3095:   b = (Mat_SeqSBAIJ*)fact->data;
3096:   b->singlemalloc = PETSC_FALSE;
3097:   b->free_a       = PETSC_TRUE;
3098:   b->free_ij      = PETSC_TRUE;
3099:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
3100:   b->j    = uj;
3101:   b->i    = ui;
3102:   b->diag = 0;
3103:   b->ilen = 0;
3104:   b->imax = 0;
3105:   b->row  = perm;
3106:   b->col  = perm;
3107:   PetscObjectReference((PetscObject)perm);
3108:   PetscObjectReference((PetscObject)perm);
3109:   b->icol = iperm;
3110:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3111:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
3112:   PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3113:   b->maxnz = b->nz = ui[am];
3114: 
3115:   fact->info.factor_mallocs    = reallocs;
3116:   fact->info.fill_ratio_given  = fill;
3117:   if (ai[am] != 0) {
3118:     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3119:   } else {
3120:     fact->info.fill_ratio_needed = 0.0;
3121:   }
3122:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3123:   return(0);
3124: }

3128: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3129: {
3130:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3131:   PetscErrorCode    ierr;
3132:   PetscInt          n = A->rmap->n;
3133:   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3134:   PetscScalar       *x,sum;
3135:   const PetscScalar *b;
3136:   const MatScalar   *aa = a->a,*v;
3137:   PetscInt          i,nz;

3140:   if (!n) return(0);

3142:   VecGetArrayRead(bb,&b);
3143:   VecGetArray(xx,&x);

3145:   /* forward solve the lower triangular */
3146:   x[0] = b[0];
3147:   v    = aa;
3148:   vi   = aj;
3149:   for (i=1; i<n; i++) {
3150:     nz  = ai[i+1] - ai[i];
3151:     sum = b[i];
3152:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3153:     v  += nz;
3154:     vi += nz;
3155:     x[i] = sum;
3156:   }
3157: 
3158:   /* backward solve the upper triangular */
3159:   for (i=n-1; i>=0; i--){
3160:     v   = aa + adiag[i+1] + 1;
3161:     vi  = aj + adiag[i+1] + 1;
3162:     nz = adiag[i] - adiag[i+1]-1;
3163:     sum = x[i];
3164:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3165:     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3166:   }
3167: 
3168:   PetscLogFlops(2.0*a->nz - A->cmap->n);
3169:   VecRestoreArrayRead(bb,&b);
3170:   VecRestoreArray(xx,&x);
3171:   return(0);
3172: }

3176: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3177: {
3178:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3179:   IS                iscol = a->col,isrow = a->row;
3180:   PetscErrorCode    ierr;
3181:   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3182:   const PetscInt    *rout,*cout,*r,*c;
3183:   PetscScalar       *x,*tmp,sum;
3184:   const PetscScalar *b;
3185:   const MatScalar   *aa = a->a,*v;

3188:   if (!n) return(0);

3190:   VecGetArrayRead(bb,&b);
3191:   VecGetArray(xx,&x);
3192:   tmp  = a->solve_work;

3194:   ISGetIndices(isrow,&rout); r = rout;
3195:   ISGetIndices(iscol,&cout); c = cout;

3197:   /* forward solve the lower triangular */
3198:   tmp[0] = b[r[0]];
3199:   v      = aa;
3200:   vi     = aj;
3201:   for (i=1; i<n; i++) {
3202:     nz  = ai[i+1] - ai[i];
3203:     sum = b[r[i]];
3204:     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3205:     tmp[i] = sum;
3206:     v += nz; vi += nz;
3207:   }

3209:   /* backward solve the upper triangular */
3210:   for (i=n-1; i>=0; i--){
3211:     v   = aa + adiag[i+1]+1;
3212:     vi  = aj + adiag[i+1]+1;
3213:     nz  = adiag[i]-adiag[i+1]-1;
3214:     sum = tmp[i];
3215:     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3216:     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3217:   }

3219:   ISRestoreIndices(isrow,&rout);
3220:   ISRestoreIndices(iscol,&cout);
3221:   VecRestoreArrayRead(bb,&b);
3222:   VecRestoreArray(xx,&x);
3223:   PetscLogFlops(2*a->nz - A->cmap->n);
3224:   return(0);
3225: }

3229: /*
3230:     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3231: */
3232: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3233: {
3234:   Mat                B = *fact;
3235:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3236:   IS                 isicol;
3237:   PetscErrorCode     ierr;
3238:   const PetscInt     *r,*ic;
3239:   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3240:   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3241:   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3242:   PetscInt           nlnk,*lnk;
3243:   PetscBT            lnkbt;
3244:   PetscBool          row_identity,icol_identity;
3245:   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3246:   const PetscInt     *ics;
3247:   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3248:   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3249:   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3250:   PetscBool          missing;


3254:   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3255:   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3256:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);

3258:   /* ------- symbolic factorization, can be reused ---------*/
3259:   MatMissingDiagonal(A,&missing,&i);
3260:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3261:   adiag=a->diag;

3263:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

3265:   /* bdiag is location of diagonal in factor */
3266:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);     /* becomes b->diag */
3267:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev); /* temporary */

3269:   /* allocate row pointers bi */
3270:   PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);

3272:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3273:   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3274:   nnz_max  = ai[n]+2*n*dtcount+2;

3276:   PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);
3277:   PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);

3279:   /* put together the new matrix */
3280:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
3281:   PetscLogObjectParent(B,isicol);
3282:   b    = (Mat_SeqAIJ*)B->data;
3283:   b->free_a       = PETSC_TRUE;
3284:   b->free_ij      = PETSC_TRUE;
3285:   b->singlemalloc = PETSC_FALSE;
3286:   b->a          = ba;
3287:   b->j          = bj;
3288:   b->i          = bi;
3289:   b->diag       = bdiag;
3290:   b->ilen       = 0;
3291:   b->imax       = 0;
3292:   b->row        = isrow;
3293:   b->col        = iscol;
3294:   PetscObjectReference((PetscObject)isrow);
3295:   PetscObjectReference((PetscObject)iscol);
3296:   b->icol       = isicol;
3297:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

3299:   PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3300:   b->maxnz = nnz_max;

3302:   B->factortype            = MAT_FACTOR_ILUDT;
3303:   B->info.factor_mallocs   = 0;
3304:   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3305:   CHKMEMQ;
3306:   /* ------- end of symbolic factorization ---------*/

3308:   ISGetIndices(isrow,&r);
3309:   ISGetIndices(isicol,&ic);
3310:   ics  = ic;

3312:   /* linked list for storing column indices of the active row */
3313:   nlnk = n + 1;
3314:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

3316:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3317:   PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);
3318:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3319:   PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);
3320:   PetscMemzero(rtmp,n*sizeof(MatScalar));

3322:   bi[0]    = 0;
3323:   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3324:   bdiag_rev[n] = bdiag[0];
3325:   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3326:   for (i=0; i<n; i++) {
3327:     /* copy initial fill into linked list */
3328:     nzi = 0; /* nonzeros for active row i */
3329:     nzi = ai[r[i]+1] - ai[r[i]];
3330:     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3331:     nzi_al = adiag[r[i]] - ai[r[i]];
3332:     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3333:     ajtmp = aj + ai[r[i]];
3334:     PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3335: 
3336:     /* load in initial (unfactored row) */
3337:     aatmp = a->a + ai[r[i]];
3338:     for (j=0; j<nzi; j++) {
3339:       rtmp[ics[*ajtmp++]] = *aatmp++;
3340:     }
3341: 
3342:     /* add pivot rows into linked list */
3343:     row = lnk[n];
3344:     while (row < i ) {
3345:       nzi_bl = bi[row+1] - bi[row] + 1;
3346:       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3347:       PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3348:       nzi  += nlnk;
3349:       row   = lnk[row];
3350:     }
3351: 
3352:     /* copy data from lnk into jtmp, then initialize lnk */
3353:     PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);

3355:     /* numerical factorization */
3356:     bjtmp = jtmp;
3357:     row   = *bjtmp++; /* 1st pivot row */
3358:     while  ( row < i ) {
3359:       pc         = rtmp + row;
3360:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3361:       multiplier = (*pc) * (*pv);
3362:       *pc        = multiplier;
3363:       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3364:         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3365:         pv         = ba + bdiag[row+1] + 1;
3366:         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3367:         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3368:         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3369:         PetscLogFlops(2.0*nz);
3370:       }
3371:       row = *bjtmp++;
3372:     }

3374:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3375:     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3376:     nzi_bl = 0; j = 0;
3377:     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3378:       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3379:       nzi_bl++; j++;
3380:     }
3381:     nzi_bu = nzi - nzi_bl -1;
3382:     while (j < nzi){
3383:       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3384:       j++;
3385:     }
3386: 
3387:     bjtmp = bj + bi[i];
3388:     batmp = ba + bi[i];
3389:     /* apply level dropping rule to L part */
3390:     ncut = nzi_al + dtcount;
3391:     if (ncut < nzi_bl){
3392:       PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3393:       PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3394:     } else {
3395:       ncut = nzi_bl;
3396:     }
3397:     for (j=0; j<ncut; j++){
3398:       bjtmp[j] = jtmp[j];
3399:       batmp[j] = vtmp[j];
3400:       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3401:     }
3402:     bi[i+1] = bi[i] + ncut;
3403:     nzi = ncut + 1;
3404: 
3405:     /* apply level dropping rule to U part */
3406:     ncut = nzi_au + dtcount;
3407:     if (ncut < nzi_bu){
3408:       PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3409:       PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3410:     } else {
3411:       ncut = nzi_bu;
3412:     }
3413:     nzi += ncut;

3415:     /* mark bdiagonal */
3416:     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3417:     bdiag_rev[n-i-1] = bdiag[i+1];
3418:     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3419:     bjtmp = bj + bdiag[i];
3420:     batmp = ba + bdiag[i];
3421:     *bjtmp = i;
3422:     *batmp = diag_tmp; /* rtmp[i]; */
3423:     if (*batmp == 0.0) {
3424:       *batmp = dt+shift;
3425:       /* printf(" row %d add shift %g\n",i,shift); */
3426:     }
3427:     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3428:     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3429: 
3430:     bjtmp = bj + bdiag[i+1]+1;
3431:     batmp = ba + bdiag[i+1]+1;
3432:     for (k=0; k<ncut; k++){
3433:       bjtmp[k] = jtmp[nzi_bl+1+k];
3434:       batmp[k] = vtmp[nzi_bl+1+k];
3435:       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3436:     }
3437:     /* printf("\n"); */
3438: 
3439:     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3440:     /*
3441:     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3442:     printf(" ----------------------------\n");
3443:     */
3444:   } /* for (i=0; i<n; i++) */
3445:   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3446:   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);

3448:   ISRestoreIndices(isrow,&r);
3449:   ISRestoreIndices(isicol,&ic);

3451:   PetscLLDestroy(lnk,lnkbt);
3452:   PetscFree2(im,jtmp);
3453:   PetscFree2(rtmp,vtmp);
3454:   PetscFree(bdiag_rev);

3456:   PetscLogFlops(B->cmap->n);
3457:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3459:   ISIdentity(isrow,&row_identity);
3460:   ISIdentity(isicol,&icol_identity);
3461:   if (row_identity && icol_identity) {
3462:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3463:   } else {
3464:     B->ops->solve = MatSolve_SeqAIJ;
3465:   }
3466: 
3467:   B->ops->solveadd          = 0;
3468:   B->ops->solvetranspose    = 0;
3469:   B->ops->solvetransposeadd = 0;
3470:   B->ops->matsolve          = 0;
3471:   B->assembled              = PETSC_TRUE;
3472:   B->preallocated           = PETSC_TRUE;
3473:   return(0);
3474: }

3476: /* a wraper of MatILUDTFactor_SeqAIJ() */
3479: /*
3480:     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3481: */

3483: PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3484: {
3485:   PetscErrorCode     ierr;

3488:   MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3489:   return(0);
3490: }

3492: /* 
3493:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 
3494:    - intend to replace existing MatLUFactorNumeric_SeqAIJ() 
3495: */
3498: /*
3499:     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3500: */

3502: PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3503: {
3504:   Mat            C=fact;
3505:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3506:   IS             isrow = b->row,isicol = b->icol;
3508:   const PetscInt *r,*ic,*ics;
3509:   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3510:   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3511:   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3512:   PetscReal      dt=info->dt,shift=info->shiftamount;
3513:   PetscBool      row_identity, col_identity;

3516:   ISGetIndices(isrow,&r);
3517:   ISGetIndices(isicol,&ic);
3518:   PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
3519:   ics  = ic;

3521:   for (i=0; i<n; i++){
3522:     /* initialize rtmp array */
3523:     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3524:     bjtmp = bj + bi[i];
3525:     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3526:     rtmp[i] = 0.0;
3527:     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3528:     bjtmp = bj + bdiag[i+1] + 1;
3529:     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;

3531:     /* load in initial unfactored row of A */
3532:     /* printf("row %d\n",i); */
3533:     nz    = ai[r[i]+1] - ai[r[i]];
3534:     ajtmp = aj + ai[r[i]];
3535:     v     = aa + ai[r[i]];
3536:     for (j=0; j<nz; j++) {
3537:       rtmp[ics[*ajtmp++]] = v[j];
3538:       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3539:     }
3540:     /* printf("\n"); */

3542:     /* numerical factorization */
3543:     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3544:     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3545:     k = 0;
3546:     while (k < nzl){
3547:       row   = *bjtmp++;
3548:       /* printf("  prow %d\n",row); */
3549:       pc         = rtmp + row;
3550:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3551:       multiplier = (*pc) * (*pv);
3552:       *pc        = multiplier;
3553:       if (PetscAbsScalar(multiplier) > dt){
3554:         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3555:         pv         = b->a + bdiag[row+1] + 1;
3556:         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3557:         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3558:         /* PetscLogFlops(2.0*nz); */
3559:       }
3560:       k++;
3561:     }
3562: 
3563:     /* finished row so stick it into b->a */
3564:     /* L-part */
3565:     pv = b->a + bi[i] ;
3566:     pj = bj + bi[i] ;
3567:     nzl = bi[i+1] - bi[i];
3568:     for (j=0; j<nzl; j++) {
3569:       pv[j] = rtmp[pj[j]];
3570:       /* printf(" (%d,%g),",pj[j],pv[j]); */
3571:     }

3573:     /* diagonal: invert diagonal entries for simplier triangular solves */
3574:     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3575:     b->a[bdiag[i]] = 1.0/rtmp[i];
3576:     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */

3578:     /* U-part */
3579:     pv = b->a + bdiag[i+1] + 1;
3580:     pj = bj + bdiag[i+1] + 1;
3581:     nzu = bdiag[i] - bdiag[i+1] - 1;
3582:     for (j=0; j<nzu; j++) {
3583:       pv[j] = rtmp[pj[j]];
3584:       /* printf(" (%d,%g),",pj[j],pv[j]); */
3585:     }
3586:     /* printf("\n"); */
3587:   }

3589:   PetscFree(rtmp);
3590:   ISRestoreIndices(isicol,&ic);
3591:   ISRestoreIndices(isrow,&r);
3592: 
3593:   ISIdentity(isrow,&row_identity);
3594:   ISIdentity(isicol,&col_identity);
3595:   if (row_identity && col_identity) {
3596:     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3597:   } else {
3598:     C->ops->solve   = MatSolve_SeqAIJ;
3599:   }
3600:   C->ops->solveadd           = 0;
3601:   C->ops->solvetranspose     = 0;
3602:   C->ops->solvetransposeadd  = 0;
3603:   C->ops->matsolve           = 0;
3604:   C->assembled    = PETSC_TRUE;
3605:   C->preallocated = PETSC_TRUE;
3606:   PetscLogFlops(C->cmap->n);
3607:   return(0);
3608: }