Actual source code: sbaijfact.c

  2: #include <../src/mat/impls/baij/seq/baij.h> 
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <../src/mat/blockinvert.h>
  5: #include <petscis.h>

  7: /* 
  8:   input:
  9:    F -- numeric factor 
 10:   output:
 11:    nneg, nzero, npos: matrix inertia 
 12: */

 16: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos)
 17: {
 18:   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
 19:   MatScalar    *dd = fact_ptr->a;
 20:   PetscInt     mbs=fact_ptr->mbs,bs=F->rmap->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->diag;

 23:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
 24:   nneig_tmp = 0; npos_tmp = 0;
 25:   for (i=0; i<mbs; i++){
 26:     if (PetscRealPart(dd[*fi]) > 0.0){
 27:       npos_tmp++;
 28:     } else if (PetscRealPart(dd[*fi]) < 0.0){
 29:       nneig_tmp++;
 30:     }
 31:     fi++;
 32:   }
 33:   if (nneig) *nneig = nneig_tmp;
 34:   if (npos)  *npos  = npos_tmp;
 35:   if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;

 37:   return(0);
 38: }

 40: /* 
 41:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 42:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 
 43: */
 46: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info)
 47: {
 48:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
 50:   const PetscInt *rip,*ai,*aj;
 51:   PetscInt       i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2;
 52:   PetscInt       m,reallocs = 0,prow;
 53:   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 54:   PetscReal      f = info->fill;
 55:   PetscBool      perm_identity;

 58:   /* check whether perm is the identity mapping */
 59:   ISIdentity(perm,&perm_identity);
 60:   ISGetIndices(perm,&rip);
 61: 
 62:   if (perm_identity){ /* without permutation */
 63:     a->permute = PETSC_FALSE;
 64:     ai = a->i; aj = a->j;
 65:   } else {            /* non-trivial permutation */
 66:     a->permute = PETSC_TRUE;
 67:     MatReorderingSeqSBAIJ(A,perm);
 68:     ai = a->inew; aj = a->jnew;
 69:   }
 70: 
 71:   /* initialization */
 72:   PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
 73:   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
 74:   PetscMalloc(umax*sizeof(PetscInt),&ju);
 75:   iu[0] = mbs+1;
 76:   juidx = mbs + 1; /* index for ju */
 77:   /* jl linked list for pivot row -- linked list for col index */
 78:   PetscMalloc2(mbs,PetscInt,&jl,mbs,PetscInt,&q);
 79:   for (i=0; i<mbs; i++){
 80:     jl[i] = mbs;
 81:     q[i] = 0;
 82:   }

 84:   /* for each row k */
 85:   for (k=0; k<mbs; k++){
 86:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
 87:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
 88:     q[k] = mbs;
 89:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 90:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
 91:     jmax = ai[rip[k]+1];
 92:     for (j=jmin; j<jmax; j++){
 93:       vj = rip[aj[j]]; /* col. value */
 94:       if(vj > k){
 95:         qm = k;
 96:         do {
 97:           m  = qm; qm = q[m];
 98:         } while(qm < vj);
 99:         if (qm == vj) {
100:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A\n");
101:         }
102:         nzk++;
103:         q[m]  = vj;
104:         q[vj] = qm;
105:       } /* if(vj > k) */
106:     } /* for (j=jmin; j<jmax; j++) */

108:     /* modify nonzero structure of k-th row by computing fill-in
109:        for each row i to be merged in */
110:     prow = k;
111:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
112: 
113:     while (prow < k){
114:       /* merge row prow into k-th row */
115:       jmin = iu[prow] + 1; jmax = iu[prow+1];
116:       qm = k;
117:       for (j=jmin; j<jmax; j++){
118:         vj = ju[j];
119:         do {
120:           m = qm; qm = q[m];
121:         } while (qm < vj);
122:         if (qm != vj){
123:          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
124:         }
125:       }
126:       prow = jl[prow]; /* next pivot row */
127:     }
128: 
129:     /* add k to row list for first nonzero element in k-th row */
130:     if (nzk > 0){
131:       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
132:       jl[k] = jl[i]; jl[i] = k;
133:     }
134:     iu[k+1] = iu[k] + nzk;

136:     /* allocate more space to ju if needed */
137:     if (iu[k+1] > umax) {
138:       /* estimate how much additional space we will need */
139:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
140:       /* just double the memory each time */
141:       maxadd = umax;
142:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
143:       umax += maxadd;

145:       /* allocate a longer ju */
146:       PetscMalloc(umax*sizeof(PetscInt),&jutmp);
147:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
148:       PetscFree(ju);
149:       ju   = jutmp;
150:       reallocs++; /* count how many times we realloc */
151:     }

153:     /* save nonzero structure of k-th row in ju */
154:     i=k;
155:     while (nzk --) {
156:       i           = q[i];
157:       ju[juidx++] = i;
158:     }
159:   }

161: #if defined(PETSC_USE_INFO)
162:   if (ai[mbs] != 0) {
163:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
164:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
165:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
166:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
167:     PetscInfo(A,"for best performance.\n");
168:   } else {
169:     PetscInfo(A,"Empty matrix.\n");
170:   }
171: #endif

173:   ISRestoreIndices(perm,&rip);
174:   PetscFree2(jl,q);

176:   /* put together the new matrix */
177:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(F,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);

179:   /* PetscLogObjectParent(B,iperm); */
180:   b = (Mat_SeqSBAIJ*)(F)->data;
181:   b->singlemalloc = PETSC_FALSE;
182:   b->free_a       = PETSC_TRUE;
183:   b->free_ij       = PETSC_TRUE;
184:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
185:   b->j    = ju;
186:   b->i    = iu;
187:   b->diag = 0;
188:   b->ilen = 0;
189:   b->imax = 0;
190:   b->row  = perm;
191:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
192:   PetscObjectReference((PetscObject)perm);
193:   b->icol = perm;
194:   PetscObjectReference((PetscObject)perm);
195:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
196:   /* In b structure:  Free imax, ilen, old a, old j.  
197:      Allocate idnew, solve_work, new a, new j */
198:   PetscLogObjectMemory(F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
199:   b->maxnz = b->nz = iu[mbs];
200: 
201:   (F)->info.factor_mallocs    = reallocs;
202:   (F)->info.fill_ratio_given  = f;
203:   if (ai[mbs] != 0) {
204:     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
205:   } else {
206:     (F)->info.fill_ratio_needed = 0.0;
207:   }
208:   MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity);
209:   return(0);
210: }
211: /*
212:     Symbolic U^T*D*U factorization for SBAIJ format. 
213:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
214: */
215: #include <petscbt.h>
216: #include <../src/mat/utils/freespace.h>
219: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
220: {
221:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
222:   Mat_SeqSBAIJ       *b;
223:   PetscErrorCode     ierr;
224:   PetscBool          perm_identity,missing;
225:   PetscReal          fill = info->fill;
226:   const PetscInt     *rip,*ai=a->i,*aj=a->j;
227:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
228:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
229:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
230:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
231:   PetscBT            lnkbt;

234:   if (bs > 1){
235:     MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);
236:     return(0);
237:   }
238:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
239:   MatMissingDiagonal(A,&missing,&d);
240:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);

242:   /* check whether perm is the identity mapping */
243:   ISIdentity(perm,&perm_identity);
244:   if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
245:   a->permute = PETSC_FALSE;
246:   ISGetIndices(perm,&rip);

248:   /* initialization */
249:   PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);
250:   PetscMalloc((mbs+1)*sizeof(PetscInt),&udiag);
251:   ui[0] = 0;

253:   /* jl: linked list for storing indices of the pivot rows 
254:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
255:   PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);
256:   for (i=0; i<mbs; i++){
257:     jl[i] = mbs; il[i] = 0;
258:   }

260:   /* create and initialize a linked list for storing column indices of the active row k */
261:   nlnk = mbs + 1;
262:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

264:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
265:   PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);
266:   current_space = free_space;

268:   for (k=0; k<mbs; k++){  /* for each active row k */
269:     /* initialize lnk by the column indices of row rip[k] of A */
270:     nzk   = 0;
271:     ncols = ai[k+1] - ai[k];
272:     if (!ncols) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k);
273:     for (j=0; j<ncols; j++){
274:       i = *(aj + ai[k] + j);
275:       cols[j] = i;
276:     }
277:     PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);
278:     nzk += nlnk;

280:     /* update lnk by computing fill-in for each pivot row to be merged in */
281:     prow = jl[k]; /* 1st pivot row */
282: 
283:     while (prow < k){
284:       nextprow = jl[prow];
285:       /* merge prow into k-th row */
286:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
287:       jmax = ui[prow+1];
288:       ncols = jmax-jmin;
289:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
290:       PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
291:       nzk += nlnk;

293:       /* update il and jl for prow */
294:       if (jmin < jmax){
295:         il[prow] = jmin;
296:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
297:       }
298:       prow = nextprow;
299:     }

301:     /* if free space is not available, make more free space */
302:     if (current_space->local_remaining<nzk) {
303:       i  = mbs - k + 1; /* num of unfactored rows */
304:       i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
305:       PetscFreeSpaceGet(i,&current_space);
306:       reallocs++;
307:     }

309:     /* copy data into free space, then initialize lnk */
310:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

312:     /* add the k-th row into il and jl */
313:     if (nzk > 1){
314:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
315:       jl[k] = jl[i]; jl[i] = k;
316:       il[k] = ui[k] + 1;
317:     }
318:     ui_ptr[k] = current_space->array;
319:     current_space->array           += nzk;
320:     current_space->local_used      += nzk;
321:     current_space->local_remaining -= nzk;

323:     ui[k+1] = ui[k] + nzk;
324:   }

326:   ISRestoreIndices(perm,&rip);
327:   PetscFree4(ui_ptr,il,jl,cols);

329:   /* destroy list of free space and other temporary array(s) */
330:   PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);
331:   PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag); /* store matrix factor */
332:   PetscLLDestroy(lnk,lnkbt);

334:   /* put together the new matrix in MATSEQSBAIJ format */
335:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
336: 
337:   b = (Mat_SeqSBAIJ*)fact->data;
338:   b->singlemalloc = PETSC_FALSE;
339:   b->free_a       = PETSC_TRUE;
340:   b->free_ij      = PETSC_TRUE;
341:   PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);
342:   b->j    = uj;
343:   b->i    = ui;
344:   b->diag = udiag;
345:   b->free_diag = PETSC_TRUE;
346:   b->ilen = 0;
347:   b->imax = 0;
348:   b->row  = perm;
349:   b->icol = perm;
350:   PetscObjectReference((PetscObject)perm);
351:   PetscObjectReference((PetscObject)perm);
352:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
353:   PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);
354:   PetscLogObjectMemory(fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar)));
355:   b->maxnz = b->nz = ui[mbs];
356: 
357:   fact->info.factor_mallocs    = reallocs;
358:   fact->info.fill_ratio_given  = fill;
359:   if (ai[mbs] != 0) {
360:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
361:   } else {
362:     fact->info.fill_ratio_needed = 0.0;
363:   }
364: #if defined(PETSC_USE_INFO)
365:   if (ai[mbs] != 0) {
366:     PetscReal af = fact->info.fill_ratio_needed;
367:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
368:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
369:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
370:   } else {
371:     PetscInfo(A,"Empty matrix.\n");
372:   }
373: #endif
374:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
375:   return(0);
376: }

380: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
381: {
382:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
383:   Mat_SeqSBAIJ       *b;
384:   PetscErrorCode     ierr;
385:   PetscBool          perm_identity,missing;
386:   PetscReal          fill = info->fill;
387:   const PetscInt     *rip,*ai,*aj;
388:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
389:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
390:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
391:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
392:   PetscBT            lnkbt;

395:   MatMissingDiagonal(A,&missing,&d);
396:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);

398:   /*  
399:    This code originally uses Modified Sparse Row (MSR) storage
400:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
401:    Then it is rewritten so the factor B takes seqsbaij format. However the associated 
402:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 
403:    thus the original code in MSR format is still used for these cases. 
404:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 
405:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
406:   */
407:   if (bs > 1){
408:     MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);
409:     return(0);
410:   }

412:   /* check whether perm is the identity mapping */
413:   ISIdentity(perm,&perm_identity);

415:   if (perm_identity){
416:     a->permute = PETSC_FALSE;
417:     ai = a->i; aj = a->j;
418:   } else {
419:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
420:     /* There are bugs for reordeing. Needs further work. 
421:        MatReordering for sbaij cannot be efficient. User should use aij formt! */
422:     a->permute = PETSC_TRUE;
423:     MatReorderingSeqSBAIJ(A,perm);
424:     ai = a->inew; aj = a->jnew;
425:   }
426:   ISGetIndices(perm,&rip);

428:   /* initialization */
429:   PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);
430:   ui[0] = 0;

432:   /* jl: linked list for storing indices of the pivot rows 
433:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
434:   PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);
435:   for (i=0; i<mbs; i++){
436:     jl[i] = mbs; il[i] = 0;
437:   }

439:   /* create and initialize a linked list for storing column indices of the active row k */
440:   nlnk = mbs + 1;
441:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

443:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
444:   PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);
445:   current_space = free_space;

447:   for (k=0; k<mbs; k++){  /* for each active row k */
448:     /* initialize lnk by the column indices of row rip[k] of A */
449:     nzk   = 0;
450:     ncols = ai[rip[k]+1] - ai[rip[k]];
451:     for (j=0; j<ncols; j++){
452:       i = *(aj + ai[rip[k]] + j);
453:       cols[j] = rip[i];
454:     }
455:     PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);
456:     nzk += nlnk;

458:     /* update lnk by computing fill-in for each pivot row to be merged in */
459:     prow = jl[k]; /* 1st pivot row */
460: 
461:     while (prow < k){
462:       nextprow = jl[prow];
463:       /* merge prow into k-th row */
464:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
465:       jmax = ui[prow+1];
466:       ncols = jmax-jmin;
467:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
468:       PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
469:       nzk += nlnk;

471:       /* update il and jl for prow */
472:       if (jmin < jmax){
473:         il[prow] = jmin;
474:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
475:       }
476:       prow = nextprow;
477:     }

479:     /* if free space is not available, make more free space */
480:     if (current_space->local_remaining<nzk) {
481:       i = mbs - k + 1; /* num of unfactored rows */
482:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
483:       PetscFreeSpaceGet(i,&current_space);
484:       reallocs++;
485:     }

487:     /* copy data into free space, then initialize lnk */
488:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

490:     /* add the k-th row into il and jl */
491:     if (nzk-1 > 0){
492:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
493:       jl[k] = jl[i]; jl[i] = k;
494:       il[k] = ui[k] + 1;
495:     }
496:     ui_ptr[k] = current_space->array;
497:     current_space->array           += nzk;
498:     current_space->local_used      += nzk;
499:     current_space->local_remaining -= nzk;

501:     ui[k+1] = ui[k] + nzk;
502:   }

504:   ISRestoreIndices(perm,&rip);
505:   PetscFree4(ui_ptr,il,jl,cols);

507:   /* destroy list of free space and other temporary array(s) */
508:   PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);
509:   PetscFreeSpaceContiguous(&free_space,uj);
510:   PetscLLDestroy(lnk,lnkbt);

512:   /* put together the new matrix in MATSEQSBAIJ format */
513:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
514: 
515:   b = (Mat_SeqSBAIJ*)fact->data;
516:   b->singlemalloc = PETSC_FALSE;
517:   b->free_a       = PETSC_TRUE;
518:   b->free_ij      = PETSC_TRUE;
519:   PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);
520:   b->j    = uj;
521:   b->i    = ui;
522:   b->diag = 0;
523:   b->ilen = 0;
524:   b->imax = 0;
525:   b->row  = perm;
526:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
527:   PetscObjectReference((PetscObject)perm);
528:   b->icol = perm;
529:   PetscObjectReference((PetscObject)perm);
530:   PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);
531:   PetscLogObjectMemory(fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
532:   b->maxnz = b->nz = ui[mbs];
533: 
534:   fact->info.factor_mallocs    = reallocs;
535:   fact->info.fill_ratio_given  = fill;
536:   if (ai[mbs] != 0) {
537:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
538:   } else {
539:     fact->info.fill_ratio_needed = 0.0;
540:   }
541: #if defined(PETSC_USE_INFO)
542:   if (ai[mbs] != 0) {
543:     PetscReal af = fact->info.fill_ratio_needed;
544:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
545:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
546:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
547:   } else {
548:     PetscInfo(A,"Empty matrix.\n");
549:   }
550: #endif
551:   MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);
552:   return(0);
553: }

557: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
558: {
559:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
560:   IS             perm = b->row;
562:   const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
563:   PetscInt       i,j;
564:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
565:   PetscInt       bs=A->rmap->bs,bs2 = a->bs2,bslog = 0;
566:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
567:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
568:   MatScalar      *work;
569:   PetscInt       *pivots;

572:   /* initialization */
573:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
574:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
575:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
576:   for (i=0; i<mbs; i++) {
577:     jl[i] = mbs; il[0] = 0;
578:   }
579:   PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);
580:   PetscMalloc(bs*sizeof(PetscInt),&pivots);
581: 
582:   ISGetIndices(perm,&perm_ptr);
583: 
584:   /* check permutation */
585:   if (!a->permute){
586:     ai = a->i; aj = a->j; aa = a->a;
587:   } else {
588:     ai   = a->inew; aj = a->jnew;
589:     PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
590:     PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
591:     PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
592:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));

594:     /* flops in while loop */
595:     bslog = 2*bs*bs2;

597:     for (i=0; i<mbs; i++){
598:       jmin = ai[i]; jmax = ai[i+1];
599:       for (j=jmin; j<jmax; j++){
600:         while (a2anew[j] != j){
601:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
602:           for (k1=0; k1<bs2; k1++){
603:             dk[k1]       = aa[k*bs2+k1];
604:             aa[k*bs2+k1] = aa[j*bs2+k1];
605:             aa[j*bs2+k1] = dk[k1];
606:           }
607:         }
608:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
609:         if (i > aj[j]){
610:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
611:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
612:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
613:           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
614:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
615:           }
616:         }
617:       }
618:     }
619:     PetscFree(a2anew);
620:   }
621: 
622:   /* for each row k */
623:   for (k = 0; k<mbs; k++){

625:     /*initialize k-th row with elements nonzero in row perm(k) of A */
626:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
627: 
628:     ap = aa + jmin*bs2;
629:     for (j = jmin; j < jmax; j++){
630:       vj = perm_ptr[aj[j]];         /* block col. index */
631:       rtmp_ptr = rtmp + vj*bs2;
632:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
633:     }

635:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
636:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
637:     i = jl[k]; /* first row to be added to k_th row  */

639:     while (i < k){
640:       nexti = jl[i]; /* next row to be added to k_th row */

642:       /* compute multiplier */
643:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

645:       /* uik = -inv(Di)*U_bar(i,k) */
646:       diag = ba + i*bs2;
647:       u    = ba + ili*bs2;
648:       PetscMemzero(uik,bs2*sizeof(MatScalar));
649:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
650: 
651:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
652:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
653:       PetscLogFlops(bslog*2.0);
654: 
655:       /* update -U(i,k) */
656:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

658:       /* add multiple of row i to k-th row ... */
659:       jmin = ili + 1; jmax = bi[i+1];
660:       if (jmin < jmax){
661:         for (j=jmin; j<jmax; j++) {
662:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
663:           rtmp_ptr = rtmp + bj[j]*bs2;
664:           u = ba + j*bs2;
665:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
666:         }
667:         PetscLogFlops(bslog*(jmax-jmin));
668: 
669:         /* ... add i to row list for next nonzero entry */
670:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
671:         j     = bj[jmin];
672:         jl[i] = jl[j]; jl[j] = i; /* update jl */
673:       }
674:       i = nexti;
675:     }

677:     /* save nonzero entries in k-th row of U ... */

679:     /* invert diagonal block */
680:     diag = ba+k*bs2;
681:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
682:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
683: 
684:     jmin = bi[k]; jmax = bi[k+1];
685:     if (jmin < jmax) {
686:       for (j=jmin; j<jmax; j++){
687:          vj = bj[j];           /* block col. index of U */
688:          u   = ba + j*bs2;
689:          rtmp_ptr = rtmp + vj*bs2;
690:          for (k1=0; k1<bs2; k1++){
691:            *u++        = *rtmp_ptr;
692:            *rtmp_ptr++ = 0.0;
693:          }
694:       }
695: 
696:       /* ... add k to row list for first nonzero entry in k-th row */
697:       il[k] = jmin;
698:       i     = bj[jmin];
699:       jl[k] = jl[i]; jl[i] = k;
700:     }
701:   }

703:   PetscFree(rtmp);
704:   PetscFree2(il,jl);
705:   PetscFree3(dk,uik,work);
706:   PetscFree(pivots);
707:   if (a->permute){
708:     PetscFree(aa);
709:   }

711:   ISRestoreIndices(perm,&perm_ptr);
712:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
713:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
714:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
715:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

717:   C->assembled    = PETSC_TRUE;
718:   C->preallocated = PETSC_TRUE;
719:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
720:   return(0);
721: }

725: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
726: {
727:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
729:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
730:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
731:   PetscInt       bs=A->rmap->bs,bs2 = a->bs2,bslog;
732:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
733:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
734:   MatScalar      *work;
735:   PetscInt       *pivots;

738:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
739:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
740:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
741:   for (i=0; i<mbs; i++) {
742:     jl[i] = mbs; il[0] = 0;
743:   }
744:   PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);
745:   PetscMalloc(bs*sizeof(PetscInt),&pivots);
746: 
747:   ai = a->i; aj = a->j; aa = a->a;

749:   /* flops in while loop */
750:   bslog = 2*bs*bs2;
751: 
752:   /* for each row k */
753:   for (k = 0; k<mbs; k++){

755:     /*initialize k-th row with elements nonzero in row k of A */
756:     jmin = ai[k]; jmax = ai[k+1];
757:     ap = aa + jmin*bs2;
758:     for (j = jmin; j < jmax; j++){
759:       vj = aj[j];         /* block col. index */
760:       rtmp_ptr = rtmp + vj*bs2;
761:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
762:     }

764:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
765:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
766:     i = jl[k]; /* first row to be added to k_th row  */

768:     while (i < k){
769:       nexti = jl[i]; /* next row to be added to k_th row */

771:       /* compute multiplier */
772:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

774:       /* uik = -inv(Di)*U_bar(i,k) */
775:       diag = ba + i*bs2;
776:       u    = ba + ili*bs2;
777:       PetscMemzero(uik,bs2*sizeof(MatScalar));
778:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
779: 
780:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
781:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
782:       PetscLogFlops(bslog*2.0);
783: 
784:       /* update -U(i,k) */
785:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

787:       /* add multiple of row i to k-th row ... */
788:       jmin = ili + 1; jmax = bi[i+1];
789:       if (jmin < jmax){
790:         for (j=jmin; j<jmax; j++) {
791:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
792:           rtmp_ptr = rtmp + bj[j]*bs2;
793:           u = ba + j*bs2;
794:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
795:         }
796:         PetscLogFlops(bslog*(jmax-jmin));
797: 
798:         /* ... add i to row list for next nonzero entry */
799:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
800:         j     = bj[jmin];
801:         jl[i] = jl[j]; jl[j] = i; /* update jl */
802:       }
803:       i = nexti;
804:     }

806:     /* save nonzero entries in k-th row of U ... */

808:     /* invert diagonal block */
809:     diag = ba+k*bs2;
810:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
811:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
812: 
813:     jmin = bi[k]; jmax = bi[k+1];
814:     if (jmin < jmax) {
815:       for (j=jmin; j<jmax; j++){
816:          vj = bj[j];           /* block col. index of U */
817:          u   = ba + j*bs2;
818:          rtmp_ptr = rtmp + vj*bs2;
819:          for (k1=0; k1<bs2; k1++){
820:            *u++        = *rtmp_ptr;
821:            *rtmp_ptr++ = 0.0;
822:          }
823:       }
824: 
825:       /* ... add k to row list for first nonzero entry in k-th row */
826:       il[k] = jmin;
827:       i     = bj[jmin];
828:       jl[k] = jl[i]; jl[i] = k;
829:     }
830:   }

832:   PetscFree(rtmp);
833:   PetscFree2(il,jl);
834:   PetscFree3(dk,uik,work);
835:   PetscFree(pivots);

837:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
838:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
839:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
840:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
841:   C->assembled = PETSC_TRUE;
842:   C->preallocated = PETSC_TRUE;
843:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
844:   return(0);
845: }

847: /*
848:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
849:     Version for blocks 2 by 2.
850: */
853: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info)
854: {
855:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
856:   IS             perm = b->row;
858:   const PetscInt *ai,*aj,*perm_ptr;
859:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
860:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
861:   MatScalar      *ba = b->a,*aa,*ap;
862:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4];
863:   PetscReal      shift = info->shiftamount;

866:   /* initialization */
867:   /* il and jl record the first nonzero element in each row of the accessing 
868:      window U(0:k, k:mbs-1).
869:      jl:    list of rows to be added to uneliminated rows 
870:             i>= k: jl(i) is the first row to be added to row i
871:             i<  k: jl(i) is the row following row i in some list of rows
872:             jl(i) = mbs indicates the end of a list                        
873:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
874:             row i of U */
875:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
876:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
877:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
878:   for (i=0; i<mbs; i++) {
879:     jl[i] = mbs; il[0] = 0;
880:   }
881:   ISGetIndices(perm,&perm_ptr);

883:   /* check permutation */
884:   if (!a->permute){
885:     ai = a->i; aj = a->j; aa = a->a;
886:   } else {
887:     ai   = a->inew; aj = a->jnew;
888:     PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
889:     PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
890:     PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
891:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));

893:     for (i=0; i<mbs; i++){
894:       jmin = ai[i]; jmax = ai[i+1];
895:       for (j=jmin; j<jmax; j++){
896:         while (a2anew[j] != j){
897:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
898:           for (k1=0; k1<4; k1++){
899:             dk[k1]       = aa[k*4+k1];
900:             aa[k*4+k1] = aa[j*4+k1];
901:             aa[j*4+k1] = dk[k1];
902:           }
903:         }
904:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
905:         if (i > aj[j]){
906:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
907:           ap = aa + j*4;     /* ptr to the beginning of the block */
908:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
909:           ap[1] = ap[2];
910:           ap[2] = dk[1];
911:         }
912:       }
913:     }
914:     PetscFree(a2anew);
915:   }

917:   /* for each row k */
918:   for (k = 0; k<mbs; k++){

920:     /*initialize k-th row with elements nonzero in row perm(k) of A */
921:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
922:     ap = aa + jmin*4;
923:     for (j = jmin; j < jmax; j++){
924:       vj = perm_ptr[aj[j]];         /* block col. index */
925:       rtmp_ptr = rtmp + vj*4;
926:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
927:     }

929:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
930:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
931:     i = jl[k]; /* first row to be added to k_th row  */

933:     while (i < k){
934:       nexti = jl[i]; /* next row to be added to k_th row */

936:       /* compute multiplier */
937:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

939:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
940:       diag = ba + i*4;
941:       u    = ba + ili*4;
942:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
943:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
944:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
945:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
946: 
947:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
948:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
949:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
950:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
951:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

953:       PetscLogFlops(16.0*2.0);

955:       /* update -U(i,k): ba[ili] = uik */
956:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

958:       /* add multiple of row i to k-th row ... */
959:       jmin = ili + 1; jmax = bi[i+1];
960:       if (jmin < jmax){
961:         for (j=jmin; j<jmax; j++) {
962:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
963:           rtmp_ptr = rtmp + bj[j]*4;
964:           u = ba + j*4;
965:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
966:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
967:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
968:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
969:         }
970:         PetscLogFlops(16.0*(jmax-jmin));
971: 
972:         /* ... add i to row list for next nonzero entry */
973:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
974:         j     = bj[jmin];
975:         jl[i] = jl[j]; jl[j] = i; /* update jl */
976:       }
977:       i = nexti;
978:     }

980:     /* save nonzero entries in k-th row of U ... */

982:     /* invert diagonal block */
983:     diag = ba+k*4;
984:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
985:     Kernel_A_gets_inverse_A_2(diag,shift);
986: 
987:     jmin = bi[k]; jmax = bi[k+1];
988:     if (jmin < jmax) {
989:       for (j=jmin; j<jmax; j++){
990:          vj = bj[j];           /* block col. index of U */
991:          u   = ba + j*4;
992:          rtmp_ptr = rtmp + vj*4;
993:          for (k1=0; k1<4; k1++){
994:            *u++        = *rtmp_ptr;
995:            *rtmp_ptr++ = 0.0;
996:          }
997:       }
998: 
999:       /* ... add k to row list for first nonzero entry in k-th row */
1000:       il[k] = jmin;
1001:       i     = bj[jmin];
1002:       jl[k] = jl[i]; jl[i] = k;
1003:     }
1004:   }

1006:   PetscFree(rtmp);
1007:   PetscFree2(il,jl);
1008:   if (a->permute) {
1009:     PetscFree(aa);
1010:   }
1011:   ISRestoreIndices(perm,&perm_ptr);
1012:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1013:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1014:   C->assembled = PETSC_TRUE;
1015:   C->preallocated = PETSC_TRUE;
1016:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1017:   return(0);
1018: }

1020: /*
1021:       Version for when blocks are 2 by 2 Using natural ordering
1022: */
1025: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
1026: {
1027:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1029:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1030:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1031:   MatScalar      *ba = b->a,*aa,*ap,dk[8],uik[8];
1032:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1033:   PetscReal      shift = info->shiftamount;

1036:   /* initialization */
1037:   /* il and jl record the first nonzero element in each row of the accessing 
1038:      window U(0:k, k:mbs-1).
1039:      jl:    list of rows to be added to uneliminated rows 
1040:             i>= k: jl(i) is the first row to be added to row i
1041:             i<  k: jl(i) is the row following row i in some list of rows
1042:             jl(i) = mbs indicates the end of a list                        
1043:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1044:             row i of U */
1045:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
1046:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
1047:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
1048:   for (i=0; i<mbs; i++) {
1049:     jl[i] = mbs; il[0] = 0;
1050:   }
1051:   ai = a->i; aj = a->j; aa = a->a;

1053:   /* for each row k */
1054:   for (k = 0; k<mbs; k++){

1056:     /*initialize k-th row with elements nonzero in row k of A */
1057:     jmin = ai[k]; jmax = ai[k+1];
1058:     ap = aa + jmin*4;
1059:     for (j = jmin; j < jmax; j++){
1060:       vj = aj[j];         /* block col. index */
1061:       rtmp_ptr = rtmp + vj*4;
1062:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1063:     }
1064: 
1065:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1066:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
1067:     i = jl[k]; /* first row to be added to k_th row  */

1069:     while (i < k){
1070:       nexti = jl[i]; /* next row to be added to k_th row */

1072:       /* compute multiplier */
1073:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

1075:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1076:       diag = ba + i*4;
1077:       u    = ba + ili*4;
1078:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1079:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1080:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1081:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1082: 
1083:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1084:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1085:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1086:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1087:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

1089:       PetscLogFlops(16.0*2.0);

1091:       /* update -U(i,k): ba[ili] = uik */
1092:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

1094:       /* add multiple of row i to k-th row ... */
1095:       jmin = ili + 1; jmax = bi[i+1];
1096:       if (jmin < jmax){
1097:         for (j=jmin; j<jmax; j++) {
1098:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1099:           rtmp_ptr = rtmp + bj[j]*4;
1100:           u = ba + j*4;
1101:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1102:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1103:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1104:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1105:         }
1106:         PetscLogFlops(16.0*(jmax-jmin));

1108:         /* ... add i to row list for next nonzero entry */
1109:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1110:         j     = bj[jmin];
1111:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1112:       }
1113:       i = nexti;
1114:     }

1116:     /* save nonzero entries in k-th row of U ... */

1118:     /* invert diagonal block */
1119:     diag = ba+k*4;
1120:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1121:     Kernel_A_gets_inverse_A_2(diag,shift);
1122: 
1123:     jmin = bi[k]; jmax = bi[k+1];
1124:     if (jmin < jmax) {
1125:       for (j=jmin; j<jmax; j++){
1126:          vj = bj[j];           /* block col. index of U */
1127:          u   = ba + j*4;
1128:          rtmp_ptr = rtmp + vj*4;
1129:          for (k1=0; k1<4; k1++){
1130:            *u++        = *rtmp_ptr;
1131:            *rtmp_ptr++ = 0.0;
1132:          }
1133:       }
1134: 
1135:       /* ... add k to row list for first nonzero entry in k-th row */
1136:       il[k] = jmin;
1137:       i     = bj[jmin];
1138:       jl[k] = jl[i]; jl[i] = k;
1139:     }
1140:   }

1142:   PetscFree(rtmp);
1143:   PetscFree2(il,jl);

1145:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1146:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1147:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1148:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1149:   C->assembled = PETSC_TRUE;
1150:   C->preallocated = PETSC_TRUE;
1151:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1152:   return(0);
1153: }

1155: /*
1156:     Numeric U^T*D*U factorization for SBAIJ format. 
1157:     Version for blocks are 1 by 1.
1158: */
1161: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
1162: {
1163:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1164:   IS             ip=b->row;
1166:   const PetscInt *ai,*aj,*rip;
1167:   PetscInt       *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1168:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1169:   MatScalar      *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1170:   PetscReal      rs;
1171:   FactorShiftCtx sctx;

1174:   /* MatPivotSetUp(): initialize shift context sctx */
1175:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));
1176: 
1177:   ISGetIndices(ip,&rip);
1178:   if (!a->permute){
1179:     ai = a->i; aj = a->j; aa = a->a;
1180:   } else {
1181:     ai = a->inew; aj = a->jnew;
1182:     nz = ai[mbs];
1183:     PetscMalloc(nz*sizeof(MatScalar),&aa);
1184:     a2anew = a->a2anew;
1185:     bval   = a->a;
1186:     for (j=0; j<nz; j++){
1187:       aa[a2anew[j]] = *(bval++);
1188:     }
1189:   }
1190: 
1191:   /* initialization */
1192:   /* il and jl record the first nonzero element in each row of the accessing 
1193:      window U(0:k, k:mbs-1).
1194:      jl:    list of rows to be added to uneliminated rows 
1195:             i>= k: jl(i) is the first row to be added to row i
1196:             i<  k: jl(i) is the row following row i in some list of rows
1197:             jl(i) = mbs indicates the end of a list                        
1198:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1199:             row i of U */
1200:   PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);

1202:   do {
1203:     sctx.newshift = PETSC_FALSE;
1204:     for (i=0; i<mbs; i++) {
1205:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1206:     }
1207: 
1208:     for (k = 0; k<mbs; k++){
1209:       /*initialize k-th row by the perm[k]-th row of A */
1210:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1211:       bval = ba + bi[k];
1212:       for (j = jmin; j < jmax; j++){
1213:         col = rip[aj[j]];
1214:         rtmp[col] = aa[j];
1215:         *bval++  = 0.0; /* for in-place factorization */
1216:       }

1218:       /* shift the diagonal of the matrix */
1219:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1221:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1222:       dk = rtmp[k];
1223:       i = jl[k]; /* first row to be added to k_th row  */

1225:       while (i < k){
1226:         nexti = jl[i]; /* next row to be added to k_th row */

1228:         /* compute multiplier, update diag(k) and U(i,k) */
1229:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1230:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1231:         dk += uikdi*ba[ili];
1232:         ba[ili] = uikdi; /* -U(i,k) */

1234:         /* add multiple of row i to k-th row */
1235:         jmin = ili + 1; jmax = bi[i+1];
1236:         if (jmin < jmax){
1237:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1238:           PetscLogFlops(2.0*(jmax-jmin));

1240:           /* update il and jl for row i */
1241:           il[i] = jmin;
1242:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1243:         }
1244:         i = nexti;
1245:       }

1247:       /* shift the diagonals when zero pivot is detected */
1248:       /* compute rs=sum of abs(off-diagonal) */
1249:       rs   = 0.0;
1250:       jmin = bi[k]+1;
1251:       nz   = bi[k+1] - jmin;
1252:       if (nz){
1253:         bcol = bj + jmin;
1254:         while (nz--){
1255:           rs += PetscAbsScalar(rtmp[*bcol]);
1256:           bcol++;
1257:         }
1258:       }

1260:       sctx.rs = rs;
1261:       sctx.pv = dk;
1262:       MatPivotCheck(A,info,&sctx,k);
1263:       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1264:       dk = sctx.pv;
1265: 
1266:       /* copy data into U(k,:) */
1267:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1268:       jmin = bi[k]+1; jmax = bi[k+1];
1269:       if (jmin < jmax) {
1270:         for (j=jmin; j<jmax; j++){
1271:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1272:         }
1273:         /* add the k-th row into il and jl */
1274:         il[k] = jmin;
1275:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1276:       }
1277:     }
1278:   } while (sctx.newshift);
1279:   PetscFree3(rtmp,il,jl);
1280:   if (a->permute){PetscFree(aa);}

1282:   ISRestoreIndices(ip,&rip);
1283:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1284:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1285:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1286:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1287:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1288:   C->assembled    = PETSC_TRUE;
1289:   C->preallocated = PETSC_TRUE;
1290:   PetscLogFlops(C->rmap->N);
1291:   if (sctx.nshift){
1292:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1293:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1294:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1295:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1296:     }
1297:   }
1298:   return(0);
1299: }

1301: /*
1302:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1303:   Modified from MatCholeskyFactorNumeric_SeqAIJ() 
1304: */
1307: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
1308: {
1309:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1310:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)B->data;
1312:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
1313:   PetscInt       *ai=a->i,*aj=a->j,*ajtmp;
1314:   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
1315:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1316:   FactorShiftCtx sctx;
1317:   PetscReal      rs;
1318:   MatScalar      d,*v;

1321:   PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);

1323:   /* MatPivotSetUp(): initialize shift context sctx */
1324:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1326:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1327:     sctx.shift_top = info->zeropivot;
1328:     PetscMemzero(rtmp,mbs*sizeof(MatScalar));
1329:     for (i=0; i<mbs; i++) {
1330:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1331:       d  = (aa)[a->diag[i]];
1332:       rtmp[i] += - PetscRealPart(d); /* diagonal entry */
1333:       ajtmp = aj + ai[i] + 1;        /* exclude diagonal */
1334:       v     = aa + ai[i] + 1;
1335:       nz    = ai[i+1] - ai[i] - 1 ;
1336:       for (j=0; j<nz; j++){
1337:         rtmp[i] += PetscAbsScalar(v[j]);
1338:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1339:       }
1340:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1341:     }
1342:     sctx.shift_top   *= 1.1;
1343:     sctx.nshift_max   = 5;
1344:     sctx.shift_lo     = 0.;
1345:     sctx.shift_hi     = 1.;
1346:   }

1348:   /* allocate working arrays
1349:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1350:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 
1351:   */
1352:   do {
1353:     sctx.newshift = PETSC_FALSE;

1355:     for (i=0; i<mbs; i++)  c2r[i] = mbs;
1356:     il[0] = 0;
1357: 
1358:     for (k = 0; k<mbs; k++){
1359:       /* zero rtmp */
1360:       nz = bi[k+1] - bi[k];
1361:       bjtmp = bj + bi[k];
1362:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
1363: 
1364:       /* load in initial unfactored row */
1365:       bval = ba + bi[k];
1366:       jmin = ai[k]; jmax = ai[k+1];
1367:       for (j = jmin; j < jmax; j++){
1368:         col = aj[j];
1369:         rtmp[col] = aa[j];
1370:         *bval++   = 0.0; /* for in-place factorization */
1371:       }
1372:       /* shift the diagonal of the matrix: ZeropivotApply() */
1373:       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
1374: 
1375:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1376:       dk = rtmp[k];
1377:       i  = c2r[k]; /* first row to be added to k_th row  */

1379:       while (i < k){
1380:         nexti = c2r[i]; /* next row to be added to k_th row */
1381: 
1382:         /* compute multiplier, update diag(k) and U(i,k) */
1383:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1384:         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
1385:         dk   += uikdi*ba[ili]; /* update diag[k] */
1386:         ba[ili] = uikdi; /* -U(i,k) */

1388:         /* add multiple of row i to k-th row */
1389:         jmin = ili + 1; jmax = bi[i+1];
1390:         if (jmin < jmax){
1391:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1392:           /* update il and c2r for row i */
1393:           il[i] = jmin;
1394:           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
1395:         }
1396:         i = nexti;
1397:       }

1399:       /* copy data into U(k,:) */
1400:       rs   = 0.0;
1401:       jmin = bi[k]; jmax = bi[k+1]-1;
1402:       if (jmin < jmax) {
1403:         for (j=jmin; j<jmax; j++){
1404:           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
1405:         }
1406:         /* add the k-th row into il and c2r */
1407:         il[k] = jmin;
1408:         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
1409:       }

1411:       sctx.rs  = rs;
1412:       sctx.pv  = dk;
1413:       MatPivotCheck(A,info,&sctx,k);
1414:       if(sctx.newshift) break;
1415:       dk = sctx.pv;
1416: 
1417:       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
1418:     }
1419:   } while (sctx.newshift);
1420: 
1421:   PetscFree3(rtmp,il,c2r);
1422: 
1423:   B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1424:   B->ops->solves          = MatSolves_SeqSBAIJ_1;
1425:   B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1426:   B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1427:   B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1429:   B->assembled    = PETSC_TRUE;
1430:   B->preallocated = PETSC_TRUE;
1431:   PetscLogFlops(B->rmap->n);

1433:   /* MatPivotView() */
1434:   if (sctx.nshift){
1435:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1436:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
1437:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1438:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1439:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
1440:       PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
1441:     }
1442:   }
1443:   return(0);
1444: }

1448: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
1449: {
1450:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1452:   PetscInt       i,j,mbs = a->mbs;
1453:   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1454:   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1455:   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1456:   PetscReal      rs;
1457:   FactorShiftCtx sctx;

1460:   /* MatPivotSetUp(): initialize shift context sctx */
1461:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1463:   /* initialization */
1464:   /* il and jl record the first nonzero element in each row of the accessing 
1465:      window U(0:k, k:mbs-1).
1466:      jl:    list of rows to be added to uneliminated rows 
1467:             i>= k: jl(i) is the first row to be added to row i
1468:             i<  k: jl(i) is the row following row i in some list of rows
1469:             jl(i) = mbs indicates the end of a list                        
1470:      il(i): points to the first nonzero element in U(i,k:mbs-1) 
1471:   */
1472:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1473:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);

1475:   do {
1476:     sctx.newshift = PETSC_FALSE;
1477:     for (i=0; i<mbs; i++) {
1478:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1479:     }

1481:     for (k = 0; k<mbs; k++){
1482:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1483:       nz   = ai[k+1] - ai[k];
1484:       acol = aj + ai[k];
1485:       aval = aa + ai[k];
1486:       bval = ba + bi[k];
1487:       while (nz -- ){
1488:         rtmp[*acol++] = *aval++;
1489:         *bval++       = 0.0; /* for in-place factorization */
1490:       }

1492:       /* shift the diagonal of the matrix */
1493:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1494: 
1495:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1496:       dk = rtmp[k];
1497:       i  = jl[k]; /* first row to be added to k_th row  */

1499:       while (i < k){
1500:         nexti = jl[i]; /* next row to be added to k_th row */
1501:         /* compute multiplier, update D(k) and U(i,k) */
1502:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1503:         uikdi = - ba[ili]*ba[bi[i]];
1504:         dk   += uikdi*ba[ili];
1505:         ba[ili] = uikdi; /* -U(i,k) */

1507:         /* add multiple of row i to k-th row ... */
1508:         jmin = ili + 1;
1509:         nz   = bi[i+1] - jmin;
1510:         if (nz > 0){
1511:           bcol = bj + jmin;
1512:           bval = ba + jmin;
1513:           PetscLogFlops(2.0*nz);
1514:           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1515: 
1516:           /* update il and jl for i-th row */
1517:           il[i] = jmin;
1518:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1519:         }
1520:         i = nexti;
1521:       }

1523:       /* shift the diagonals when zero pivot is detected */
1524:       /* compute rs=sum of abs(off-diagonal) */
1525:       rs   = 0.0;
1526:       jmin = bi[k]+1;
1527:       nz   = bi[k+1] - jmin;
1528:       if (nz){
1529:         bcol = bj + jmin;
1530:         while (nz--){
1531:           rs += PetscAbsScalar(rtmp[*bcol]);
1532:           bcol++;
1533:         }
1534:       }

1536:       sctx.rs = rs;
1537:       sctx.pv = dk;
1538:       MatPivotCheck(A,info,&sctx,k);
1539:       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1540:       dk = sctx.pv;
1541: 
1542:       /* copy data into U(k,:) */
1543:       ba[bi[k]] = 1.0/dk;
1544:       jmin      = bi[k]+1;
1545:       nz        = bi[k+1] - jmin;
1546:       if (nz){
1547:         bcol = bj + jmin;
1548:         bval = ba + jmin;
1549:         while (nz--){
1550:           *bval++       = rtmp[*bcol];
1551:           rtmp[*bcol++] = 0.0;
1552:         }
1553:         /* add k-th row into il and jl */
1554:         il[k] = jmin;
1555:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1556:       }
1557:     } /* end of for (k = 0; k<mbs; k++) */
1558:   } while (sctx.newshift);
1559:   PetscFree(rtmp);
1560:   PetscFree2(il,jl);
1561: 
1562:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1563:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1564:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1565:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1566:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1568:   C->assembled    = PETSC_TRUE;
1569:   C->preallocated = PETSC_TRUE;
1570:   PetscLogFlops(C->rmap->N);
1571:   if (sctx.nshift){
1572:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1573:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1574:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1575:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1576:     }
1577:   }
1578:   return(0);
1579: }

1583: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info)
1584: {
1586:   Mat            C;

1589:   MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C);
1590:   MatCholeskyFactorSymbolic(C,A,perm,info);
1591:   MatCholeskyFactorNumeric(C,A,info);
1592:   A->ops->solve            = C->ops->solve;
1593:   A->ops->solvetranspose   = C->ops->solvetranspose;
1594:   MatHeaderMerge(A,C);
1595:   return(0);
1596: }