Actual source code: aijfact.c
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
10: /*
11: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
13: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14: */
15: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16: {
17: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
18: PetscErrorCode ierr;
19: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20: const PetscInt *ai = a->i, *aj = a->j;
21: const PetscScalar *aa = a->a;
22: PetscBool *done;
23: PetscReal best,past = 0,future;
26: /* pick initial row */
27: best = -1;
28: for (i=0; i<n; i++) {
29: future = 0.0;
30: for (j=ai[i]; j<ai[i+1]; j++) {
31: if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc(n*sizeof(PetscBool),&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc(n*sizeof(PetscInt),&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
98: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg)
99: {
101: *flg = PETSC_TRUE;
102: return(0);
103: }
109: PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110: {
111: PetscInt n = A->rmap->n;
112: PetscErrorCode ierr;
115: MatCreate(((PetscObject)A)->comm,B);
116: MatSetSizes(*B,n,n,n,n);
117: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118: MatSetType(*B,MATSEQAIJ);
119: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
121: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
122: MatSetType(*B,MATSEQSBAIJ);
123: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
124: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
125: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
127: (*B)->factortype = ftype;
128: return(0);
129: }
134: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
135: {
136: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
137: IS isicol;
138: PetscErrorCode ierr;
139: const PetscInt *r,*ic;
140: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
141: PetscInt *bi,*bj,*ajtmp;
142: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
143: PetscReal f;
144: PetscInt nlnk,*lnk,k,**bi_ptr;
145: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
146: PetscBT lnkbt;
147:
149: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
150: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
151: ISGetIndices(isrow,&r);
152: ISGetIndices(isicol,&ic);
154: /* get new row pointers */
155: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
156: bi[0] = 0;
158: /* bdiag is location of diagonal in factor */
159: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
160: bdiag[0] = 0;
162: /* linked list for storing column indices of the active row */
163: nlnk = n + 1;
164: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
166: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
168: /* initial FreeSpace size is f*(ai[n]+1) */
169: f = info->fill;
170: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
171: current_space = free_space;
173: for (i=0; i<n; i++) {
174: /* copy previous fill into linked list */
175: nzi = 0;
176: nnz = ai[r[i]+1] - ai[r[i]];
177: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
178: ajtmp = aj + ai[r[i]];
179: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
180: nzi += nlnk;
182: /* add pivot rows into linked list */
183: row = lnk[n];
184: while (row < i) {
185: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
186: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
187: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
188: nzi += nlnk;
189: row = lnk[row];
190: }
191: bi[i+1] = bi[i] + nzi;
192: im[i] = nzi;
194: /* mark bdiag */
195: nzbd = 0;
196: nnz = nzi;
197: k = lnk[n];
198: while (nnz-- && k < i){
199: nzbd++;
200: k = lnk[k];
201: }
202: bdiag[i] = bi[i] + nzbd;
204: /* if free space is not available, make more free space */
205: if (current_space->local_remaining<nzi) {
206: nnz = (n - i)*nzi; /* estimated and max additional space needed */
207: PetscFreeSpaceGet(nnz,¤t_space);
208: reallocs++;
209: }
211: /* copy data into free space, then initialize lnk */
212: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
213: bi_ptr[i] = current_space->array;
214: current_space->array += nzi;
215: current_space->local_used += nzi;
216: current_space->local_remaining -= nzi;
217: }
218: #if defined(PETSC_USE_INFO)
219: if (ai[n] != 0) {
220: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
222: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
223: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
224: PetscInfo(A,"for best performance.\n");
225: } else {
226: PetscInfo(A,"Empty matrix\n");
227: }
228: #endif
230: ISRestoreIndices(isrow,&r);
231: ISRestoreIndices(isicol,&ic);
233: /* destroy list of free space and other temporary array(s) */
234: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
235: PetscFreeSpaceContiguous(&free_space,bj);
236: PetscLLDestroy(lnk,lnkbt);
237: PetscFree2(bi_ptr,im);
239: /* put together the new matrix */
240: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
241: PetscLogObjectParent(B,isicol);
242: b = (Mat_SeqAIJ*)(B)->data;
243: b->free_a = PETSC_TRUE;
244: b->free_ij = PETSC_TRUE;
245: b->singlemalloc = PETSC_FALSE;
246: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
247: b->j = bj;
248: b->i = bi;
249: b->diag = bdiag;
250: b->ilen = 0;
251: b->imax = 0;
252: b->row = isrow;
253: b->col = iscol;
254: PetscObjectReference((PetscObject)isrow);
255: PetscObjectReference((PetscObject)iscol);
256: b->icol = isicol;
257: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
259: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
260: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
261: b->maxnz = b->nz = bi[n] ;
263: (B)->factortype = MAT_FACTOR_LU;
264: (B)->info.factor_mallocs = reallocs;
265: (B)->info.fill_ratio_given = f;
267: if (ai[n]) {
268: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
269: } else {
270: (B)->info.fill_ratio_needed = 0.0;
271: }
272: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
273: if (a->inode.size) {
274: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
275: }
276: return(0);
277: }
281: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
282: {
283: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
284: IS isicol;
285: PetscErrorCode ierr;
286: const PetscInt *r,*ic;
287: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
288: PetscInt *bi,*bj,*ajtmp;
289: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
290: PetscReal f;
291: PetscInt nlnk,*lnk,k,**bi_ptr;
292: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
293: PetscBT lnkbt;
294: PetscBool olddatastruct=PETSC_FALSE;
297: /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
298: PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);
299: if(olddatastruct){
300: MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);
301: return(0);
302: }
303:
304: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
305: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
306: ISGetIndices(isrow,&r);
307: ISGetIndices(isicol,&ic);
309: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
310: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
311: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
312: bi[0] = bdiag[0] = 0;
314: /* linked list for storing column indices of the active row */
315: nlnk = n + 1;
316: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
318: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
320: /* initial FreeSpace size is f*(ai[n]+1) */
321: f = info->fill;
322: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
323: current_space = free_space;
325: for (i=0; i<n; i++) {
326: /* copy previous fill into linked list */
327: nzi = 0;
328: nnz = ai[r[i]+1] - ai[r[i]];
329: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
330: ajtmp = aj + ai[r[i]];
331: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
332: nzi += nlnk;
334: /* add pivot rows into linked list */
335: row = lnk[n];
336: while (row < i){
337: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
338: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
339: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
340: nzi += nlnk;
341: row = lnk[row];
342: }
343: bi[i+1] = bi[i] + nzi;
344: im[i] = nzi;
346: /* mark bdiag */
347: nzbd = 0;
348: nnz = nzi;
349: k = lnk[n];
350: while (nnz-- && k < i){
351: nzbd++;
352: k = lnk[k];
353: }
354: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
356: /* if free space is not available, make more free space */
357: if (current_space->local_remaining<nzi) {
358: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
359: PetscFreeSpaceGet(nnz,¤t_space);
360: reallocs++;
361: }
363: /* copy data into free space, then initialize lnk */
364: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
365: bi_ptr[i] = current_space->array;
366: current_space->array += nzi;
367: current_space->local_used += nzi;
368: current_space->local_remaining -= nzi;
369: }
371: ISRestoreIndices(isrow,&r);
372: ISRestoreIndices(isicol,&ic);
374: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
375: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
376: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
377: PetscLLDestroy(lnk,lnkbt);
378: PetscFree2(bi_ptr,im);
380: /* put together the new matrix */
381: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
382: PetscLogObjectParent(B,isicol);
383: b = (Mat_SeqAIJ*)(B)->data;
384: b->free_a = PETSC_TRUE;
385: b->free_ij = PETSC_TRUE;
386: b->singlemalloc = PETSC_FALSE;
387: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
388: b->j = bj;
389: b->i = bi;
390: b->diag = bdiag;
391: b->ilen = 0;
392: b->imax = 0;
393: b->row = isrow;
394: b->col = iscol;
395: PetscObjectReference((PetscObject)isrow);
396: PetscObjectReference((PetscObject)iscol);
397: b->icol = isicol;
398: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
400: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
401: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
402: b->maxnz = b->nz = bdiag[0]+1;
403: B->factortype = MAT_FACTOR_LU;
404: B->info.factor_mallocs = reallocs;
405: B->info.fill_ratio_given = f;
407: if (ai[n]) {
408: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
409: } else {
410: B->info.fill_ratio_needed = 0.0;
411: }
412: #if defined(PETSC_USE_INFO)
413: if (ai[n] != 0) {
414: PetscReal af = B->info.fill_ratio_needed;
415: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
416: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
417: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
418: PetscInfo(A,"for best performance.\n");
419: } else {
420: PetscInfo(A,"Empty matrix\n");
421: }
422: #endif
423: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
424: if (a->inode.size) {
425: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
426: }
427: return(0);
428: }
430: /*
431: Trouble in factorization, should we dump the original matrix?
432: */
435: PetscErrorCode MatFactorDumpMatrix(Mat A)
436: {
438: PetscBool flg = PETSC_FALSE;
441: PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);
442: if (flg) {
443: PetscViewer viewer;
444: char filename[PETSC_MAX_PATH_LEN];
446: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
447: PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
448: MatView(A,viewer);
449: PetscViewerDestroy(&viewer);
450: }
451: return(0);
452: }
456: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
457: {
458: Mat C=B;
459: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
460: IS isrow = b->row,isicol = b->icol;
461: PetscErrorCode ierr;
462: const PetscInt *r,*ic,*ics;
463: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
464: PetscInt i,j,k,nz,nzL,row,*pj;
465: const PetscInt *ajtmp,*bjtmp;
466: MatScalar *rtmp,*pc,multiplier,*pv;
467: const MatScalar *aa=a->a,*v;
468: PetscBool row_identity,col_identity;
469: FactorShiftCtx sctx;
470: const PetscInt *ddiag;
471: PetscReal rs;
472: MatScalar d;
475: /* MatPivotSetUp(): initialize shift context sctx */
476: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
478: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
479: ddiag = a->diag;
480: sctx.shift_top = info->zeropivot;
481: for (i=0; i<n; i++) {
482: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
483: d = (aa)[ddiag[i]];
484: rs = -PetscAbsScalar(d) - PetscRealPart(d);
485: v = aa+ai[i];
486: nz = ai[i+1] - ai[i];
487: for (j=0; j<nz; j++)
488: rs += PetscAbsScalar(v[j]);
489: if (rs>sctx.shift_top) sctx.shift_top = rs;
490: }
491: sctx.shift_top *= 1.1;
492: sctx.nshift_max = 5;
493: sctx.shift_lo = 0.;
494: sctx.shift_hi = 1.;
495: }
497: ISGetIndices(isrow,&r);
498: ISGetIndices(isicol,&ic);
499: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
500: ics = ic;
502: do {
503: sctx.newshift = PETSC_FALSE;
504: for (i=0; i<n; i++){
505: /* zero rtmp */
506: /* L part */
507: nz = bi[i+1] - bi[i];
508: bjtmp = bj + bi[i];
509: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
511: /* U part */
512: nz = bdiag[i]-bdiag[i+1];
513: bjtmp = bj + bdiag[i+1]+1;
514: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
515:
516: /* load in initial (unfactored row) */
517: nz = ai[r[i]+1] - ai[r[i]];
518: ajtmp = aj + ai[r[i]];
519: v = aa + ai[r[i]];
520: for (j=0; j<nz; j++) {
521: rtmp[ics[ajtmp[j]]] = v[j];
522: }
523: /* ZeropivotApply() */
524: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
525:
526: /* elimination */
527: bjtmp = bj + bi[i];
528: row = *bjtmp++;
529: nzL = bi[i+1] - bi[i];
530: for(k=0; k < nzL;k++) {
531: pc = rtmp + row;
532: if (*pc != 0.0) {
533: pv = b->a + bdiag[row];
534: multiplier = *pc * (*pv);
535: *pc = multiplier;
536: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
537: pv = b->a + bdiag[row+1]+1;
538: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
539: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
540: PetscLogFlops(2.0*nz);
541: }
542: row = *bjtmp++;
543: }
545: /* finished row so stick it into b->a */
546: rs = 0.0;
547: /* L part */
548: pv = b->a + bi[i] ;
549: pj = b->j + bi[i] ;
550: nz = bi[i+1] - bi[i];
551: for (j=0; j<nz; j++) {
552: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
553: }
555: /* U part */
556: pv = b->a + bdiag[i+1]+1;
557: pj = b->j + bdiag[i+1]+1;
558: nz = bdiag[i] - bdiag[i+1]-1;
559: for (j=0; j<nz; j++) {
560: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
561: }
563: sctx.rs = rs;
564: sctx.pv = rtmp[i];
565: MatPivotCheck(A,info,&sctx,i);
566: if(sctx.newshift) break; /* break for-loop */
567: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
569: /* Mark diagonal and invert diagonal for simplier triangular solves */
570: pv = b->a + bdiag[i];
571: *pv = 1.0/rtmp[i];
573: } /* endof for (i=0; i<n; i++){ */
575: /* MatPivotRefine() */
576: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
577: /*
578: * if no shift in this attempt & shifting & started shifting & can refine,
579: * then try lower shift
580: */
581: sctx.shift_hi = sctx.shift_fraction;
582: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
583: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
584: sctx.newshift = PETSC_TRUE;
585: sctx.nshift++;
586: }
587: } while (sctx.newshift);
589: PetscFree(rtmp);
590: ISRestoreIndices(isicol,&ic);
591: ISRestoreIndices(isrow,&r);
592:
593: ISIdentity(isrow,&row_identity);
594: ISIdentity(isicol,&col_identity);
595: if (row_identity && col_identity) {
596: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
597: } else {
598: C->ops->solve = MatSolve_SeqAIJ;
599: }
600: C->ops->solveadd = MatSolveAdd_SeqAIJ;
601: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
602: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
603: C->ops->matsolve = MatMatSolve_SeqAIJ;
604: C->assembled = PETSC_TRUE;
605: C->preallocated = PETSC_TRUE;
606: PetscLogFlops(C->cmap->n);
608: /* MatShiftView(A,info,&sctx) */
609: if (sctx.nshift){
610: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
611: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
612: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
613: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
614: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
615: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
616: }
617: }
618: Mat_CheckInode_FactorLU(C,PETSC_FALSE);
619: return(0);
620: }
624: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
625: {
626: Mat C=B;
627: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
628: IS isrow = b->row,isicol = b->icol;
629: PetscErrorCode ierr;
630: const PetscInt *r,*ic,*ics;
631: PetscInt nz,row,i,j,n=A->rmap->n,diag;
632: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
633: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
634: MatScalar *pv,*rtmp,*pc,multiplier,d;
635: const MatScalar *v,*aa=a->a;
636: PetscReal rs=0.0;
637: FactorShiftCtx sctx;
638: const PetscInt *ddiag;
639: PetscBool row_identity, col_identity;
642: /* MatPivotSetUp(): initialize shift context sctx */
643: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
645: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
646: ddiag = a->diag;
647: sctx.shift_top = info->zeropivot;
648: for (i=0; i<n; i++) {
649: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
650: d = (aa)[ddiag[i]];
651: rs = -PetscAbsScalar(d) - PetscRealPart(d);
652: v = aa+ai[i];
653: nz = ai[i+1] - ai[i];
654: for (j=0; j<nz; j++)
655: rs += PetscAbsScalar(v[j]);
656: if (rs>sctx.shift_top) sctx.shift_top = rs;
657: }
658: sctx.shift_top *= 1.1;
659: sctx.nshift_max = 5;
660: sctx.shift_lo = 0.;
661: sctx.shift_hi = 1.;
662: }
664: ISGetIndices(isrow,&r);
665: ISGetIndices(isicol,&ic);
666: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
667: ics = ic;
669: do {
670: sctx.newshift = PETSC_FALSE;
671: for (i=0; i<n; i++){
672: nz = bi[i+1] - bi[i];
673: bjtmp = bj + bi[i];
674: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
676: /* load in initial (unfactored row) */
677: nz = ai[r[i]+1] - ai[r[i]];
678: ajtmp = aj + ai[r[i]];
679: v = aa + ai[r[i]];
680: for (j=0; j<nz; j++) {
681: rtmp[ics[ajtmp[j]]] = v[j];
682: }
683: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
685: row = *bjtmp++;
686: while (row < i) {
687: pc = rtmp + row;
688: if (*pc != 0.0) {
689: pv = b->a + diag_offset[row];
690: pj = b->j + diag_offset[row] + 1;
691: multiplier = *pc / *pv++;
692: *pc = multiplier;
693: nz = bi[row+1] - diag_offset[row] - 1;
694: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
695: PetscLogFlops(2.0*nz);
696: }
697: row = *bjtmp++;
698: }
699: /* finished row so stick it into b->a */
700: pv = b->a + bi[i] ;
701: pj = b->j + bi[i] ;
702: nz = bi[i+1] - bi[i];
703: diag = diag_offset[i] - bi[i];
704: rs = 0.0;
705: for (j=0; j<nz; j++) {
706: pv[j] = rtmp[pj[j]];
707: rs += PetscAbsScalar(pv[j]);
708: }
709: rs -= PetscAbsScalar(pv[diag]);
711: sctx.rs = rs;
712: sctx.pv = pv[diag];
713: MatPivotCheck(A,info,&sctx,i);
714: if (sctx.newshift) break;
715: pv[diag] = sctx.pv;
716: }
718: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
719: /*
720: * if no shift in this attempt & shifting & started shifting & can refine,
721: * then try lower shift
722: */
723: sctx.shift_hi = sctx.shift_fraction;
724: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
725: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
726: sctx.newshift = PETSC_TRUE;
727: sctx.nshift++;
728: }
729: } while (sctx.newshift);
731: /* invert diagonal entries for simplier triangular solves */
732: for (i=0; i<n; i++) {
733: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
734: }
735: PetscFree(rtmp);
736: ISRestoreIndices(isicol,&ic);
737: ISRestoreIndices(isrow,&r);
739: ISIdentity(isrow,&row_identity);
740: ISIdentity(isicol,&col_identity);
741: if (row_identity && col_identity) {
742: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
743: } else {
744: C->ops->solve = MatSolve_SeqAIJ_inplace;
745: }
746: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
747: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
748: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
749: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
750: C->assembled = PETSC_TRUE;
751: C->preallocated = PETSC_TRUE;
752: PetscLogFlops(C->cmap->n);
753: if (sctx.nshift){
754: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
755: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
756: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
757: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
758: }
759: }
760: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
761: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
762: Mat_CheckInode(C,PETSC_FALSE);
763: return(0);
764: }
766: /*
767: This routine implements inplace ILU(0) with row or/and column permutations.
768: Input:
769: A - original matrix
770: Output;
771: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
772: a->j (col index) is permuted by the inverse of colperm, then sorted
773: a->a reordered accordingly with a->j
774: a->diag (ptr to diagonal elements) is updated.
775: */
778: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
779: {
780: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
781: IS isrow = a->row,isicol = a->icol;
783: const PetscInt *r,*ic,*ics;
784: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
785: PetscInt *ajtmp,nz,row;
786: PetscInt *diag = a->diag,nbdiag,*pj;
787: PetscScalar *rtmp,*pc,multiplier,d;
788: MatScalar *pv,*v;
789: PetscReal rs;
790: FactorShiftCtx sctx;
791: const MatScalar *aa=a->a,*vtmp;
794: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
796: /* MatPivotSetUp(): initialize shift context sctx */
797: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
799: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
800: const PetscInt *ddiag = a->diag;
801: sctx.shift_top = info->zeropivot;
802: for (i=0; i<n; i++) {
803: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
804: d = (aa)[ddiag[i]];
805: rs = -PetscAbsScalar(d) - PetscRealPart(d);
806: vtmp = aa+ai[i];
807: nz = ai[i+1] - ai[i];
808: for (j=0; j<nz; j++)
809: rs += PetscAbsScalar(vtmp[j]);
810: if (rs>sctx.shift_top) sctx.shift_top = rs;
811: }
812: sctx.shift_top *= 1.1;
813: sctx.nshift_max = 5;
814: sctx.shift_lo = 0.;
815: sctx.shift_hi = 1.;
816: }
818: ISGetIndices(isrow,&r);
819: ISGetIndices(isicol,&ic);
820: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
821: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
822: ics = ic;
824: #if defined(MV)
825: sctx.shift_top = 0.;
826: sctx.nshift_max = 0;
827: sctx.shift_lo = 0.;
828: sctx.shift_hi = 0.;
829: sctx.shift_fraction = 0.;
831: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
832: sctx.shift_top = 0.;
833: for (i=0; i<n; i++) {
834: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
835: d = (a->a)[diag[i]];
836: rs = -PetscAbsScalar(d) - PetscRealPart(d);
837: v = a->a+ai[i];
838: nz = ai[i+1] - ai[i];
839: for (j=0; j<nz; j++)
840: rs += PetscAbsScalar(v[j]);
841: if (rs>sctx.shift_top) sctx.shift_top = rs;
842: }
843: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
844: sctx.shift_top *= 1.1;
845: sctx.nshift_max = 5;
846: sctx.shift_lo = 0.;
847: sctx.shift_hi = 1.;
848: }
850: sctx.shift_amount = 0.;
851: sctx.nshift = 0;
852: #endif
854: do {
855: sctx.newshift = PETSC_FALSE;
856: for (i=0; i<n; i++){
857: /* load in initial unfactored row */
858: nz = ai[r[i]+1] - ai[r[i]];
859: ajtmp = aj + ai[r[i]];
860: v = a->a + ai[r[i]];
861: /* sort permuted ajtmp and values v accordingly */
862: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
863: PetscSortIntWithScalarArray(nz,ajtmp,v);
865: diag[r[i]] = ai[r[i]];
866: for (j=0; j<nz; j++) {
867: rtmp[ajtmp[j]] = v[j];
868: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
869: }
870: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
872: row = *ajtmp++;
873: while (row < i) {
874: pc = rtmp + row;
875: if (*pc != 0.0) {
876: pv = a->a + diag[r[row]];
877: pj = aj + diag[r[row]] + 1;
879: multiplier = *pc / *pv++;
880: *pc = multiplier;
881: nz = ai[r[row]+1] - diag[r[row]] - 1;
882: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
883: PetscLogFlops(2.0*nz);
884: }
885: row = *ajtmp++;
886: }
887: /* finished row so overwrite it onto a->a */
888: pv = a->a + ai[r[i]] ;
889: pj = aj + ai[r[i]] ;
890: nz = ai[r[i]+1] - ai[r[i]];
891: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
892:
893: rs = 0.0;
894: for (j=0; j<nz; j++) {
895: pv[j] = rtmp[pj[j]];
896: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
897: }
899: sctx.rs = rs;
900: sctx.pv = pv[nbdiag];
901: MatPivotCheck(A,info,&sctx,i);
902: if (sctx.newshift) break;
903: pv[nbdiag] = sctx.pv;
904: }
906: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
907: /*
908: * if no shift in this attempt & shifting & started shifting & can refine,
909: * then try lower shift
910: */
911: sctx.shift_hi = sctx.shift_fraction;
912: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
913: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
914: sctx.newshift = PETSC_TRUE;
915: sctx.nshift++;
916: }
917: } while (sctx.newshift);
919: /* invert diagonal entries for simplier triangular solves */
920: for (i=0; i<n; i++) {
921: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
922: }
924: PetscFree(rtmp);
925: ISRestoreIndices(isicol,&ic);
926: ISRestoreIndices(isrow,&r);
927: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
928: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
929: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
930: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
931: A->assembled = PETSC_TRUE;
932: A->preallocated = PETSC_TRUE;
933: PetscLogFlops(A->cmap->n);
934: if (sctx.nshift){
935: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
936: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
937: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
938: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
939: }
940: }
941: return(0);
942: }
944: /* ----------------------------------------------------------- */
947: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
948: {
950: Mat C;
953: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
954: MatLUFactorSymbolic(C,A,row,col,info);
955: MatLUFactorNumeric(C,A,info);
956: A->ops->solve = C->ops->solve;
957: A->ops->solvetranspose = C->ops->solvetranspose;
958: MatHeaderMerge(A,C);
959: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
960: return(0);
961: }
962: /* ----------------------------------------------------------- */
967: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
968: {
969: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
970: IS iscol = a->col,isrow = a->row;
971: PetscErrorCode ierr;
972: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
973: PetscInt nz;
974: const PetscInt *rout,*cout,*r,*c;
975: PetscScalar *x,*tmp,*tmps,sum;
976: const PetscScalar *b;
977: const MatScalar *aa = a->a,*v;
978:
980: if (!n) return(0);
982: VecGetArrayRead(bb,&b);
983: VecGetArray(xx,&x);
984: tmp = a->solve_work;
986: ISGetIndices(isrow,&rout); r = rout;
987: ISGetIndices(iscol,&cout); c = cout + (n-1);
989: /* forward solve the lower triangular */
990: tmp[0] = b[*r++];
991: tmps = tmp;
992: for (i=1; i<n; i++) {
993: v = aa + ai[i] ;
994: vi = aj + ai[i] ;
995: nz = a->diag[i] - ai[i];
996: sum = b[*r++];
997: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
998: tmp[i] = sum;
999: }
1001: /* backward solve the upper triangular */
1002: for (i=n-1; i>=0; i--){
1003: v = aa + a->diag[i] + 1;
1004: vi = aj + a->diag[i] + 1;
1005: nz = ai[i+1] - a->diag[i] - 1;
1006: sum = tmp[i];
1007: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1008: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1009: }
1011: ISRestoreIndices(isrow,&rout);
1012: ISRestoreIndices(iscol,&cout);
1013: VecRestoreArrayRead(bb,&b);
1014: VecRestoreArray(xx,&x);
1015: PetscLogFlops(2.0*a->nz - A->cmap->n);
1016: return(0);
1017: }
1021: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1022: {
1023: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1024: IS iscol = a->col,isrow = a->row;
1025: PetscErrorCode ierr;
1026: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1027: PetscInt nz,neq;
1028: const PetscInt *rout,*cout,*r,*c;
1029: PetscScalar *x,*b,*tmp,*tmps,sum;
1030: const MatScalar *aa = a->a,*v;
1031: PetscBool bisdense,xisdense;
1034: if (!n) return(0);
1036: PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1037: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1038: PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1039: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1041: MatGetArray(B,&b);
1042: MatGetArray(X,&x);
1043:
1044: tmp = a->solve_work;
1045: ISGetIndices(isrow,&rout); r = rout;
1046: ISGetIndices(iscol,&cout); c = cout;
1048: for (neq=0; neq<B->cmap->n; neq++){
1049: /* forward solve the lower triangular */
1050: tmp[0] = b[r[0]];
1051: tmps = tmp;
1052: for (i=1; i<n; i++) {
1053: v = aa + ai[i] ;
1054: vi = aj + ai[i] ;
1055: nz = a->diag[i] - ai[i];
1056: sum = b[r[i]];
1057: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1058: tmp[i] = sum;
1059: }
1060: /* backward solve the upper triangular */
1061: for (i=n-1; i>=0; i--){
1062: v = aa + a->diag[i] + 1;
1063: vi = aj + a->diag[i] + 1;
1064: nz = ai[i+1] - a->diag[i] - 1;
1065: sum = tmp[i];
1066: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1067: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1068: }
1070: b += n;
1071: x += n;
1072: }
1073: ISRestoreIndices(isrow,&rout);
1074: ISRestoreIndices(iscol,&cout);
1075: MatRestoreArray(B,&b);
1076: MatRestoreArray(X,&x);
1077: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1078: return(0);
1079: }
1083: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1084: {
1085: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1086: IS iscol = a->col,isrow = a->row;
1087: PetscErrorCode ierr;
1088: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1089: PetscInt nz,neq;
1090: const PetscInt *rout,*cout,*r,*c;
1091: PetscScalar *x,*b,*tmp,sum;
1092: const MatScalar *aa = a->a,*v;
1093: PetscBool bisdense,xisdense;
1096: if (!n) return(0);
1098: PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1099: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1100: PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1101: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1103: MatGetArray(B,&b);
1104: MatGetArray(X,&x);
1105:
1106: tmp = a->solve_work;
1107: ISGetIndices(isrow,&rout); r = rout;
1108: ISGetIndices(iscol,&cout); c = cout;
1110: for (neq=0; neq<B->cmap->n; neq++){
1111: /* forward solve the lower triangular */
1112: tmp[0] = b[r[0]];
1113: v = aa;
1114: vi = aj;
1115: for (i=1; i<n; i++) {
1116: nz = ai[i+1] - ai[i];
1117: sum = b[r[i]];
1118: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1119: tmp[i] = sum;
1120: v += nz; vi += nz;
1121: }
1123: /* backward solve the upper triangular */
1124: for (i=n-1; i>=0; i--){
1125: v = aa + adiag[i+1]+1;
1126: vi = aj + adiag[i+1]+1;
1127: nz = adiag[i]-adiag[i+1]-1;
1128: sum = tmp[i];
1129: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1130: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1131: }
1132:
1133: b += n;
1134: x += n;
1135: }
1136: ISRestoreIndices(isrow,&rout);
1137: ISRestoreIndices(iscol,&cout);
1138: MatRestoreArray(B,&b);
1139: MatRestoreArray(X,&x);
1140: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1141: return(0);
1142: }
1146: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1147: {
1148: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1149: IS iscol = a->col,isrow = a->row;
1150: PetscErrorCode ierr;
1151: const PetscInt *r,*c,*rout,*cout;
1152: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1153: PetscInt nz,row;
1154: PetscScalar *x,*b,*tmp,*tmps,sum;
1155: const MatScalar *aa = a->a,*v;
1158: if (!n) return(0);
1160: VecGetArray(bb,&b);
1161: VecGetArray(xx,&x);
1162: tmp = a->solve_work;
1164: ISGetIndices(isrow,&rout); r = rout;
1165: ISGetIndices(iscol,&cout); c = cout + (n-1);
1167: /* forward solve the lower triangular */
1168: tmp[0] = b[*r++];
1169: tmps = tmp;
1170: for (row=1; row<n; row++) {
1171: i = rout[row]; /* permuted row */
1172: v = aa + ai[i] ;
1173: vi = aj + ai[i] ;
1174: nz = a->diag[i] - ai[i];
1175: sum = b[*r++];
1176: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1177: tmp[row] = sum;
1178: }
1180: /* backward solve the upper triangular */
1181: for (row=n-1; row>=0; row--){
1182: i = rout[row]; /* permuted row */
1183: v = aa + a->diag[i] + 1;
1184: vi = aj + a->diag[i] + 1;
1185: nz = ai[i+1] - a->diag[i] - 1;
1186: sum = tmp[row];
1187: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1188: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1189: }
1191: ISRestoreIndices(isrow,&rout);
1192: ISRestoreIndices(iscol,&cout);
1193: VecRestoreArray(bb,&b);
1194: VecRestoreArray(xx,&x);
1195: PetscLogFlops(2.0*a->nz - A->cmap->n);
1196: return(0);
1197: }
1199: /* ----------------------------------------------------------- */
1200: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1203: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1204: {
1205: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1206: PetscErrorCode ierr;
1207: PetscInt n = A->rmap->n;
1208: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1209: PetscScalar *x;
1210: const PetscScalar *b;
1211: const MatScalar *aa = a->a;
1212: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1213: PetscInt adiag_i,i,nz,ai_i;
1214: const PetscInt *vi;
1215: const MatScalar *v;
1216: PetscScalar sum;
1217: #endif
1220: if (!n) return(0);
1222: VecGetArrayRead(bb,&b);
1223: VecGetArray(xx,&x);
1225: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1226: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1227: #else
1228: /* forward solve the lower triangular */
1229: x[0] = b[0];
1230: for (i=1; i<n; i++) {
1231: ai_i = ai[i];
1232: v = aa + ai_i;
1233: vi = aj + ai_i;
1234: nz = adiag[i] - ai_i;
1235: sum = b[i];
1236: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1237: x[i] = sum;
1238: }
1240: /* backward solve the upper triangular */
1241: for (i=n-1; i>=0; i--){
1242: adiag_i = adiag[i];
1243: v = aa + adiag_i + 1;
1244: vi = aj + adiag_i + 1;
1245: nz = ai[i+1] - adiag_i - 1;
1246: sum = x[i];
1247: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1248: x[i] = sum*aa[adiag_i];
1249: }
1250: #endif
1251: PetscLogFlops(2.0*a->nz - A->cmap->n);
1252: VecRestoreArrayRead(bb,&b);
1253: VecRestoreArray(xx,&x);
1254: return(0);
1255: }
1259: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1260: {
1261: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1262: IS iscol = a->col,isrow = a->row;
1263: PetscErrorCode ierr;
1264: PetscInt i, n = A->rmap->n,j;
1265: PetscInt nz;
1266: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1267: PetscScalar *x,*tmp,sum;
1268: const PetscScalar *b;
1269: const MatScalar *aa = a->a,*v;
1272: if (yy != xx) {VecCopy(yy,xx);}
1274: VecGetArrayRead(bb,&b);
1275: VecGetArray(xx,&x);
1276: tmp = a->solve_work;
1278: ISGetIndices(isrow,&rout); r = rout;
1279: ISGetIndices(iscol,&cout); c = cout + (n-1);
1281: /* forward solve the lower triangular */
1282: tmp[0] = b[*r++];
1283: for (i=1; i<n; i++) {
1284: v = aa + ai[i] ;
1285: vi = aj + ai[i] ;
1286: nz = a->diag[i] - ai[i];
1287: sum = b[*r++];
1288: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1289: tmp[i] = sum;
1290: }
1292: /* backward solve the upper triangular */
1293: for (i=n-1; i>=0; i--){
1294: v = aa + a->diag[i] + 1;
1295: vi = aj + a->diag[i] + 1;
1296: nz = ai[i+1] - a->diag[i] - 1;
1297: sum = tmp[i];
1298: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1299: tmp[i] = sum*aa[a->diag[i]];
1300: x[*c--] += tmp[i];
1301: }
1303: ISRestoreIndices(isrow,&rout);
1304: ISRestoreIndices(iscol,&cout);
1305: VecRestoreArrayRead(bb,&b);
1306: VecRestoreArray(xx,&x);
1307: PetscLogFlops(2.0*a->nz);
1309: return(0);
1310: }
1314: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1315: {
1316: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1317: IS iscol = a->col,isrow = a->row;
1318: PetscErrorCode ierr;
1319: PetscInt i, n = A->rmap->n,j;
1320: PetscInt nz;
1321: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1322: PetscScalar *x,*tmp,sum;
1323: const PetscScalar *b;
1324: const MatScalar *aa = a->a,*v;
1327: if (yy != xx) {VecCopy(yy,xx);}
1329: VecGetArrayRead(bb,&b);
1330: VecGetArray(xx,&x);
1331: tmp = a->solve_work;
1333: ISGetIndices(isrow,&rout); r = rout;
1334: ISGetIndices(iscol,&cout); c = cout;
1336: /* forward solve the lower triangular */
1337: tmp[0] = b[r[0]];
1338: v = aa;
1339: vi = aj;
1340: for (i=1; i<n; i++) {
1341: nz = ai[i+1] - ai[i];
1342: sum = b[r[i]];
1343: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1344: tmp[i] = sum;
1345: v += nz; vi += nz;
1346: }
1348: /* backward solve the upper triangular */
1349: v = aa + adiag[n-1];
1350: vi = aj + adiag[n-1];
1351: for (i=n-1; i>=0; i--){
1352: nz = adiag[i] - adiag[i+1] - 1;
1353: sum = tmp[i];
1354: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1355: tmp[i] = sum*v[nz];
1356: x[c[i]] += tmp[i];
1357: v += nz+1; vi += nz+1;
1358: }
1360: ISRestoreIndices(isrow,&rout);
1361: ISRestoreIndices(iscol,&cout);
1362: VecRestoreArrayRead(bb,&b);
1363: VecRestoreArray(xx,&x);
1364: PetscLogFlops(2.0*a->nz);
1366: return(0);
1367: }
1371: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1372: {
1373: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1374: IS iscol = a->col,isrow = a->row;
1375: PetscErrorCode ierr;
1376: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1377: PetscInt i,n = A->rmap->n,j;
1378: PetscInt nz;
1379: PetscScalar *x,*tmp,s1;
1380: const MatScalar *aa = a->a,*v;
1381: const PetscScalar *b;
1384: VecGetArrayRead(bb,&b);
1385: VecGetArray(xx,&x);
1386: tmp = a->solve_work;
1388: ISGetIndices(isrow,&rout); r = rout;
1389: ISGetIndices(iscol,&cout); c = cout;
1391: /* copy the b into temp work space according to permutation */
1392: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1394: /* forward solve the U^T */
1395: for (i=0; i<n; i++) {
1396: v = aa + diag[i] ;
1397: vi = aj + diag[i] + 1;
1398: nz = ai[i+1] - diag[i] - 1;
1399: s1 = tmp[i];
1400: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1401: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1402: tmp[i] = s1;
1403: }
1405: /* backward solve the L^T */
1406: for (i=n-1; i>=0; i--){
1407: v = aa + diag[i] - 1 ;
1408: vi = aj + diag[i] - 1 ;
1409: nz = diag[i] - ai[i];
1410: s1 = tmp[i];
1411: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1412: }
1414: /* copy tmp into x according to permutation */
1415: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1417: ISRestoreIndices(isrow,&rout);
1418: ISRestoreIndices(iscol,&cout);
1419: VecRestoreArrayRead(bb,&b);
1420: VecRestoreArray(xx,&x);
1422: PetscLogFlops(2.0*a->nz-A->cmap->n);
1423: return(0);
1424: }
1428: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1429: {
1430: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1431: IS iscol = a->col,isrow = a->row;
1432: PetscErrorCode ierr;
1433: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1434: PetscInt i,n = A->rmap->n,j;
1435: PetscInt nz;
1436: PetscScalar *x,*tmp,s1;
1437: const MatScalar *aa = a->a,*v;
1438: const PetscScalar *b;
1441: VecGetArrayRead(bb,&b);
1442: VecGetArray(xx,&x);
1443: tmp = a->solve_work;
1445: ISGetIndices(isrow,&rout); r = rout;
1446: ISGetIndices(iscol,&cout); c = cout;
1448: /* copy the b into temp work space according to permutation */
1449: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1451: /* forward solve the U^T */
1452: for (i=0; i<n; i++) {
1453: v = aa + adiag[i+1] + 1;
1454: vi = aj + adiag[i+1] + 1;
1455: nz = adiag[i] - adiag[i+1] - 1;
1456: s1 = tmp[i];
1457: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1458: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1459: tmp[i] = s1;
1460: }
1462: /* backward solve the L^T */
1463: for (i=n-1; i>=0; i--){
1464: v = aa + ai[i];
1465: vi = aj + ai[i];
1466: nz = ai[i+1] - ai[i];
1467: s1 = tmp[i];
1468: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1469: }
1471: /* copy tmp into x according to permutation */
1472: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1474: ISRestoreIndices(isrow,&rout);
1475: ISRestoreIndices(iscol,&cout);
1476: VecRestoreArrayRead(bb,&b);
1477: VecRestoreArray(xx,&x);
1479: PetscLogFlops(2.0*a->nz-A->cmap->n);
1480: return(0);
1481: }
1485: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1486: {
1487: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1488: IS iscol = a->col,isrow = a->row;
1489: PetscErrorCode ierr;
1490: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1491: PetscInt i,n = A->rmap->n,j;
1492: PetscInt nz;
1493: PetscScalar *x,*tmp,s1;
1494: const MatScalar *aa = a->a,*v;
1495: const PetscScalar *b;
1498: if (zz != xx) {VecCopy(zz,xx);}
1499: VecGetArrayRead(bb,&b);
1500: VecGetArray(xx,&x);
1501: tmp = a->solve_work;
1503: ISGetIndices(isrow,&rout); r = rout;
1504: ISGetIndices(iscol,&cout); c = cout;
1506: /* copy the b into temp work space according to permutation */
1507: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1509: /* forward solve the U^T */
1510: for (i=0; i<n; i++) {
1511: v = aa + diag[i] ;
1512: vi = aj + diag[i] + 1;
1513: nz = ai[i+1] - diag[i] - 1;
1514: s1 = tmp[i];
1515: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1516: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1517: tmp[i] = s1;
1518: }
1520: /* backward solve the L^T */
1521: for (i=n-1; i>=0; i--){
1522: v = aa + diag[i] - 1 ;
1523: vi = aj + diag[i] - 1 ;
1524: nz = diag[i] - ai[i];
1525: s1 = tmp[i];
1526: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1527: }
1529: /* copy tmp into x according to permutation */
1530: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1532: ISRestoreIndices(isrow,&rout);
1533: ISRestoreIndices(iscol,&cout);
1534: VecRestoreArrayRead(bb,&b);
1535: VecRestoreArray(xx,&x);
1537: PetscLogFlops(2.0*a->nz-A->cmap->n);
1538: return(0);
1539: }
1543: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1544: {
1545: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1546: IS iscol = a->col,isrow = a->row;
1547: PetscErrorCode ierr;
1548: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1549: PetscInt i,n = A->rmap->n,j;
1550: PetscInt nz;
1551: PetscScalar *x,*tmp,s1;
1552: const MatScalar *aa = a->a,*v;
1553: const PetscScalar *b;
1556: if (zz != xx) {VecCopy(zz,xx);}
1557: VecGetArrayRead(bb,&b);
1558: VecGetArray(xx,&x);
1559: tmp = a->solve_work;
1561: ISGetIndices(isrow,&rout); r = rout;
1562: ISGetIndices(iscol,&cout); c = cout;
1564: /* copy the b into temp work space according to permutation */
1565: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1567: /* forward solve the U^T */
1568: for (i=0; i<n; i++) {
1569: v = aa + adiag[i+1] + 1;
1570: vi = aj + adiag[i+1] + 1;
1571: nz = adiag[i] - adiag[i+1] - 1;
1572: s1 = tmp[i];
1573: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1574: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1575: tmp[i] = s1;
1576: }
1579: /* backward solve the L^T */
1580: for (i=n-1; i>=0; i--){
1581: v = aa + ai[i] ;
1582: vi = aj + ai[i];
1583: nz = ai[i+1] - ai[i];
1584: s1 = tmp[i];
1585: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1586: }
1588: /* copy tmp into x according to permutation */
1589: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1591: ISRestoreIndices(isrow,&rout);
1592: ISRestoreIndices(iscol,&cout);
1593: VecRestoreArrayRead(bb,&b);
1594: VecRestoreArray(xx,&x);
1596: PetscLogFlops(2.0*a->nz-A->cmap->n);
1597: return(0);
1598: }
1600: /* ----------------------------------------------------------------*/
1604: /*
1605: ilu() under revised new data structure.
1606: Factored arrays bj and ba are stored as
1607: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1609: bi=fact->i is an array of size n+1, in which
1610: bi+
1611: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1612: bi[n]: points to L(n-1,n-1)+1
1613:
1614: bdiag=fact->diag is an array of size n+1,in which
1615: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1616: bdiag[n]: points to entry of U(n-1,0)-1
1618: U(i,:) contains bdiag[i] as its last entry, i.e.,
1619: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1620: */
1623: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1624: {
1625:
1626: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1627: PetscErrorCode ierr;
1628: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1629: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1630: PetscBool missing;
1631: IS isicol;
1634: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1635: MatMissingDiagonal(A,&missing,&i);
1636: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1637: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1639: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1640: b = (Mat_SeqAIJ*)(fact)->data;
1642: /* allocate matrix arrays for new data structure */
1643: PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);
1644: PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1645: b->singlemalloc = PETSC_TRUE;
1646: if (!b->diag){
1647: PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);
1648: PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));
1649: }
1650: bdiag = b->diag;
1651:
1652: if (n > 0) {
1653: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1654: }
1655:
1656: /* set bi and bj with new data structure */
1657: bi = b->i;
1658: bj = b->j;
1660: /* L part */
1661: bi[0] = 0;
1662: for (i=0; i<n; i++){
1663: nz = adiag[i] - ai[i];
1664: bi[i+1] = bi[i] + nz;
1665: aj = a->j + ai[i];
1666: for (j=0; j<nz; j++){
1667: /* *bj = aj[j]; bj++; */
1668: bj[k++] = aj[j];
1669: }
1670: }
1671:
1672: /* U part */
1673: bdiag[n] = bi[n]-1;
1674: for (i=n-1; i>=0; i--){
1675: nz = ai[i+1] - adiag[i] - 1;
1676: aj = a->j + adiag[i] + 1;
1677: for (j=0; j<nz; j++){
1678: /* *bj = aj[j]; bj++; */
1679: bj[k++] = aj[j];
1680: }
1681: /* diag[i] */
1682: /* *bj = i; bj++; */
1683: bj[k++] = i;
1684: bdiag[i] = bdiag[i+1] + nz + 1;
1685: }
1687: fact->factortype = MAT_FACTOR_ILU;
1688: fact->info.factor_mallocs = 0;
1689: fact->info.fill_ratio_given = info->fill;
1690: fact->info.fill_ratio_needed = 1.0;
1691: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1693: b = (Mat_SeqAIJ*)(fact)->data;
1694: b->row = isrow;
1695: b->col = iscol;
1696: b->icol = isicol;
1697: PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1698: PetscObjectReference((PetscObject)isrow);
1699: PetscObjectReference((PetscObject)iscol);
1700: return(0);
1701: }
1705: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1706: {
1707: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1708: IS isicol;
1709: PetscErrorCode ierr;
1710: const PetscInt *r,*ic;
1711: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1712: PetscInt *bi,*cols,nnz,*cols_lvl;
1713: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1714: PetscInt i,levels,diagonal_fill;
1715: PetscBool col_identity,row_identity;
1716: PetscReal f;
1717: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1718: PetscBT lnkbt;
1719: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1720: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1721: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1722:
1724: /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1725: /*
1726: PetscBool olddatastruct=PETSC_FALSE;
1727: PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);
1728: if(olddatastruct){
1729: MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);
1730: return(0);
1731: }
1732: */
1733:
1734: levels = (PetscInt)info->levels;
1735: ISIdentity(isrow,&row_identity);
1736: ISIdentity(iscol,&col_identity);
1738: if (!levels && row_identity && col_identity) {
1739: /* special case: ilu(0) with natural ordering */
1740: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1741: if (a->inode.size) {
1742: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1743: }
1744: return(0);
1745: }
1747: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1748: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1749: ISGetIndices(isrow,&r);
1750: ISGetIndices(isicol,&ic);
1752: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1753: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1754: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1755: bi[0] = bdiag[0] = 0;
1757: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1759: /* create a linked list for storing column indices of the active row */
1760: nlnk = n + 1;
1761: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1763: /* initial FreeSpace size is f*(ai[n]+1) */
1764: f = info->fill;
1765: diagonal_fill = (PetscInt)info->diagonal_fill;
1766: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1767: current_space = free_space;
1768: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1769: current_space_lvl = free_space_lvl;
1770:
1771: for (i=0; i<n; i++) {
1772: nzi = 0;
1773: /* copy current row into linked list */
1774: nnz = ai[r[i]+1] - ai[r[i]];
1775: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1776: cols = aj + ai[r[i]];
1777: lnk[i] = -1; /* marker to indicate if diagonal exists */
1778: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1779: nzi += nlnk;
1781: /* make sure diagonal entry is included */
1782: if (diagonal_fill && lnk[i] == -1) {
1783: fm = n;
1784: while (lnk[fm] < i) fm = lnk[fm];
1785: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1786: lnk[fm] = i;
1787: lnk_lvl[i] = 0;
1788: nzi++; dcount++;
1789: }
1791: /* add pivot rows into the active row */
1792: nzbd = 0;
1793: prow = lnk[n];
1794: while (prow < i) {
1795: nnz = bdiag[prow];
1796: cols = bj_ptr[prow] + nnz + 1;
1797: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1798: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1799: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1800: nzi += nlnk;
1801: prow = lnk[prow];
1802: nzbd++;
1803: }
1804: bdiag[i] = nzbd;
1805: bi[i+1] = bi[i] + nzi;
1807: /* if free space is not available, make more free space */
1808: if (current_space->local_remaining<nzi) {
1809: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1810: PetscFreeSpaceGet(nnz,¤t_space);
1811: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1812: reallocs++;
1813: }
1815: /* copy data into free_space and free_space_lvl, then initialize lnk */
1816: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1817: bj_ptr[i] = current_space->array;
1818: bjlvl_ptr[i] = current_space_lvl->array;
1820: /* make sure the active row i has diagonal entry */
1821: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1823: current_space->array += nzi;
1824: current_space->local_used += nzi;
1825: current_space->local_remaining -= nzi;
1826: current_space_lvl->array += nzi;
1827: current_space_lvl->local_used += nzi;
1828: current_space_lvl->local_remaining -= nzi;
1829: }
1831: ISRestoreIndices(isrow,&r);
1832: ISRestoreIndices(isicol,&ic);
1834: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1835: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1836: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1837:
1838: PetscIncompleteLLDestroy(lnk,lnkbt);
1839: PetscFreeSpaceDestroy(free_space_lvl);
1840: PetscFree2(bj_ptr,bjlvl_ptr);
1842: #if defined(PETSC_USE_INFO)
1843: {
1844: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1845: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1846: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1847: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1848: PetscInfo(A,"for best performance.\n");
1849: if (diagonal_fill) {
1850: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1851: }
1852: }
1853: #endif
1855: /* put together the new matrix */
1856: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1857: PetscLogObjectParent(fact,isicol);
1858: b = (Mat_SeqAIJ*)(fact)->data;
1859: b->free_a = PETSC_TRUE;
1860: b->free_ij = PETSC_TRUE;
1861: b->singlemalloc = PETSC_FALSE;
1862: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
1863: b->j = bj;
1864: b->i = bi;
1865: b->diag = bdiag;
1866: b->ilen = 0;
1867: b->imax = 0;
1868: b->row = isrow;
1869: b->col = iscol;
1870: PetscObjectReference((PetscObject)isrow);
1871: PetscObjectReference((PetscObject)iscol);
1872: b->icol = isicol;
1873: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1874: /* In b structure: Free imax, ilen, old a, old j.
1875: Allocate bdiag, solve_work, new a, new j */
1876: PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1877: b->maxnz = b->nz = bdiag[0]+1;
1878: (fact)->info.factor_mallocs = reallocs;
1879: (fact)->info.fill_ratio_given = f;
1880: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1881: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1882: if (a->inode.size) {
1883: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1884: }
1885: return(0);
1886: }
1890: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1891: {
1892: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1893: IS isicol;
1894: PetscErrorCode ierr;
1895: const PetscInt *r,*ic;
1896: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1897: PetscInt *bi,*cols,nnz,*cols_lvl;
1898: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1899: PetscInt i,levels,diagonal_fill;
1900: PetscBool col_identity,row_identity;
1901: PetscReal f;
1902: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1903: PetscBT lnkbt;
1904: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1905: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1906: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1907: PetscBool missing;
1908:
1910: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1911: f = info->fill;
1912: levels = (PetscInt)info->levels;
1913: diagonal_fill = (PetscInt)info->diagonal_fill;
1914: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1916: ISIdentity(isrow,&row_identity);
1917: ISIdentity(iscol,&col_identity);
1918: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1919: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1920: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1921: if (a->inode.size) {
1922: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1923: }
1924: fact->factortype = MAT_FACTOR_ILU;
1925: (fact)->info.factor_mallocs = 0;
1926: (fact)->info.fill_ratio_given = info->fill;
1927: (fact)->info.fill_ratio_needed = 1.0;
1928: b = (Mat_SeqAIJ*)(fact)->data;
1929: MatMissingDiagonal(A,&missing,&d);
1930: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1931: b->row = isrow;
1932: b->col = iscol;
1933: b->icol = isicol;
1934: PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1935: PetscObjectReference((PetscObject)isrow);
1936: PetscObjectReference((PetscObject)iscol);
1937: return(0);
1938: }
1940: ISGetIndices(isrow,&r);
1941: ISGetIndices(isicol,&ic);
1943: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1944: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1945: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1946: bi[0] = bdiag[0] = 0;
1948: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1950: /* create a linked list for storing column indices of the active row */
1951: nlnk = n + 1;
1952: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1954: /* initial FreeSpace size is f*(ai[n]+1) */
1955: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1956: current_space = free_space;
1957: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1958: current_space_lvl = free_space_lvl;
1959:
1960: for (i=0; i<n; i++) {
1961: nzi = 0;
1962: /* copy current row into linked list */
1963: nnz = ai[r[i]+1] - ai[r[i]];
1964: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1965: cols = aj + ai[r[i]];
1966: lnk[i] = -1; /* marker to indicate if diagonal exists */
1967: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1968: nzi += nlnk;
1970: /* make sure diagonal entry is included */
1971: if (diagonal_fill && lnk[i] == -1) {
1972: fm = n;
1973: while (lnk[fm] < i) fm = lnk[fm];
1974: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1975: lnk[fm] = i;
1976: lnk_lvl[i] = 0;
1977: nzi++; dcount++;
1978: }
1980: /* add pivot rows into the active row */
1981: nzbd = 0;
1982: prow = lnk[n];
1983: while (prow < i) {
1984: nnz = bdiag[prow];
1985: cols = bj_ptr[prow] + nnz + 1;
1986: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1987: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1988: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1989: nzi += nlnk;
1990: prow = lnk[prow];
1991: nzbd++;
1992: }
1993: bdiag[i] = nzbd;
1994: bi[i+1] = bi[i] + nzi;
1996: /* if free space is not available, make more free space */
1997: if (current_space->local_remaining<nzi) {
1998: nnz = nzi*(n - i); /* estimated and max additional space needed */
1999: PetscFreeSpaceGet(nnz,¤t_space);
2000: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2001: reallocs++;
2002: }
2004: /* copy data into free_space and free_space_lvl, then initialize lnk */
2005: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2006: bj_ptr[i] = current_space->array;
2007: bjlvl_ptr[i] = current_space_lvl->array;
2009: /* make sure the active row i has diagonal entry */
2010: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2012: current_space->array += nzi;
2013: current_space->local_used += nzi;
2014: current_space->local_remaining -= nzi;
2015: current_space_lvl->array += nzi;
2016: current_space_lvl->local_used += nzi;
2017: current_space_lvl->local_remaining -= nzi;
2018: }
2020: ISRestoreIndices(isrow,&r);
2021: ISRestoreIndices(isicol,&ic);
2023: /* destroy list of free space and other temporary arrays */
2024: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
2025: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2026: PetscIncompleteLLDestroy(lnk,lnkbt);
2027: PetscFreeSpaceDestroy(free_space_lvl);
2028: PetscFree2(bj_ptr,bjlvl_ptr);
2030: #if defined(PETSC_USE_INFO)
2031: {
2032: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2033: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2034: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
2035: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
2036: PetscInfo(A,"for best performance.\n");
2037: if (diagonal_fill) {
2038: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2039: }
2040: }
2041: #endif
2043: /* put together the new matrix */
2044: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
2045: PetscLogObjectParent(fact,isicol);
2046: b = (Mat_SeqAIJ*)(fact)->data;
2047: b->free_a = PETSC_TRUE;
2048: b->free_ij = PETSC_TRUE;
2049: b->singlemalloc = PETSC_FALSE;
2050: PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);
2051: b->j = bj;
2052: b->i = bi;
2053: for (i=0; i<n; i++) bdiag[i] += bi[i];
2054: b->diag = bdiag;
2055: b->ilen = 0;
2056: b->imax = 0;
2057: b->row = isrow;
2058: b->col = iscol;
2059: PetscObjectReference((PetscObject)isrow);
2060: PetscObjectReference((PetscObject)iscol);
2061: b->icol = isicol;
2062: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
2063: /* In b structure: Free imax, ilen, old a, old j.
2064: Allocate bdiag, solve_work, new a, new j */
2065: PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2066: b->maxnz = b->nz = bi[n] ;
2067: (fact)->info.factor_mallocs = reallocs;
2068: (fact)->info.fill_ratio_given = f;
2069: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2070: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2071: if (a->inode.size) {
2072: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2073: }
2074: return(0);
2075: }
2079: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2080: {
2081: Mat C = B;
2082: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2083: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2084: IS ip=b->row,iip = b->icol;
2086: const PetscInt *rip,*riip;
2087: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2088: PetscInt *ai=a->i,*aj=a->j;
2089: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2090: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2091: PetscBool perm_identity;
2092: FactorShiftCtx sctx;
2093: PetscReal rs;
2094: MatScalar d,*v;
2097: /* MatPivotSetUp(): initialize shift context sctx */
2098: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2100: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2101: sctx.shift_top = info->zeropivot;
2102: for (i=0; i<mbs; i++) {
2103: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2104: d = (aa)[a->diag[i]];
2105: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2106: v = aa+ai[i];
2107: nz = ai[i+1] - ai[i];
2108: for (j=0; j<nz; j++)
2109: rs += PetscAbsScalar(v[j]);
2110: if (rs>sctx.shift_top) sctx.shift_top = rs;
2111: }
2112: sctx.shift_top *= 1.1;
2113: sctx.nshift_max = 5;
2114: sctx.shift_lo = 0.;
2115: sctx.shift_hi = 1.;
2116: }
2118: ISGetIndices(ip,&rip);
2119: ISGetIndices(iip,&riip);
2120:
2121: /* allocate working arrays
2122: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2123: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2124: */
2125: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);
2126:
2127: do {
2128: sctx.newshift = PETSC_FALSE;
2130: for (i=0; i<mbs; i++) c2r[i] = mbs;
2131: il[0] = 0;
2132:
2133: for (k = 0; k<mbs; k++){
2134: /* zero rtmp */
2135: nz = bi[k+1] - bi[k];
2136: bjtmp = bj + bi[k];
2137: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2138:
2139: /* load in initial unfactored row */
2140: bval = ba + bi[k];
2141: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2142: for (j = jmin; j < jmax; j++){
2143: col = riip[aj[j]];
2144: if (col >= k){ /* only take upper triangular entry */
2145: rtmp[col] = aa[j];
2146: *bval++ = 0.0; /* for in-place factorization */
2147: }
2148: }
2149: /* shift the diagonal of the matrix: ZeropivotApply() */
2150: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2151:
2152: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2153: dk = rtmp[k];
2154: i = c2r[k]; /* first row to be added to k_th row */
2156: while (i < k){
2157: nexti = c2r[i]; /* next row to be added to k_th row */
2158:
2159: /* compute multiplier, update diag(k) and U(i,k) */
2160: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2161: uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2162: dk += uikdi*ba[ili]; /* update diag[k] */
2163: ba[ili] = uikdi; /* -U(i,k) */
2165: /* add multiple of row i to k-th row */
2166: jmin = ili + 1; jmax = bi[i+1];
2167: if (jmin < jmax){
2168: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2169: /* update il and c2r for row i */
2170: il[i] = jmin;
2171: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2172: }
2173: i = nexti;
2174: }
2176: /* copy data into U(k,:) */
2177: rs = 0.0;
2178: jmin = bi[k]; jmax = bi[k+1]-1;
2179: if (jmin < jmax) {
2180: for (j=jmin; j<jmax; j++){
2181: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2182: }
2183: /* add the k-th row into il and c2r */
2184: il[k] = jmin;
2185: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2186: }
2188: /* MatPivotCheck() */
2189: sctx.rs = rs;
2190: sctx.pv = dk;
2191: MatPivotCheck(A,info,&sctx,i);
2192: if(sctx.newshift) break;
2193: dk = sctx.pv;
2194:
2195: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2196: }
2197: } while (sctx.newshift);
2198:
2199: PetscFree3(rtmp,il,c2r);
2200: ISRestoreIndices(ip,&rip);
2201: ISRestoreIndices(iip,&riip);
2203: ISIdentity(ip,&perm_identity);
2204: if (perm_identity){
2205: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2206: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2207: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2208: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2209: } else {
2210: B->ops->solve = MatSolve_SeqSBAIJ_1;
2211: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2212: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2213: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2214: }
2216: C->assembled = PETSC_TRUE;
2217: C->preallocated = PETSC_TRUE;
2218: PetscLogFlops(C->rmap->n);
2220: /* MatPivotView() */
2221: if (sctx.nshift){
2222: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2223: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
2224: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2225: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2226: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2227: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
2228: }
2229: }
2230: return(0);
2231: }
2235: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2236: {
2237: Mat C = B;
2238: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2239: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2240: IS ip=b->row,iip = b->icol;
2242: const PetscInt *rip,*riip;
2243: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2244: PetscInt *ai=a->i,*aj=a->j;
2245: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2246: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2247: PetscBool perm_identity;
2248: FactorShiftCtx sctx;
2249: PetscReal rs;
2250: MatScalar d,*v;
2253: /* MatPivotSetUp(): initialize shift context sctx */
2254: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2256: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2257: sctx.shift_top = info->zeropivot;
2258: for (i=0; i<mbs; i++) {
2259: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2260: d = (aa)[a->diag[i]];
2261: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2262: v = aa+ai[i];
2263: nz = ai[i+1] - ai[i];
2264: for (j=0; j<nz; j++)
2265: rs += PetscAbsScalar(v[j]);
2266: if (rs>sctx.shift_top) sctx.shift_top = rs;
2267: }
2268: sctx.shift_top *= 1.1;
2269: sctx.nshift_max = 5;
2270: sctx.shift_lo = 0.;
2271: sctx.shift_hi = 1.;
2272: }
2274: ISGetIndices(ip,&rip);
2275: ISGetIndices(iip,&riip);
2276:
2277: /* initialization */
2278: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);
2279:
2280: do {
2281: sctx.newshift = PETSC_FALSE;
2283: for (i=0; i<mbs; i++) jl[i] = mbs;
2284: il[0] = 0;
2285:
2286: for (k = 0; k<mbs; k++){
2287: /* zero rtmp */
2288: nz = bi[k+1] - bi[k];
2289: bjtmp = bj + bi[k];
2290: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2292: bval = ba + bi[k];
2293: /* initialize k-th row by the perm[k]-th row of A */
2294: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2295: for (j = jmin; j < jmax; j++){
2296: col = riip[aj[j]];
2297: if (col >= k){ /* only take upper triangular entry */
2298: rtmp[col] = aa[j];
2299: *bval++ = 0.0; /* for in-place factorization */
2300: }
2301: }
2302: /* shift the diagonal of the matrix */
2303: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2305: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2306: dk = rtmp[k];
2307: i = jl[k]; /* first row to be added to k_th row */
2309: while (i < k){
2310: nexti = jl[i]; /* next row to be added to k_th row */
2311:
2312: /* compute multiplier, update diag(k) and U(i,k) */
2313: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2314: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
2315: dk += uikdi*ba[ili];
2316: ba[ili] = uikdi; /* -U(i,k) */
2318: /* add multiple of row i to k-th row */
2319: jmin = ili + 1; jmax = bi[i+1];
2320: if (jmin < jmax){
2321: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2322: /* update il and jl for row i */
2323: il[i] = jmin;
2324: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2325: }
2326: i = nexti;
2327: }
2329: /* shift the diagonals when zero pivot is detected */
2330: /* compute rs=sum of abs(off-diagonal) */
2331: rs = 0.0;
2332: jmin = bi[k]+1;
2333: nz = bi[k+1] - jmin;
2334: bcol = bj + jmin;
2335: for (j=0; j<nz; j++) {
2336: rs += PetscAbsScalar(rtmp[bcol[j]]);
2337: }
2339: sctx.rs = rs;
2340: sctx.pv = dk;
2341: MatPivotCheck(A,info,&sctx,k);
2342: if (sctx.newshift) break;
2343: dk = sctx.pv;
2344:
2345: /* copy data into U(k,:) */
2346: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2347: jmin = bi[k]+1; jmax = bi[k+1];
2348: if (jmin < jmax) {
2349: for (j=jmin; j<jmax; j++){
2350: col = bj[j]; ba[j] = rtmp[col];
2351: }
2352: /* add the k-th row into il and jl */
2353: il[k] = jmin;
2354: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2355: }
2356: }
2357: } while (sctx.newshift);
2359: PetscFree3(rtmp,il,jl);
2360: ISRestoreIndices(ip,&rip);
2361: ISRestoreIndices(iip,&riip);
2363: ISIdentity(ip,&perm_identity);
2364: if (perm_identity){
2365: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369: } else {
2370: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2371: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2372: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2373: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2374: }
2376: C->assembled = PETSC_TRUE;
2377: C->preallocated = PETSC_TRUE;
2378: PetscLogFlops(C->rmap->n);
2379: if (sctx.nshift){
2380: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2381: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2382: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2383: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2384: }
2385: }
2386: return(0);
2387: }
2389: /*
2390: icc() under revised new data structure.
2391: Factored arrays bj and ba are stored as
2392: U(0,:),...,U(i,:),U(n-1,:)
2394: ui=fact->i is an array of size n+1, in which
2395: ui+
2396: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2397: ui[n]: points to U(n-1,n-1)+1
2398:
2399: udiag=fact->diag is an array of size n,in which
2400: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2402: U(i,:) contains udiag[i] as its last entry, i.e.,
2403: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2404: */
2408: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2409: {
2410: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2411: Mat_SeqSBAIJ *b;
2412: PetscErrorCode ierr;
2413: PetscBool perm_identity,missing;
2414: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2415: const PetscInt *rip,*riip;
2416: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2417: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2418: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2419: PetscReal fill=info->fill,levels=info->levels;
2420: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2421: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2422: PetscBT lnkbt;
2423: IS iperm;
2424:
2426: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2427: MatMissingDiagonal(A,&missing,&d);
2428: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2429: ISIdentity(perm,&perm_identity);
2430: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2432: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2433: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2434: ui[0] = 0;
2436: /* ICC(0) without matrix ordering: simply rearrange column indices */
2437: if (!levels && perm_identity) {
2438: for (i=0; i<am; i++) {
2439: ncols = ai[i+1] - a->diag[i];
2440: ui[i+1] = ui[i] + ncols;
2441: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2442: }
2443: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2444: cols = uj;
2445: for (i=0; i<am; i++) {
2446: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2447: ncols = ai[i+1] - a->diag[i] -1;
2448: for (j=0; j<ncols; j++) *cols++ = aj[j];
2449: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2450: }
2451: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2452: ISGetIndices(iperm,&riip);
2453: ISGetIndices(perm,&rip);
2455: /* initialization */
2456: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2458: /* jl: linked list for storing indices of the pivot rows
2459: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2460: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2461: for (i=0; i<am; i++){
2462: jl[i] = am; il[i] = 0;
2463: }
2465: /* create and initialize a linked list for storing column indices of the active row k */
2466: nlnk = am + 1;
2467: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2469: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2470: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2471: current_space = free_space;
2472: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2473: current_space_lvl = free_space_lvl;
2475: for (k=0; k<am; k++){ /* for each active row k */
2476: /* initialize lnk by the column indices of row rip[k] of A */
2477: nzk = 0;
2478: ncols = ai[rip[k]+1] - ai[rip[k]];
2479: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2480: ncols_upper = 0;
2481: for (j=0; j<ncols; j++){
2482: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2483: if (riip[i] >= k){ /* only take upper triangular entry */
2484: ajtmp[ncols_upper] = i;
2485: ncols_upper++;
2486: }
2487: }
2488: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2489: nzk += nlnk;
2491: /* update lnk by computing fill-in for each pivot row to be merged in */
2492: prow = jl[k]; /* 1st pivot row */
2493:
2494: while (prow < k){
2495: nextprow = jl[prow];
2496:
2497: /* merge prow into k-th row */
2498: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2499: jmax = ui[prow+1];
2500: ncols = jmax-jmin;
2501: i = jmin - ui[prow];
2502: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2503: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2504: j = *(uj - 1);
2505: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2506: nzk += nlnk;
2508: /* update il and jl for prow */
2509: if (jmin < jmax){
2510: il[prow] = jmin;
2511: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2512: }
2513: prow = nextprow;
2514: }
2516: /* if free space is not available, make more free space */
2517: if (current_space->local_remaining<nzk) {
2518: i = am - k + 1; /* num of unfactored rows */
2519: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2520: PetscFreeSpaceGet(i,¤t_space);
2521: PetscFreeSpaceGet(i,¤t_space_lvl);
2522: reallocs++;
2523: }
2525: /* copy data into free_space and free_space_lvl, then initialize lnk */
2526: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2527: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2529: /* add the k-th row into il and jl */
2530: if (nzk > 1){
2531: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2532: jl[k] = jl[i]; jl[i] = k;
2533: il[k] = ui[k] + 1;
2534: }
2535: uj_ptr[k] = current_space->array;
2536: uj_lvl_ptr[k] = current_space_lvl->array;
2538: current_space->array += nzk;
2539: current_space->local_used += nzk;
2540: current_space->local_remaining -= nzk;
2542: current_space_lvl->array += nzk;
2543: current_space_lvl->local_used += nzk;
2544: current_space_lvl->local_remaining -= nzk;
2546: ui[k+1] = ui[k] + nzk;
2547: }
2549: ISRestoreIndices(perm,&rip);
2550: ISRestoreIndices(iperm,&riip);
2551: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2552: PetscFree(ajtmp);
2554: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2555: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2556: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2557: PetscIncompleteLLDestroy(lnk,lnkbt);
2558: PetscFreeSpaceDestroy(free_space_lvl);
2560: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2562: /* put together the new matrix in MATSEQSBAIJ format */
2563: b = (Mat_SeqSBAIJ*)(fact)->data;
2564: b->singlemalloc = PETSC_FALSE;
2565: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2566: b->j = uj;
2567: b->i = ui;
2568: b->diag = udiag;
2569: b->free_diag = PETSC_TRUE;
2570: b->ilen = 0;
2571: b->imax = 0;
2572: b->row = perm;
2573: b->col = perm;
2574: PetscObjectReference((PetscObject)perm);
2575: PetscObjectReference((PetscObject)perm);
2576: b->icol = iperm;
2577: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2578: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2579: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2580: b->maxnz = b->nz = ui[am];
2581: b->free_a = PETSC_TRUE;
2582: b->free_ij = PETSC_TRUE;
2583:
2584: fact->info.factor_mallocs = reallocs;
2585: fact->info.fill_ratio_given = fill;
2586: if (ai[am] != 0) {
2587: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2588: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2589: } else {
2590: fact->info.fill_ratio_needed = 0.0;
2591: }
2592: #if defined(PETSC_USE_INFO)
2593: if (ai[am] != 0) {
2594: PetscReal af = fact->info.fill_ratio_needed;
2595: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2596: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2597: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2598: } else {
2599: PetscInfo(A,"Empty matrix.\n");
2600: }
2601: #endif
2602: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2603: return(0);
2604: }
2608: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2609: {
2610: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2611: Mat_SeqSBAIJ *b;
2612: PetscErrorCode ierr;
2613: PetscBool perm_identity,missing;
2614: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2615: const PetscInt *rip,*riip;
2616: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2617: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2618: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2619: PetscReal fill=info->fill,levels=info->levels;
2620: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2621: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2622: PetscBT lnkbt;
2623: IS iperm;
2624:
2626: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2627: MatMissingDiagonal(A,&missing,&d);
2628: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2629: ISIdentity(perm,&perm_identity);
2630: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2632: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2633: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2634: ui[0] = 0;
2636: /* ICC(0) without matrix ordering: simply copies fill pattern */
2637: if (!levels && perm_identity) {
2639: for (i=0; i<am; i++) {
2640: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2641: udiag[i] = ui[i];
2642: }
2643: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2644: cols = uj;
2645: for (i=0; i<am; i++) {
2646: aj = a->j + a->diag[i];
2647: ncols = ui[i+1] - ui[i];
2648: for (j=0; j<ncols; j++) *cols++ = *aj++;
2649: }
2650: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2651: ISGetIndices(iperm,&riip);
2652: ISGetIndices(perm,&rip);
2654: /* initialization */
2655: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2657: /* jl: linked list for storing indices of the pivot rows
2658: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2659: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2660: for (i=0; i<am; i++){
2661: jl[i] = am; il[i] = 0;
2662: }
2664: /* create and initialize a linked list for storing column indices of the active row k */
2665: nlnk = am + 1;
2666: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2668: /* initial FreeSpace size is fill*(ai[am]+1) */
2669: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2670: current_space = free_space;
2671: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2672: current_space_lvl = free_space_lvl;
2674: for (k=0; k<am; k++){ /* for each active row k */
2675: /* initialize lnk by the column indices of row rip[k] of A */
2676: nzk = 0;
2677: ncols = ai[rip[k]+1] - ai[rip[k]];
2678: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2679: ncols_upper = 0;
2680: for (j=0; j<ncols; j++){
2681: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2682: if (riip[i] >= k){ /* only take upper triangular entry */
2683: ajtmp[ncols_upper] = i;
2684: ncols_upper++;
2685: }
2686: }
2687: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2688: nzk += nlnk;
2690: /* update lnk by computing fill-in for each pivot row to be merged in */
2691: prow = jl[k]; /* 1st pivot row */
2692:
2693: while (prow < k){
2694: nextprow = jl[prow];
2695:
2696: /* merge prow into k-th row */
2697: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2698: jmax = ui[prow+1];
2699: ncols = jmax-jmin;
2700: i = jmin - ui[prow];
2701: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2702: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2703: j = *(uj - 1);
2704: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2705: nzk += nlnk;
2707: /* update il and jl for prow */
2708: if (jmin < jmax){
2709: il[prow] = jmin;
2710: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2711: }
2712: prow = nextprow;
2713: }
2715: /* if free space is not available, make more free space */
2716: if (current_space->local_remaining<nzk) {
2717: i = am - k + 1; /* num of unfactored rows */
2718: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2719: PetscFreeSpaceGet(i,¤t_space);
2720: PetscFreeSpaceGet(i,¤t_space_lvl);
2721: reallocs++;
2722: }
2724: /* copy data into free_space and free_space_lvl, then initialize lnk */
2725: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2726: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2728: /* add the k-th row into il and jl */
2729: if (nzk > 1){
2730: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2731: jl[k] = jl[i]; jl[i] = k;
2732: il[k] = ui[k] + 1;
2733: }
2734: uj_ptr[k] = current_space->array;
2735: uj_lvl_ptr[k] = current_space_lvl->array;
2737: current_space->array += nzk;
2738: current_space->local_used += nzk;
2739: current_space->local_remaining -= nzk;
2741: current_space_lvl->array += nzk;
2742: current_space_lvl->local_used += nzk;
2743: current_space_lvl->local_remaining -= nzk;
2745: ui[k+1] = ui[k] + nzk;
2746: }
2748: #if defined(PETSC_USE_INFO)
2749: if (ai[am] != 0) {
2750: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2751: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2752: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2753: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2754: } else {
2755: PetscInfo(A,"Empty matrix.\n");
2756: }
2757: #endif
2759: ISRestoreIndices(perm,&rip);
2760: ISRestoreIndices(iperm,&riip);
2761: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2762: PetscFree(ajtmp);
2764: /* destroy list of free space and other temporary array(s) */
2765: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2766: PetscFreeSpaceContiguous(&free_space,uj);
2767: PetscIncompleteLLDestroy(lnk,lnkbt);
2768: PetscFreeSpaceDestroy(free_space_lvl);
2770: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2772: /* put together the new matrix in MATSEQSBAIJ format */
2774: b = (Mat_SeqSBAIJ*)fact->data;
2775: b->singlemalloc = PETSC_FALSE;
2776: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2777: b->j = uj;
2778: b->i = ui;
2779: b->diag = udiag;
2780: b->free_diag = PETSC_TRUE;
2781: b->ilen = 0;
2782: b->imax = 0;
2783: b->row = perm;
2784: b->col = perm;
2785: PetscObjectReference((PetscObject)perm);
2786: PetscObjectReference((PetscObject)perm);
2787: b->icol = iperm;
2788: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2789: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2790: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2791: b->maxnz = b->nz = ui[am];
2792: b->free_a = PETSC_TRUE;
2793: b->free_ij = PETSC_TRUE;
2794:
2795: fact->info.factor_mallocs = reallocs;
2796: fact->info.fill_ratio_given = fill;
2797: if (ai[am] != 0) {
2798: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2799: } else {
2800: fact->info.fill_ratio_needed = 0.0;
2801: }
2802: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2803: return(0);
2804: }
2808: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2809: {
2810: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2811: Mat_SeqSBAIJ *b;
2812: PetscErrorCode ierr;
2813: PetscBool perm_identity;
2814: PetscReal fill = info->fill;
2815: const PetscInt *rip,*riip;
2816: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2817: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2818: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2819: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2820: PetscBT lnkbt;
2821: IS iperm;
2824: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2825: /* check whether perm is the identity mapping */
2826: ISIdentity(perm,&perm_identity);
2827: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2828: ISGetIndices(iperm,&riip);
2829: ISGetIndices(perm,&rip);
2831: /* initialization */
2832: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2833: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2834: ui[0] = 0;
2836: /* jl: linked list for storing indices of the pivot rows
2837: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2838: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2839: for (i=0; i<am; i++){
2840: jl[i] = am; il[i] = 0;
2841: }
2843: /* create and initialize a linked list for storing column indices of the active row k */
2844: nlnk = am + 1;
2845: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2847: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2848: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2849: current_space = free_space;
2851: for (k=0; k<am; k++){ /* for each active row k */
2852: /* initialize lnk by the column indices of row rip[k] of A */
2853: nzk = 0;
2854: ncols = ai[rip[k]+1] - ai[rip[k]];
2855: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2856: ncols_upper = 0;
2857: for (j=0; j<ncols; j++){
2858: i = riip[*(aj + ai[rip[k]] + j)];
2859: if (i >= k){ /* only take upper triangular entry */
2860: cols[ncols_upper] = i;
2861: ncols_upper++;
2862: }
2863: }
2864: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2865: nzk += nlnk;
2867: /* update lnk by computing fill-in for each pivot row to be merged in */
2868: prow = jl[k]; /* 1st pivot row */
2869:
2870: while (prow < k){
2871: nextprow = jl[prow];
2872: /* merge prow into k-th row */
2873: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2874: jmax = ui[prow+1];
2875: ncols = jmax-jmin;
2876: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2877: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2878: nzk += nlnk;
2880: /* update il and jl for prow */
2881: if (jmin < jmax){
2882: il[prow] = jmin;
2883: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2884: }
2885: prow = nextprow;
2886: }
2888: /* if free space is not available, make more free space */
2889: if (current_space->local_remaining<nzk) {
2890: i = am - k + 1; /* num of unfactored rows */
2891: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2892: PetscFreeSpaceGet(i,¤t_space);
2893: reallocs++;
2894: }
2896: /* copy data into free space, then initialize lnk */
2897: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2899: /* add the k-th row into il and jl */
2900: if (nzk > 1){
2901: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2902: jl[k] = jl[i]; jl[i] = k;
2903: il[k] = ui[k] + 1;
2904: }
2905: ui_ptr[k] = current_space->array;
2906: current_space->array += nzk;
2907: current_space->local_used += nzk;
2908: current_space->local_remaining -= nzk;
2910: ui[k+1] = ui[k] + nzk;
2911: }
2913: ISRestoreIndices(perm,&rip);
2914: ISRestoreIndices(iperm,&riip);
2915: PetscFree4(ui_ptr,jl,il,cols);
2917: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2918: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2919: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2920: PetscLLDestroy(lnk,lnkbt);
2922: /* put together the new matrix in MATSEQSBAIJ format */
2924: b = (Mat_SeqSBAIJ*)fact->data;
2925: b->singlemalloc = PETSC_FALSE;
2926: b->free_a = PETSC_TRUE;
2927: b->free_ij = PETSC_TRUE;
2928: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2929: b->j = uj;
2930: b->i = ui;
2931: b->diag = udiag;
2932: b->free_diag = PETSC_TRUE;
2933: b->ilen = 0;
2934: b->imax = 0;
2935: b->row = perm;
2936: b->col = perm;
2937: PetscObjectReference((PetscObject)perm);
2938: PetscObjectReference((PetscObject)perm);
2939: b->icol = iperm;
2940: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2941: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2942: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2943: b->maxnz = b->nz = ui[am];
2944:
2945: fact->info.factor_mallocs = reallocs;
2946: fact->info.fill_ratio_given = fill;
2947: if (ai[am] != 0) {
2948: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2949: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2950: } else {
2951: fact->info.fill_ratio_needed = 0.0;
2952: }
2953: #if defined(PETSC_USE_INFO)
2954: if (ai[am] != 0) {
2955: PetscReal af = fact->info.fill_ratio_needed;
2956: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2957: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2958: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2959: } else {
2960: PetscInfo(A,"Empty matrix.\n");
2961: }
2962: #endif
2963: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2964: return(0);
2965: }
2969: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2970: {
2971: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2972: Mat_SeqSBAIJ *b;
2973: PetscErrorCode ierr;
2974: PetscBool perm_identity;
2975: PetscReal fill = info->fill;
2976: const PetscInt *rip,*riip;
2977: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2978: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2979: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2980: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2981: PetscBT lnkbt;
2982: IS iperm;
2985: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2986: /* check whether perm is the identity mapping */
2987: ISIdentity(perm,&perm_identity);
2988: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2989: ISGetIndices(iperm,&riip);
2990: ISGetIndices(perm,&rip);
2992: /* initialization */
2993: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2994: ui[0] = 0;
2996: /* jl: linked list for storing indices of the pivot rows
2997: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2998: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2999: for (i=0; i<am; i++){
3000: jl[i] = am; il[i] = 0;
3001: }
3003: /* create and initialize a linked list for storing column indices of the active row k */
3004: nlnk = am + 1;
3005: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3007: /* initial FreeSpace size is fill*(ai[am]+1) */
3008: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3009: current_space = free_space;
3011: for (k=0; k<am; k++){ /* for each active row k */
3012: /* initialize lnk by the column indices of row rip[k] of A */
3013: nzk = 0;
3014: ncols = ai[rip[k]+1] - ai[rip[k]];
3015: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3016: ncols_upper = 0;
3017: for (j=0; j<ncols; j++){
3018: i = riip[*(aj + ai[rip[k]] + j)];
3019: if (i >= k){ /* only take upper triangular entry */
3020: cols[ncols_upper] = i;
3021: ncols_upper++;
3022: }
3023: }
3024: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3025: nzk += nlnk;
3027: /* update lnk by computing fill-in for each pivot row to be merged in */
3028: prow = jl[k]; /* 1st pivot row */
3029:
3030: while (prow < k){
3031: nextprow = jl[prow];
3032: /* merge prow into k-th row */
3033: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3034: jmax = ui[prow+1];
3035: ncols = jmax-jmin;
3036: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3037: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3038: nzk += nlnk;
3040: /* update il and jl for prow */
3041: if (jmin < jmax){
3042: il[prow] = jmin;
3043: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3044: }
3045: prow = nextprow;
3046: }
3048: /* if free space is not available, make more free space */
3049: if (current_space->local_remaining<nzk) {
3050: i = am - k + 1; /* num of unfactored rows */
3051: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3052: PetscFreeSpaceGet(i,¤t_space);
3053: reallocs++;
3054: }
3056: /* copy data into free space, then initialize lnk */
3057: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3059: /* add the k-th row into il and jl */
3060: if (nzk-1 > 0){
3061: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3062: jl[k] = jl[i]; jl[i] = k;
3063: il[k] = ui[k] + 1;
3064: }
3065: ui_ptr[k] = current_space->array;
3066: current_space->array += nzk;
3067: current_space->local_used += nzk;
3068: current_space->local_remaining -= nzk;
3070: ui[k+1] = ui[k] + nzk;
3071: }
3073: #if defined(PETSC_USE_INFO)
3074: if (ai[am] != 0) {
3075: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3076: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
3077: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
3078: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
3079: } else {
3080: PetscInfo(A,"Empty matrix.\n");
3081: }
3082: #endif
3084: ISRestoreIndices(perm,&rip);
3085: ISRestoreIndices(iperm,&riip);
3086: PetscFree4(ui_ptr,jl,il,cols);
3088: /* destroy list of free space and other temporary array(s) */
3089: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
3090: PetscFreeSpaceContiguous(&free_space,uj);
3091: PetscLLDestroy(lnk,lnkbt);
3093: /* put together the new matrix in MATSEQSBAIJ format */
3095: b = (Mat_SeqSBAIJ*)fact->data;
3096: b->singlemalloc = PETSC_FALSE;
3097: b->free_a = PETSC_TRUE;
3098: b->free_ij = PETSC_TRUE;
3099: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
3100: b->j = uj;
3101: b->i = ui;
3102: b->diag = 0;
3103: b->ilen = 0;
3104: b->imax = 0;
3105: b->row = perm;
3106: b->col = perm;
3107: PetscObjectReference((PetscObject)perm);
3108: PetscObjectReference((PetscObject)perm);
3109: b->icol = iperm;
3110: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3111: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
3112: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3113: b->maxnz = b->nz = ui[am];
3114:
3115: fact->info.factor_mallocs = reallocs;
3116: fact->info.fill_ratio_given = fill;
3117: if (ai[am] != 0) {
3118: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3119: } else {
3120: fact->info.fill_ratio_needed = 0.0;
3121: }
3122: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3123: return(0);
3124: }
3128: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3129: {
3130: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3131: PetscErrorCode ierr;
3132: PetscInt n = A->rmap->n;
3133: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3134: PetscScalar *x,sum;
3135: const PetscScalar *b;
3136: const MatScalar *aa = a->a,*v;
3137: PetscInt i,nz;
3140: if (!n) return(0);
3142: VecGetArrayRead(bb,&b);
3143: VecGetArray(xx,&x);
3145: /* forward solve the lower triangular */
3146: x[0] = b[0];
3147: v = aa;
3148: vi = aj;
3149: for (i=1; i<n; i++) {
3150: nz = ai[i+1] - ai[i];
3151: sum = b[i];
3152: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3153: v += nz;
3154: vi += nz;
3155: x[i] = sum;
3156: }
3157:
3158: /* backward solve the upper triangular */
3159: for (i=n-1; i>=0; i--){
3160: v = aa + adiag[i+1] + 1;
3161: vi = aj + adiag[i+1] + 1;
3162: nz = adiag[i] - adiag[i+1]-1;
3163: sum = x[i];
3164: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3165: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3166: }
3167:
3168: PetscLogFlops(2.0*a->nz - A->cmap->n);
3169: VecRestoreArrayRead(bb,&b);
3170: VecRestoreArray(xx,&x);
3171: return(0);
3172: }
3176: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3177: {
3178: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3179: IS iscol = a->col,isrow = a->row;
3180: PetscErrorCode ierr;
3181: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3182: const PetscInt *rout,*cout,*r,*c;
3183: PetscScalar *x,*tmp,sum;
3184: const PetscScalar *b;
3185: const MatScalar *aa = a->a,*v;
3188: if (!n) return(0);
3190: VecGetArrayRead(bb,&b);
3191: VecGetArray(xx,&x);
3192: tmp = a->solve_work;
3194: ISGetIndices(isrow,&rout); r = rout;
3195: ISGetIndices(iscol,&cout); c = cout;
3197: /* forward solve the lower triangular */
3198: tmp[0] = b[r[0]];
3199: v = aa;
3200: vi = aj;
3201: for (i=1; i<n; i++) {
3202: nz = ai[i+1] - ai[i];
3203: sum = b[r[i]];
3204: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3205: tmp[i] = sum;
3206: v += nz; vi += nz;
3207: }
3209: /* backward solve the upper triangular */
3210: for (i=n-1; i>=0; i--){
3211: v = aa + adiag[i+1]+1;
3212: vi = aj + adiag[i+1]+1;
3213: nz = adiag[i]-adiag[i+1]-1;
3214: sum = tmp[i];
3215: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3216: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3217: }
3219: ISRestoreIndices(isrow,&rout);
3220: ISRestoreIndices(iscol,&cout);
3221: VecRestoreArrayRead(bb,&b);
3222: VecRestoreArray(xx,&x);
3223: PetscLogFlops(2*a->nz - A->cmap->n);
3224: return(0);
3225: }
3229: /*
3230: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3231: */
3232: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3233: {
3234: Mat B = *fact;
3235: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3236: IS isicol;
3237: PetscErrorCode ierr;
3238: const PetscInt *r,*ic;
3239: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3240: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3241: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3242: PetscInt nlnk,*lnk;
3243: PetscBT lnkbt;
3244: PetscBool row_identity,icol_identity;
3245: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3246: const PetscInt *ics;
3247: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3248: PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3249: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3250: PetscBool missing;
3254: if (dt == PETSC_DEFAULT) dt = 0.005;
3255: if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */
3256: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3258: /* ------- symbolic factorization, can be reused ---------*/
3259: MatMissingDiagonal(A,&missing,&i);
3260: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3261: adiag=a->diag;
3263: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3265: /* bdiag is location of diagonal in factor */
3266: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag); /* becomes b->diag */
3267: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev); /* temporary */
3269: /* allocate row pointers bi */
3270: PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);
3272: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3273: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3274: nnz_max = ai[n]+2*n*dtcount+2;
3276: PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);
3277: PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);
3279: /* put together the new matrix */
3280: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
3281: PetscLogObjectParent(B,isicol);
3282: b = (Mat_SeqAIJ*)B->data;
3283: b->free_a = PETSC_TRUE;
3284: b->free_ij = PETSC_TRUE;
3285: b->singlemalloc = PETSC_FALSE;
3286: b->a = ba;
3287: b->j = bj;
3288: b->i = bi;
3289: b->diag = bdiag;
3290: b->ilen = 0;
3291: b->imax = 0;
3292: b->row = isrow;
3293: b->col = iscol;
3294: PetscObjectReference((PetscObject)isrow);
3295: PetscObjectReference((PetscObject)iscol);
3296: b->icol = isicol;
3297: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
3299: PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3300: b->maxnz = nnz_max;
3302: B->factortype = MAT_FACTOR_ILUDT;
3303: B->info.factor_mallocs = 0;
3304: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3305: CHKMEMQ;
3306: /* ------- end of symbolic factorization ---------*/
3308: ISGetIndices(isrow,&r);
3309: ISGetIndices(isicol,&ic);
3310: ics = ic;
3312: /* linked list for storing column indices of the active row */
3313: nlnk = n + 1;
3314: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3316: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3317: PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);
3318: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3319: PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);
3320: PetscMemzero(rtmp,n*sizeof(MatScalar));
3322: bi[0] = 0;
3323: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3324: bdiag_rev[n] = bdiag[0];
3325: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3326: for (i=0; i<n; i++) {
3327: /* copy initial fill into linked list */
3328: nzi = 0; /* nonzeros for active row i */
3329: nzi = ai[r[i]+1] - ai[r[i]];
3330: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3331: nzi_al = adiag[r[i]] - ai[r[i]];
3332: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3333: ajtmp = aj + ai[r[i]];
3334: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3335:
3336: /* load in initial (unfactored row) */
3337: aatmp = a->a + ai[r[i]];
3338: for (j=0; j<nzi; j++) {
3339: rtmp[ics[*ajtmp++]] = *aatmp++;
3340: }
3341:
3342: /* add pivot rows into linked list */
3343: row = lnk[n];
3344: while (row < i ) {
3345: nzi_bl = bi[row+1] - bi[row] + 1;
3346: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3347: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3348: nzi += nlnk;
3349: row = lnk[row];
3350: }
3351:
3352: /* copy data from lnk into jtmp, then initialize lnk */
3353: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3355: /* numerical factorization */
3356: bjtmp = jtmp;
3357: row = *bjtmp++; /* 1st pivot row */
3358: while ( row < i ) {
3359: pc = rtmp + row;
3360: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3361: multiplier = (*pc) * (*pv);
3362: *pc = multiplier;
3363: if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3364: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3365: pv = ba + bdiag[row+1] + 1;
3366: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3367: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3368: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3369: PetscLogFlops(2.0*nz);
3370: }
3371: row = *bjtmp++;
3372: }
3374: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3375: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3376: nzi_bl = 0; j = 0;
3377: while (jtmp[j] < i){ /* Note: jtmp is sorted */
3378: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3379: nzi_bl++; j++;
3380: }
3381: nzi_bu = nzi - nzi_bl -1;
3382: while (j < nzi){
3383: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3384: j++;
3385: }
3386:
3387: bjtmp = bj + bi[i];
3388: batmp = ba + bi[i];
3389: /* apply level dropping rule to L part */
3390: ncut = nzi_al + dtcount;
3391: if (ncut < nzi_bl){
3392: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3393: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3394: } else {
3395: ncut = nzi_bl;
3396: }
3397: for (j=0; j<ncut; j++){
3398: bjtmp[j] = jtmp[j];
3399: batmp[j] = vtmp[j];
3400: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3401: }
3402: bi[i+1] = bi[i] + ncut;
3403: nzi = ncut + 1;
3404:
3405: /* apply level dropping rule to U part */
3406: ncut = nzi_au + dtcount;
3407: if (ncut < nzi_bu){
3408: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3409: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3410: } else {
3411: ncut = nzi_bu;
3412: }
3413: nzi += ncut;
3415: /* mark bdiagonal */
3416: bdiag[i+1] = bdiag[i] - (ncut + 1);
3417: bdiag_rev[n-i-1] = bdiag[i+1];
3418: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3419: bjtmp = bj + bdiag[i];
3420: batmp = ba + bdiag[i];
3421: *bjtmp = i;
3422: *batmp = diag_tmp; /* rtmp[i]; */
3423: if (*batmp == 0.0) {
3424: *batmp = dt+shift;
3425: /* printf(" row %d add shift %g\n",i,shift); */
3426: }
3427: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3428: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3429:
3430: bjtmp = bj + bdiag[i+1]+1;
3431: batmp = ba + bdiag[i+1]+1;
3432: for (k=0; k<ncut; k++){
3433: bjtmp[k] = jtmp[nzi_bl+1+k];
3434: batmp[k] = vtmp[nzi_bl+1+k];
3435: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3436: }
3437: /* printf("\n"); */
3438:
3439: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3440: /*
3441: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3442: printf(" ----------------------------\n");
3443: */
3444: } /* for (i=0; i<n; i++) */
3445: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3446: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3448: ISRestoreIndices(isrow,&r);
3449: ISRestoreIndices(isicol,&ic);
3451: PetscLLDestroy(lnk,lnkbt);
3452: PetscFree2(im,jtmp);
3453: PetscFree2(rtmp,vtmp);
3454: PetscFree(bdiag_rev);
3456: PetscLogFlops(B->cmap->n);
3457: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3459: ISIdentity(isrow,&row_identity);
3460: ISIdentity(isicol,&icol_identity);
3461: if (row_identity && icol_identity) {
3462: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3463: } else {
3464: B->ops->solve = MatSolve_SeqAIJ;
3465: }
3466:
3467: B->ops->solveadd = 0;
3468: B->ops->solvetranspose = 0;
3469: B->ops->solvetransposeadd = 0;
3470: B->ops->matsolve = 0;
3471: B->assembled = PETSC_TRUE;
3472: B->preallocated = PETSC_TRUE;
3473: return(0);
3474: }
3476: /* a wraper of MatILUDTFactor_SeqAIJ() */
3479: /*
3480: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3481: */
3483: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3484: {
3485: PetscErrorCode ierr;
3488: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3489: return(0);
3490: }
3492: /*
3493: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3494: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3495: */
3498: /*
3499: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3500: */
3502: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3503: {
3504: Mat C=fact;
3505: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3506: IS isrow = b->row,isicol = b->icol;
3508: const PetscInt *r,*ic,*ics;
3509: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3510: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3511: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3512: PetscReal dt=info->dt,shift=info->shiftamount;
3513: PetscBool row_identity, col_identity;
3516: ISGetIndices(isrow,&r);
3517: ISGetIndices(isicol,&ic);
3518: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
3519: ics = ic;
3521: for (i=0; i<n; i++){
3522: /* initialize rtmp array */
3523: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3524: bjtmp = bj + bi[i];
3525: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3526: rtmp[i] = 0.0;
3527: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3528: bjtmp = bj + bdiag[i+1] + 1;
3529: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3531: /* load in initial unfactored row of A */
3532: /* printf("row %d\n",i); */
3533: nz = ai[r[i]+1] - ai[r[i]];
3534: ajtmp = aj + ai[r[i]];
3535: v = aa + ai[r[i]];
3536: for (j=0; j<nz; j++) {
3537: rtmp[ics[*ajtmp++]] = v[j];
3538: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3539: }
3540: /* printf("\n"); */
3542: /* numerical factorization */
3543: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3544: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3545: k = 0;
3546: while (k < nzl){
3547: row = *bjtmp++;
3548: /* printf(" prow %d\n",row); */
3549: pc = rtmp + row;
3550: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3551: multiplier = (*pc) * (*pv);
3552: *pc = multiplier;
3553: if (PetscAbsScalar(multiplier) > dt){
3554: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3555: pv = b->a + bdiag[row+1] + 1;
3556: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3557: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3558: /* PetscLogFlops(2.0*nz); */
3559: }
3560: k++;
3561: }
3562:
3563: /* finished row so stick it into b->a */
3564: /* L-part */
3565: pv = b->a + bi[i] ;
3566: pj = bj + bi[i] ;
3567: nzl = bi[i+1] - bi[i];
3568: for (j=0; j<nzl; j++) {
3569: pv[j] = rtmp[pj[j]];
3570: /* printf(" (%d,%g),",pj[j],pv[j]); */
3571: }
3573: /* diagonal: invert diagonal entries for simplier triangular solves */
3574: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3575: b->a[bdiag[i]] = 1.0/rtmp[i];
3576: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3578: /* U-part */
3579: pv = b->a + bdiag[i+1] + 1;
3580: pj = bj + bdiag[i+1] + 1;
3581: nzu = bdiag[i] - bdiag[i+1] - 1;
3582: for (j=0; j<nzu; j++) {
3583: pv[j] = rtmp[pj[j]];
3584: /* printf(" (%d,%g),",pj[j],pv[j]); */
3585: }
3586: /* printf("\n"); */
3587: }
3589: PetscFree(rtmp);
3590: ISRestoreIndices(isicol,&ic);
3591: ISRestoreIndices(isrow,&r);
3592:
3593: ISIdentity(isrow,&row_identity);
3594: ISIdentity(isicol,&col_identity);
3595: if (row_identity && col_identity) {
3596: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3597: } else {
3598: C->ops->solve = MatSolve_SeqAIJ;
3599: }
3600: C->ops->solveadd = 0;
3601: C->ops->solvetranspose = 0;
3602: C->ops->solvetransposeadd = 0;
3603: C->ops->matsolve = 0;
3604: C->assembled = PETSC_TRUE;
3605: C->preallocated = PETSC_TRUE;
3606: PetscLogFlops(C->cmap->n);
3607: return(0);
3608: }