Actual source code: spbas_cholesky.h
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
10: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11: {
13: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
14: retval.values[k] = &retval.alloc_val[*n_alloc_used];
15: *n_alloc_used += r_nnz;
16: if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17: return(0);
18: }
21: /*
22: spbas_cholesky_garbage_collect:
23: move the rows which have been calculated so far, as well as
24: those currently under construction, to the front of the
25: array, while putting them in the proper order.
26: When it seems necessary, enlarge the current arrays.
28: NB: re-allocation is being simulated using
29: PetscMalloc, memcpy, PetscFree, because
30: PetscRealloc does not seem to exist.
32: */
35: PetscErrorCode spbas_cholesky_garbage_collect(
36: spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
37: Only the storage, not the contents of this matrix
38: is changed in this function */
39: PetscInt i_row, /* I : Number of rows for which the final contents are
40: known */
41: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
42: places in the arrays: they need not be moved any more */
43: PetscInt *n_alloc_used, /* I/O: */
44: PetscInt *max_row_nnz /* I : Over-estimate of the number of nonzeros needed to
45: store each row */
46: )
47: {
50: /* PSEUDO-CODE:
51: 1. Choose the appropriate size for the arrays
52: 2. Rescue the arrays which would be lost during garbage collection
53: 3. Reallocate and correct administration
54: 4. Move all arrays so that they are in proper order */
56: PetscInt i,j;
57: PetscInt nrows = result->nrows;
58: PetscInt n_alloc_ok=0;
59: PetscInt n_alloc_ok_max=0;
60: PetscErrorCode ierr;
61: PetscInt need_already= 0;
62: PetscInt n_rows_ahead=0;
63: PetscInt max_need_extra= 0;
64: PetscInt n_alloc_max, n_alloc_est, n_alloc;
65: PetscInt n_alloc_now = result->n_alloc_icol;
66: PetscInt *alloc_icol_old = result->alloc_icol;
67: PetscScalar *alloc_val_old = result->alloc_val;
68: PetscInt *icol_rescue;
69: PetscScalar *val_rescue;
70: PetscInt n_rescue;
71: PetscInt n_row_rescue;
72: PetscInt i_here, i_last, n_copy;
73: const PetscReal xtra_perc = 20;
77: /*********************************************************
78: 1. Choose appropriate array size
79: Count number of rows and memory usage which is already final */
80: for (i=0;i<i_row; i++) {
81: n_alloc_ok += result->row_nnz[i];
82: n_alloc_ok_max += max_row_nnz[i];
83: }
84:
85: /* Count currently needed memory usage and future memory requirements
86: (max, predicted)*/
87: for (i=i_row; i<nrows; i++) {
88: if (!result->row_nnz[i]) {
89: max_need_extra += max_row_nnz[i];
90: } else {
91: need_already += max_row_nnz[i];
92: n_rows_ahead++;
93: }
94: }
96: /* Make maximal and realistic memory requirement estimates */
97: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
98: n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
100: /* Choose array sizes */
101: if (n_alloc_max == n_alloc_est) { n_alloc = n_alloc_max; }
102: else if (n_alloc_now >= n_alloc_est) { n_alloc = n_alloc_now; }
103: else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { n_alloc = n_alloc_max; }
104: else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
106: /* If new estimate is less than what we already have,
107: don't reallocate, just garbage-collect */
108: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
109: n_alloc = result->n_alloc_icol;
110: }
112: /* Motivate dimension choice */
113: PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);
114: if (n_alloc_max == n_alloc_est) { PetscInfo(PETSC_NULL,"this is the correct size\n"); }
115: else if (n_alloc_now >= n_alloc_est) { PetscInfo(PETSC_NULL,"the current size, which seems enough\n"); }
116: else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { PetscInfo(PETSC_NULL,"the maximum estimate\n"); }
117: else { PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc); }
118:
120: /**********************************************************
121: 2. Rescue arrays which would be lost
122: Count how many rows and nonzeros will have to be rescued
123: when moving all arrays in place */
124: n_row_rescue = 0; n_rescue = 0;
125: if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
126: else {
127: i = *n_row_alloc_ok - 1;
128: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
129: }
131: for (i=*n_row_alloc_ok; i<nrows; i++) {
132: i_here = result->icols[i]-result->alloc_icol;
133: i_last = i_here + result->row_nnz[i];
134: if (result->row_nnz[i]>0) {
135: if (*n_alloc_used > i_here || i_last > n_alloc) {
136: n_rescue += result->row_nnz[i];
137: n_row_rescue++;
138: }
139:
140: if (i<i_row) { *n_alloc_used += result->row_nnz[i];}
141: else { *n_alloc_used += max_row_nnz[i];}
142: }
143: }
145: /* Allocate rescue arrays */
146: PetscMalloc( n_rescue * sizeof(int), &icol_rescue);
147: PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);
149: /* Rescue the arrays which need rescuing */
150: n_row_rescue = 0; n_rescue = 0;
151: if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
152: else {
153: i = *n_row_alloc_ok - 1;
154: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
155: }
157: for (i=*n_row_alloc_ok; i<nrows; i++) {
158: i_here = result->icols[i]-result->alloc_icol;
159: i_last = i_here + result->row_nnz[i];
160: if (result->row_nnz[i]>0) {
161: if (*n_alloc_used > i_here || i_last > n_alloc) {
162: PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));
163: PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));
164: n_rescue += result->row_nnz[i];
165: n_row_rescue++;
166: }
167:
168: if (i<i_row) { *n_alloc_used += result->row_nnz[i];}
169: else { *n_alloc_used += max_row_nnz[i];}
170: }
171: }
173: /**********************************************************
174: 3. Reallocate and correct administration */
175:
176: if (n_alloc != result->n_alloc_icol) {
177: n_copy = PetscMin(n_alloc,result->n_alloc_icol);
179: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
181: Allocate new icol-data, copy old contents */
182: PetscMalloc( n_alloc * sizeof(int), &result->alloc_icol);
183: PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));
185: /* Update administration, Reset pointers to new arrays */
186: result->n_alloc_icol = n_alloc;
187: for (i=0; i<nrows; i++) {
188: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
189: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
190: }
192: /* Delete old array */
193: PetscFree(alloc_icol_old);
195: /* Allocate new value-data, copy old contents */
196: PetscMalloc( n_alloc * sizeof(PetscScalar), &result->alloc_val);
197: PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));
199: /* Update administration, Reset pointers to new arrays */
200: result->n_alloc_val = n_alloc;
201: for (i=0; i<nrows; i++) {
202: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
203: }
205: /* Delete old array */
206: PetscFree(alloc_val_old);
207: }
210: /*********************************************************
211: 4. Copy all the arrays to their proper places */
212: n_row_rescue = 0; n_rescue = 0;
213: if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
214: else {
215: i = *n_row_alloc_ok - 1;
216: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
217: }
219: for (i=*n_row_alloc_ok; i<nrows; i++) {
220: i_here = result->icols[i]-result->alloc_icol;
221: i_last = i_here + result->row_nnz[i];
223: result->icols[i] = result->alloc_icol + *n_alloc_used;
224: result->values[i]= result->alloc_val + *n_alloc_used;
226: if (result->row_nnz[i]>0) {
227: if (*n_alloc_used > i_here || i_last > n_alloc) {
228: PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));
229: PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));
230: n_rescue += result->row_nnz[i];
231: n_row_rescue++;
232: } else {
233: for (j=0; j<result->row_nnz[i]; j++) {
234: result->icols[i][j] = result->alloc_icol[i_here+j];
235: result->values[i][j] = result->alloc_val[ i_here+j];
236: }
237: }
238: if (i<i_row) { *n_alloc_used += result->row_nnz[i];}
239: else { *n_alloc_used += max_row_nnz[i];}
240: }
241: }
243: /* Delete the rescue arrays */
244: PetscFree(icol_rescue);
245: PetscFree(val_rescue);
246: *n_row_alloc_ok = i_row;
247: return(0);
248: }
250: /*
251: spbas_incomplete_cholesky:
252: incomplete Cholesky decomposition of a square, symmetric,
253: positive definite matrix.
255: In case negative diagonals are encountered, function returns
256: NEGATIVE_DIAGONAL. When this happens, call this function again
257: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
258: droptol
259: */
262: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
263: {
264: PetscInt jL;
265: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
266: PetscInt *ai=a->i,*aj=a->j;
267: MatScalar *aa=a->a;
268: PetscInt nrows, ncols;
269: PetscInt *max_row_nnz;
270: PetscErrorCode ierr;
271: spbas_matrix retval;
272: PetscScalar * diag;
273: PetscScalar * val;
274: PetscScalar * lvec;
275: PetscScalar epsdiag;
276: PetscInt i,j,k;
277: const PetscBool do_values = PETSC_TRUE;
278: PetscInt * r1_icol;
279: PetscScalar *r1_val;
280: PetscInt * r_icol;
281: PetscInt r_nnz;
282: PetscScalar *r_val;
283: PetscInt * A_icol;
284: PetscInt A_nnz;
285: PetscScalar *A_val;
286: PetscInt * p_icol;
287: PetscInt p_nnz;
288: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
289: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
292: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
293: MatGetSize(A, &nrows, &ncols);
294: MatGetTrace(A, &epsdiag);
295: epsdiag *= epsdiag_in / nrows;
296: PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);
298: if (droptol<1e-10) {droptol=1e-10;}
300: if ( (nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows) ) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
302: retval.nrows = nrows;
303: retval.ncols = nrows;
304: retval.nnz = pattern.nnz/10;
305: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
306: retval.block_data = PETSC_TRUE;
308: spbas_allocate_pattern(&retval, do_values);
309: PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));
310: spbas_allocate_data(&retval);
311: retval.nnz = 0;
313: PetscMalloc(nrows*sizeof(PetscScalar), &diag);
314: PetscMalloc(nrows*sizeof(PetscScalar), &val);
315: PetscMalloc(nrows*sizeof(PetscScalar), &lvec);
316: PetscMalloc(nrows*sizeof(int), &max_row_nnz);
317: if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
319: PetscMemzero((void*) val, nrows*sizeof(PetscScalar));
320: PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));
321: PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));
323: /* Check correct format of sparseness pattern */
324: if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
326: /* Count the nonzeros on transpose of pattern */
327: for (i = 0; i<nrows; i++) {
328: p_nnz = pattern.row_nnz[i];
329: p_icol = pattern.icols[i];
330: for (j=0; j<p_nnz; j++) {
331: max_row_nnz[i+p_icol[j]]++;
332: }
333: }
335: /* Calculate rows of L */
336: for (i = 0; i<nrows; i++) {
337: p_nnz = pattern.row_nnz[i];
338: p_icol = pattern.icols[i];
340: r_nnz = retval.row_nnz[i];
341: r_icol = retval.icols[i];
342: r_val = retval.values[i];
343: A_nnz = ai[rip[i]+1] - ai[rip[i]];
344: A_icol = &aj[ai[rip[i]]];
345: A_val = &aa[ai[rip[i]]];
347: /* Calculate val += A(i,i:n)'; */
348: for (j=0; j<A_nnz; j++) {
349: k = riip[A_icol[j]];
350: if (k>=i) { val[k] = A_val[j]; }
351: }
353: /* Add regularization */
354: val[i] += epsdiag;
356: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
357: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
358: for ( j=0; j<r_nnz; j++) {
359: k = r_icol[j];
360: lvec[k] = diag[k] * r_val[j];
361: val[i] -= r_val[j] * lvec[k];
362: }
364: /* Calculate the new diagonal */
365: diag[i] = val[i];
366: if (PetscRealPart(diag[i])<droptol) {
367: PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
368: PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
370: /* Delete the whole matrix at once. */
371: spbas_delete(retval);
372: return NEGATIVE_DIAGONAL;
373: }
375: /* If necessary, allocate arrays */
376: if (r_nnz==0) {
377: spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
378: if (ierr == PETSC_ERR_MEM) {
379: spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
380: spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
381: }
382: r_icol = retval.icols[i];
383: r_val = retval.values[i];
384: }
386: /* Now, fill in */
387: r_icol[r_nnz] = i;
388: r_val [r_nnz] = 1.0;
389: r_nnz++;
390: retval.row_nnz[i]++;
392: retval.nnz += r_nnz;
394: /* Calculate
395: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
396: for (j=1; j<p_nnz; j++) {
397: k = i+p_icol[j];
398: r1_icol = retval.icols[k];
399: r1_val = retval.values[k];
400: for (jL=0; jL<retval.row_nnz[k]; jL++) {
401: val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
402: }
403: val[k] /= diag[i];
405: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
406: /* If necessary, allocate arrays */
407: if (retval.row_nnz[k]==0) {
408: spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);
409: if (ierr == PETSC_ERR_MEM) {
410: spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
411: spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);
412: r_icol = retval.icols[i];
413: r_val = retval.values[i];
414: }
415: }
417: retval.icols[k][retval.row_nnz[k]] = i;
418: retval.values[k][retval.row_nnz[k]] = val[k];
419: retval.row_nnz[k]++;
420: }
421: val[k] = 0;
422: }
424: /* Erase the values used in the work arrays */
425: for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
426: }
428: ierr=PetscFree(lvec);
429: ierr=PetscFree(val);
431: spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
432: ierr=PetscFree(max_row_nnz);
434: /* Place the inverse of the diagonals in the matrix */
435: for (i=0; i<nrows; i++) {
436: r_nnz = retval.row_nnz[i];
437: retval.values[i][r_nnz-1] = 1.0 / diag[i];
438: for (j=0; j<r_nnz-1; j++) {
439: retval.values[i][j] *= -1;
440: }
441: }
442: ierr=PetscFree(diag);
443: *matrix_L = retval;
444: return(0);
445: }