Actual source code: spbas_cholesky.h

  2: /*
  3:    spbas_cholesky_row_alloc: 
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the 
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
 10: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
 11: {
 13:    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 14:    retval.values[k] = &retval.alloc_val[*n_alloc_used];
 15:    *n_alloc_used   += r_nnz;
 16:    if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
 17:    return(0);
 18: }


 21: /*
 22:   spbas_cholesky_garbage_collect:
 23:      move the rows which have been calculated so far, as well as 
 24:      those currently under construction, to the front of the 
 25:      array, while putting them in the proper order.
 26:      When it seems necessary, enlarge the current arrays.

 28:      NB: re-allocation is being simulated using 
 29:          PetscMalloc, memcpy, PetscFree, because
 30:          PetscRealloc does not seem to exist.

 32: */
 35: PetscErrorCode spbas_cholesky_garbage_collect(
 36:       spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
 37:                                    Only the storage, not the contents of this matrix 
 38:                                    is changed in this function */
 39:       PetscInt     i_row,             /* I  : Number of rows for which the final contents are 
 40:                                      known */
 41:       PetscInt     *n_row_alloc_ok,   /* I/O: Number of rows which are already in their final  
 42:                                      places in the arrays: they need not be moved any more */
 43:       PetscInt     *n_alloc_used,     /* I/O:  */
 44:       PetscInt     *max_row_nnz       /* I  : Over-estimate of the number of nonzeros needed to 
 45:                                     store each row */
 46:      )
 47: {


 50:   /* PSEUDO-CODE:
 51:     1. Choose the appropriate size for the arrays
 52:     2. Rescue the arrays which would be lost during garbage collection
 53:     3. Reallocate and correct administration
 54:     4. Move all arrays so that they are in proper order */

 56:    PetscInt        i,j;
 57:    PetscInt        nrows = result->nrows;
 58:    PetscInt        n_alloc_ok=0;
 59:    PetscInt        n_alloc_ok_max=0;
 60:    PetscErrorCode  ierr;
 61:    PetscInt        need_already= 0;
 62:    PetscInt        n_rows_ahead=0;
 63:    PetscInt        max_need_extra= 0;
 64:    PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 65:    PetscInt        n_alloc_now = result->n_alloc_icol;
 66:    PetscInt        *alloc_icol_old = result->alloc_icol;
 67:    PetscScalar     *alloc_val_old  = result->alloc_val;
 68:    PetscInt        *icol_rescue;
 69:    PetscScalar     *val_rescue;
 70:    PetscInt        n_rescue;
 71:    PetscInt        n_row_rescue;
 72:    PetscInt        i_here, i_last, n_copy;
 73:    const PetscReal xtra_perc = 20;


 77:    /*********************************************************
 78:     1. Choose appropriate array size
 79:     Count number of rows and memory usage which is already final */
 80:    for (i=0;i<i_row; i++)  {
 81:       n_alloc_ok += result->row_nnz[i];
 82:       n_alloc_ok_max += max_row_nnz[i];
 83:    }
 84: 
 85:    /* Count currently needed memory usage and future memory requirements
 86:       (max, predicted)*/
 87:    for (i=i_row; i<nrows; i++) {
 88:      if (!result->row_nnz[i]) {
 89:          max_need_extra  += max_row_nnz[i];
 90:       } else {
 91:          need_already    += max_row_nnz[i];
 92:          n_rows_ahead++;
 93:       }
 94:    }

 96:    /* Make maximal and realistic memory requirement estimates */
 97:    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 98:    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));

100:    /* Choose array sizes */
101:    if (n_alloc_max == n_alloc_est)  { n_alloc = n_alloc_max; }
102:    else if (n_alloc_now >= n_alloc_est)  { n_alloc = n_alloc_now; }
103:    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))  { n_alloc  = n_alloc_max;  }
104:    else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }

106:    /* If new estimate is less than what we already have,
107:       don't reallocate, just garbage-collect */
108:    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
109:       n_alloc = result->n_alloc_icol;
110:    }

112:    /* Motivate dimension choice */
113:    PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);
114:    if (n_alloc_max == n_alloc_est) { PetscInfo(PETSC_NULL,"this is the correct size\n"); }
115:    else if (n_alloc_now >= n_alloc_est) { PetscInfo(PETSC_NULL,"the current size, which seems enough\n"); }
116:    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { PetscInfo(PETSC_NULL,"the maximum estimate\n"); }
117:    else { PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc); }
118: 

120:    /**********************************************************
121:     2. Rescue arrays which would be lost
122:     Count how many rows and nonzeros will have to be rescued
123:     when moving all arrays in place */
124:    n_row_rescue = 0; n_rescue = 0;
125:    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
126:    else  {
127:       i = *n_row_alloc_ok - 1;
128:       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
129:    }

131:    for (i=*n_row_alloc_ok; i<nrows; i++) {
132:       i_here = result->icols[i]-result->alloc_icol;
133:       i_last = i_here + result->row_nnz[i];
134:       if (result->row_nnz[i]>0) {
135:          if (*n_alloc_used > i_here || i_last > n_alloc) {
136:             n_rescue += result->row_nnz[i];
137:             n_row_rescue++;
138:          }
139: 
140:          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
141:          else         {  *n_alloc_used += max_row_nnz[i];}
142:       }
143:    }

145:    /* Allocate rescue arrays */
146:    PetscMalloc( n_rescue * sizeof(int), &icol_rescue);
147:    PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);

149:    /* Rescue the arrays which need rescuing */
150:    n_row_rescue = 0; n_rescue = 0;
151:    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
152:    else {
153:       i = *n_row_alloc_ok - 1;
154:       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
155:    }

157:    for (i=*n_row_alloc_ok; i<nrows; i++) {
158:       i_here = result->icols[i]-result->alloc_icol;
159:       i_last = i_here + result->row_nnz[i];
160:       if (result->row_nnz[i]>0) {
161:          if (*n_alloc_used > i_here || i_last > n_alloc) {
162:             PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));
163:             PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));
164:             n_rescue += result->row_nnz[i];
165:             n_row_rescue++;
166:          }
167: 
168:          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
169:          else         {  *n_alloc_used += max_row_nnz[i];}
170:       }
171:    }

173:    /**********************************************************
174:     3. Reallocate and correct administration */
175: 
176:    if (n_alloc != result->n_alloc_icol)  {
177:       n_copy = PetscMin(n_alloc,result->n_alloc_icol);

179:       /* PETSC knows no REALLOC, so we'll REALLOC ourselves.

181:          Allocate new icol-data, copy old contents */
182:       PetscMalloc(  n_alloc * sizeof(int), &result->alloc_icol);
183:       PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));

185:       /* Update administration, Reset pointers to new arrays  */
186:       result->n_alloc_icol = n_alloc;
187:       for (i=0; i<nrows; i++) {
188:           result->icols[i] =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
189:           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
190:       }

192:       /* Delete old array */
193:       PetscFree(alloc_icol_old);

195:       /* Allocate new value-data, copy old contents */
196:       PetscMalloc(  n_alloc * sizeof(PetscScalar), &result->alloc_val);
197:       PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));

199:       /* Update administration, Reset pointers to new arrays  */
200:       result->n_alloc_val  = n_alloc;
201:       for (i=0; i<nrows; i++) {
202:           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
203:       }

205:       /* Delete old array */
206:       PetscFree(alloc_val_old);
207:    }


210:    /*********************************************************
211:     4. Copy all the arrays to their proper places */
212:    n_row_rescue = 0; n_rescue = 0;
213:    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
214:    else {
215:       i = *n_row_alloc_ok - 1;
216:       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
217:    }

219:    for (i=*n_row_alloc_ok; i<nrows; i++) {
220:       i_here = result->icols[i]-result->alloc_icol;
221:       i_last = i_here + result->row_nnz[i];

223:       result->icols[i] = result->alloc_icol + *n_alloc_used;
224:       result->values[i]= result->alloc_val  + *n_alloc_used;

226:       if (result->row_nnz[i]>0) {
227:          if (*n_alloc_used > i_here || i_last > n_alloc)  {
228:             PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));
229:             PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));
230:             n_rescue += result->row_nnz[i];
231:             n_row_rescue++;
232:          } else {
233:             for (j=0; j<result->row_nnz[i]; j++) {
234:                 result->icols[i][j]   = result->alloc_icol[i_here+j];
235:                 result->values[i][j]  = result->alloc_val[ i_here+j];
236:             }
237:          }
238:          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
239:          else         {  *n_alloc_used += max_row_nnz[i];}
240:       }
241:    }

243:    /* Delete the rescue arrays */
244:    PetscFree(icol_rescue);
245:    PetscFree(val_rescue);
246:    *n_row_alloc_ok = i_row;
247:    return(0);
248: }

250: /*
251:   spbas_incomplete_cholesky:
252:      incomplete Cholesky decomposition of a square, symmetric, 
253:      positive definite matrix. 

255:      In case negative diagonals are encountered, function returns
256:      NEGATIVE_DIAGONAL. When this happens, call this function again
257:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller 
258:      droptol
259: */
262: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
263: {
264:    PetscInt         jL;
265:    Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data;
266:    PetscInt         *ai=a->i,*aj=a->j;
267:    MatScalar        *aa=a->a;
268:    PetscInt         nrows, ncols;
269:    PetscInt         *max_row_nnz;
270:    PetscErrorCode   ierr;
271:    spbas_matrix     retval;
272:    PetscScalar      * diag;
273:    PetscScalar      * val;
274:    PetscScalar      * lvec;
275:    PetscScalar      epsdiag;
276:    PetscInt         i,j,k;
277:    const PetscBool  do_values = PETSC_TRUE;
278:    PetscInt         * r1_icol;
279:    PetscScalar      *r1_val;
280:    PetscInt         * r_icol;
281:    PetscInt         r_nnz;
282:    PetscScalar      *r_val;
283:    PetscInt         * A_icol;
284:    PetscInt         A_nnz;
285:    PetscScalar      *A_val;
286:    PetscInt         * p_icol;
287:    PetscInt         p_nnz;
288:    PetscInt         n_row_alloc_ok = 0;  /* number of rows which have been stored   correctly in the matrix */
289:    PetscInt         n_alloc_used = 0;    /* part of result->icols and result->values   which is currently being used */

292:    /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
293:    MatGetSize(A, &nrows, &ncols);
294:    MatGetTrace(A, &epsdiag);
295:    epsdiag *= epsdiag_in / nrows;
296:    PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);

298:    if (droptol<1e-10) {droptol=1e-10;}

300:    if ( (nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows) ) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");

302:    retval.nrows = nrows;
303:    retval.ncols = nrows;
304:    retval.nnz   = pattern.nnz/10;
305:    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
306:    retval.block_data = PETSC_TRUE;

308:     spbas_allocate_pattern(&retval, do_values);
309:    PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));
310:     spbas_allocate_data(&retval);
311:    retval.nnz   = 0;

313:    PetscMalloc(nrows*sizeof(PetscScalar), &diag);
314:    PetscMalloc(nrows*sizeof(PetscScalar), &val);
315:    PetscMalloc(nrows*sizeof(PetscScalar), &lvec);
316:    PetscMalloc(nrows*sizeof(int), &max_row_nnz);
317:    if (!(diag && val && lvec && max_row_nnz))  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");

319:    PetscMemzero((void*) val, nrows*sizeof(PetscScalar));
320:    PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));
321:    PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));

323:    /* Check correct format of sparseness pattern */
324:    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");

326:    /* Count the nonzeros on transpose of pattern */
327:    for (i = 0; i<nrows; i++)  {
328:       p_nnz  = pattern.row_nnz[i];
329:       p_icol = pattern.icols[i];
330:       for (j=0; j<p_nnz; j++)  {
331:          max_row_nnz[i+p_icol[j]]++;
332:       }
333:    }

335:    /* Calculate rows of L */
336:    for (i = 0; i<nrows; i++)  {
337:       p_nnz  = pattern.row_nnz[i];
338:       p_icol = pattern.icols[i];

340:       r_nnz  = retval.row_nnz[i];
341:       r_icol = retval.icols[i];
342:       r_val  = retval.values[i];
343:       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
344:       A_icol = &aj[ai[rip[i]]];
345:       A_val  = &aa[ai[rip[i]]];

347:       /* Calculate  val += A(i,i:n)'; */
348:       for (j=0; j<A_nnz; j++) {
349:          k = riip[A_icol[j]];
350:          if (k>=i) { val[k] = A_val[j]; }
351:       }

353:       /*  Add regularization */
354:       val[i] += epsdiag;

356:       /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
357:          val(i) = A(i,i) - L(0:i-1,i)' * lvec */
358:       for ( j=0; j<r_nnz; j++)  {
359:          k           = r_icol[j];
360:          lvec[k]     = diag[k] * r_val[j];
361:          val[i]     -= r_val[j] * lvec[k];
362:       }

364:       /* Calculate the new diagonal */
365:       diag[i] = val[i];
366:       if (PetscRealPart(diag[i])<droptol)  {
367:          PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
368:          PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);

370:          /* Delete the whole matrix at once. */
371:          spbas_delete(retval);
372:          return NEGATIVE_DIAGONAL;
373:       }

375:       /* If necessary, allocate arrays */
376:       if (r_nnz==0) {
377:          spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
378:          if (ierr == PETSC_ERR_MEM) {
379:             spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
380:             spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
381:          }
382:          r_icol = retval.icols[i];
383:          r_val  = retval.values[i];
384:       }

386:       /* Now, fill in */
387:       r_icol[r_nnz] = i;
388:       r_val [r_nnz] = 1.0;
389:       r_nnz++;
390:       retval.row_nnz[i]++;

392:       retval.nnz += r_nnz;

394:       /* Calculate
395:          val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
396:       for (j=1; j<p_nnz; j++)  {
397:          k = i+p_icol[j];
398:          r1_icol = retval.icols[k];
399:          r1_val  = retval.values[k];
400:          for (jL=0; jL<retval.row_nnz[k]; jL++)   {
401:             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
402:          }
403:          val[k] /= diag[i];

405:          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
406:            /* If necessary, allocate arrays */
407:             if (retval.row_nnz[k]==0) {
408:                spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);
409:                if (ierr == PETSC_ERR_MEM) {
410:                   spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
411:                   spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);
412:                   r_icol = retval.icols[i];
413:                   r_val  = retval.values[i];
414:                }
415:             }

417:             retval.icols[k][retval.row_nnz[k]] = i;
418:             retval.values[k][retval.row_nnz[k]] = val[k];
419:             retval.row_nnz[k]++;
420:          }
421:          val[k] = 0;
422:       }

424:       /* Erase the values used in the work arrays */
425:       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
426:    }

428:    ierr=PetscFree(lvec);
429:    ierr=PetscFree(val);

431:    spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
432:    ierr=PetscFree(max_row_nnz);

434:    /* Place the inverse of the diagonals in the matrix */
435:    for (i=0; i<nrows; i++) {
436:       r_nnz = retval.row_nnz[i];
437:       retval.values[i][r_nnz-1] = 1.0 / diag[i];
438:       for (j=0; j<r_nnz-1; j++) {
439:          retval.values[i][j] *= -1;
440:       }
441:    }
442:    ierr=PetscFree(diag);
443:    *matrix_L = retval;
444:    return(0);
445: }