Actual source code: blockmat.c

  2: /*
  3:    This provides a matrix that consists of Mats
  4: */

  6: #include <private/matimpl.h>              /*I "petscmat.h" I*/
  7: #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
  8: #include <petscksp.h>

 10: typedef struct {
 11:   SEQAIJHEADER(Mat);
 12:   SEQBAIJHEADER;
 13:   Mat               *diags;

 15:   Vec               left,right,middle,workb;   /* dummy vectors to perform local parts of product */
 16: } Mat_BlockMat;


 22: PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
 23: {
 24:   Mat_BlockMat       *a = (Mat_BlockMat*)A->data;
 25:   PetscScalar        *x;
 26:   const Mat          *v = a->a;
 27:   const PetscScalar  *b;
 28:   PetscErrorCode     ierr;
 29:   PetscInt           n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
 30:   const PetscInt     *idx;
 31:   IS                 row,col;
 32:   MatFactorInfo      info;
 33:   Vec                left = a->left,right = a->right, middle = a->middle;
 34:   Mat                *diag;

 37:   its = its*lits;
 38:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
 39:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
 40:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
 41:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
 42:   if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP))
 43:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");

 45:   if (!a->diags) {
 46:     PetscMalloc(mbs*sizeof(Mat),&a->diags);
 47:     MatFactorInfoInitialize(&info);
 48:     for (i=0; i<mbs; i++) {
 49:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
 50:       MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info);
 51:       MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
 52:       ISDestroy(&row);
 53:       ISDestroy(&col);
 54:     }
 55:     VecDuplicate(bb,&a->workb);
 56:   }
 57:   diag    = a->diags;

 59:   VecSet(xx,0.0);
 60:   VecGetArray(xx,&x);
 61:   /* copy right hand side because it must be modified during iteration */
 62:   VecCopy(bb,a->workb);
 63:   VecGetArrayRead(a->workb,&b);

 65:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
 66:   while (its--) {
 67:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){

 69:       for (i=0; i<mbs; i++) {
 70:         n    = a->i[i+1] - a->i[i] - 1;
 71:         idx  = a->j + a->i[i] + 1;
 72:         v    = a->a + a->i[i] + 1;

 74:         VecSet(left,0.0);
 75:         for (j=0; j<n; j++) {
 76:           VecPlaceArray(right,x + idx[j]*bs);
 77:           MatMultAdd(v[j],right,left,left);
 78:           VecResetArray(right);
 79:         }
 80:         VecPlaceArray(right,b + i*bs);
 81:         VecAYPX(left,-1.0,right);
 82:         VecResetArray(right);

 84:         VecPlaceArray(right,x + i*bs);
 85:         MatSolve(diag[i],left,right);

 87:         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
 88:         for (j=0; j<n; j++) {
 89:           MatMultTranspose(v[j],right,left);
 90:           VecPlaceArray(middle,b + idx[j]*bs);
 91:           VecAXPY(middle,-1.0,left);
 92:           VecResetArray(middle);
 93:         }
 94:         VecResetArray(right);

 96:       }
 97:     }
 98:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){

100:       for (i=mbs-1; i>=0; i--) {
101:         n    = a->i[i+1] - a->i[i] - 1;
102:         idx  = a->j + a->i[i] + 1;
103:         v    = a->a + a->i[i] + 1;

105:         VecSet(left,0.0);
106:         for (j=0; j<n; j++) {
107:           VecPlaceArray(right,x + idx[j]*bs);
108:           MatMultAdd(v[j],right,left,left);
109:           VecResetArray(right);
110:         }
111:         VecPlaceArray(right,b + i*bs);
112:         VecAYPX(left,-1.0,right);
113:         VecResetArray(right);

115:         VecPlaceArray(right,x + i*bs);
116:         MatSolve(diag[i],left,right);
117:         VecResetArray(right);

119:       }
120:     }
121:   }
122:   VecRestoreArray(xx,&x);
123:   VecRestoreArrayRead(a->workb,&b);
124:   return(0);
125: }

129: PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131:   Mat_BlockMat       *a = (Mat_BlockMat*)A->data;
132:   PetscScalar        *x;
133:   const Mat          *v = a->a;
134:   const PetscScalar  *b;
135:   PetscErrorCode     ierr;
136:   PetscInt           n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
137:   const PetscInt     *idx;
138:   IS                 row,col;
139:   MatFactorInfo      info;
140:   Vec                left = a->left,right = a->right;
141:   Mat                *diag;

144:   its = its*lits;
145:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
146:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
147:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
148:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");

150:   if (!a->diags) {
151:     PetscMalloc(mbs*sizeof(Mat),&a->diags);
152:     MatFactorInfoInitialize(&info);
153:     for (i=0; i<mbs; i++) {
154:       MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col);
155:       MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info);
156:       MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info);
157:       ISDestroy(&row);
158:       ISDestroy(&col);
159:     }
160:   }
161:   diag = a->diags;

163:   VecSet(xx,0.0);
164:   VecGetArray(xx,&x);
165:   VecGetArrayRead(bb,&b);

167:   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
168:   while (its--) {
169:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){

171:       for (i=0; i<mbs; i++) {
172:         n    = a->i[i+1] - a->i[i];
173:         idx  = a->j + a->i[i];
174:         v    = a->a + a->i[i];

176:         VecSet(left,0.0);
177:         for (j=0; j<n; j++) {
178:           if (idx[j] != i) {
179:             VecPlaceArray(right,x + idx[j]*bs);
180:             MatMultAdd(v[j],right,left,left);
181:             VecResetArray(right);
182:           }
183:         }
184:         VecPlaceArray(right,b + i*bs);
185:         VecAYPX(left,-1.0,right);
186:         VecResetArray(right);

188:         VecPlaceArray(right,x + i*bs);
189:         MatSolve(diag[i],left,right);
190:         VecResetArray(right);
191:       }
192:     }
193:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){

195:       for (i=mbs-1; i>=0; i--) {
196:         n    = a->i[i+1] - a->i[i];
197:         idx  = a->j + a->i[i];
198:         v    = a->a + a->i[i];

200:         VecSet(left,0.0);
201:         for (j=0; j<n; j++) {
202:           if (idx[j] != i) {
203:             VecPlaceArray(right,x + idx[j]*bs);
204:             MatMultAdd(v[j],right,left,left);
205:             VecResetArray(right);
206:           }
207:         }
208:         VecPlaceArray(right,b + i*bs);
209:         VecAYPX(left,-1.0,right);
210:         VecResetArray(right);

212:         VecPlaceArray(right,x + i*bs);
213:         MatSolve(diag[i],left,right);
214:         VecResetArray(right);

216:       }
217:     }
218:   }
219:   VecRestoreArray(xx,&x);
220:   VecRestoreArrayRead(bb,&b);
221:   return(0);
222: }

226: PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
227: {
228:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
229:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
230:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
231:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
233:   PetscInt       ridx,cidx;
234:   PetscBool      roworiented=a->roworiented;
235:   MatScalar      value;
236:   Mat            *ap,*aa = a->a;

240:   for (k=0; k<m; k++) { /* loop over added rows */
241:     row  = im[k];
242:     brow = row/bs;
243:     if (row < 0) continue;
244: #if defined(PETSC_USE_DEBUG)  
245:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
246: #endif
247:     rp   = aj + ai[brow];
248:     ap   = aa + ai[brow];
249:     rmax = imax[brow];
250:     nrow = ailen[brow];
251:     low  = 0;
252:     high = nrow;
253:     for (l=0; l<n; l++) { /* loop over added columns */
254:       if (in[l] < 0) continue;
255: #if defined(PETSC_USE_DEBUG)  
256:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
257: #endif
258:       col = in[l]; bcol = col/bs;
259:       if (A->symmetric && brow > bcol) continue;
260:       ridx = row % bs; cidx = col % bs;
261:       if (roworiented) {
262:         value = v[l + k*n];
263:       } else {
264:         value = v[k + l*m];
265:       }
266:       if (col <= lastcol) low = 0; else high = nrow;
267:       lastcol = col;
268:       while (high-low > 7) {
269:         t = (low+high)/2;
270:         if (rp[t] > bcol) high = t;
271:         else              low  = t;
272:       }
273:       for (i=low; i<high; i++) {
274:         if (rp[i] > bcol) break;
275:         if (rp[i] == bcol) {
276:           goto noinsert1;
277:         }
278:       }
279:       if (nonew == 1) goto noinsert1;
280:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
281:       MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
282:       N = nrow++ - 1; high++;
283:       /* shift up all the later entries in this row */
284:       for (ii=N; ii>=i; ii--) {
285:         rp[ii+1] = rp[ii];
286:         ap[ii+1] = ap[ii];
287:       }
288:       if (N>=i) ap[i] = 0;
289:       rp[i]           = bcol;
290:       a->nz++;
291:       noinsert1:;
292:       if (!*(ap+i)) {
293:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,0,ap+i);
294:       }
295:       MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is);
296:       low = i;
297:     }
298:     ailen[brow] = nrow;
299:   }
300:   A->same_nonzero = PETSC_FALSE;
301:   return(0);
302: }

306: PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
307: {
308:   PetscErrorCode    ierr;
309:   Mat               tmpA;
310:   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
311:   const PetscInt    *cols;
312:   const PetscScalar *values;
313:   PetscBool         flg = PETSC_FALSE,notdone;
314:   Mat_SeqAIJ        *a;
315:   Mat_BlockMat      *amat;

318:   MatCreate(PETSC_COMM_SELF,&tmpA);
319:   MatSetType(tmpA,MATSEQAIJ);
320:   MatLoad_SeqAIJ(tmpA,viewer);

322:   MatGetLocalSize(tmpA,&m,&n);
323:   PetscOptionsBegin(PETSC_COMM_SELF,PETSC_NULL,"Options for loading BlockMat matrix 1","Mat");
324:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
325:   PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,PETSC_NULL);
326:   PetscOptionsEnd();

328:   /* Determine number of nonzero blocks for each block row */
329:   a    = (Mat_SeqAIJ*) tmpA->data;
330:   mbs  = m/bs;
331:   PetscMalloc3(mbs,PetscInt,&lens,bs,PetscInt*,&ii,bs,PetscInt,&ilens);
332:   PetscMemzero(lens,mbs*sizeof(PetscInt));

334:   for (i=0; i<mbs; i++) {
335:     for (j=0; j<bs; j++) {
336:       ii[j]         = a->j + a->i[i*bs + j];
337:       ilens[j]      = a->i[i*bs + j + 1] - a->i[i*bs + j];
338:     }

340:     currentcol = -1;
341:     notdone = PETSC_TRUE;
342:     while (PETSC_TRUE) {
343:       notdone = PETSC_FALSE;
344:       nextcol = 1000000000;
345:       for (j=0; j<bs; j++) {
346:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
347:           ii[j]++;
348:           ilens[j]--;
349:         }
350:         if (ilens[j] > 0) {
351:           notdone = PETSC_TRUE;
352:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
353:         }
354:       }
355:       if (!notdone) break;
356:       if (!flg || (nextcol >= i)) lens[i]++;
357:       currentcol = nextcol;
358:     }
359:   }

361:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
362:     MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
363:   }
364:   MatBlockMatSetPreallocation(newmat,bs,0,lens);
365:   if (flg) {
366:     MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE);
367:   }
368:   amat = (Mat_BlockMat*)(newmat)->data;

370:   /* preallocate the submatrices */
371:   PetscMalloc(bs*sizeof(PetscInt),&llens);
372:   for (i=0; i<mbs; i++) { /* loops for block rows */
373:     for (j=0; j<bs; j++) {
374:       ii[j]         = a->j + a->i[i*bs + j];
375:       ilens[j]      = a->i[i*bs + j + 1] - a->i[i*bs + j];
376:     }

378:     currentcol = 1000000000;
379:     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
380:       if (ilens[j] > 0) {
381:         currentcol = PetscMin(currentcol,ii[j][0]/bs);
382:       }
383:     }

385:     notdone = PETSC_TRUE;
386:     while (PETSC_TRUE) {  /* loops over blocks in block row */

388:       notdone = PETSC_FALSE;
389:       nextcol = 1000000000;
390:       PetscMemzero(llens,bs*sizeof(PetscInt));
391:       for (j=0; j<bs; j++) { /* loop over rows in block */
392:         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
393:           ii[j]++;
394:           ilens[j]--;
395:           llens[j]++;
396:         }
397:         if (ilens[j] > 0) {
398:           notdone = PETSC_TRUE;
399:           nextcol = PetscMin(nextcol,ii[j][0]/bs);
400:         }
401:       }
402:       if (cnt >= amat->maxnz) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %D",cnt);
403:       if (!flg || currentcol >= i) {
404:         amat->j[cnt] = currentcol;
405:         MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++);
406:       }

408:       if (!notdone) break;
409:       currentcol = nextcol;
410:     }
411:     amat->ilen[i] = lens[i];
412:   }
413:   CHKMEMQ;

415:   PetscFree3(lens,ii,ilens);
416:   PetscFree(llens);

418:   /* copy over the matrix, one row at a time */
419:   for (i=0; i<m; i++) {
420:     MatGetRow(tmpA,i,&ncols,&cols,&values);
421:     MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES);
422:     MatRestoreRow(tmpA,i,&ncols,&cols,&values);
423:   }
424:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
425:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
426:   return(0);
427: }

431: PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
432: {
433:   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
434:   PetscErrorCode    ierr;
435:   const char        *name;
436:   PetscViewerFormat format;

439:   PetscObjectGetName((PetscObject)A,&name);
440:   PetscViewerGetFormat(viewer,&format);
441:   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
442:     PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %D \n",a->nz);
443:     if (A->symmetric) {
444:       PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n");
445:     }
446:   }
447:   return(0);
448: }

452: PetscErrorCode MatDestroy_BlockMat(Mat mat)
453: {
455:   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
456:   PetscInt       i;

459:   VecDestroy(&bmat->right);
460:   VecDestroy(&bmat->left);
461:   VecDestroy(&bmat->middle);
462:   VecDestroy(&bmat->workb);
463:   if (bmat->diags) {
464:     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
465:       MatDestroy(&bmat->diags[i]);
466:     }
467:   }
468:   if (bmat->a) {
469:     for (i=0; i<bmat->nz; i++) {
470:       MatDestroy(&bmat->a[i]);
471:     }
472:   }
473:   MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
474:   PetscFree(mat->data);
475:   return(0);
476: }

480: PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
481: {
482:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
484:   PetscScalar    *xx,*yy;
485:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
486:   Mat            *aa;

489:   CHKMEMQ;
490:   /*
491:      Standard CSR multiply except each entry is a Mat
492:   */
493:   VecGetArray(x,&xx);

495:   VecSet(y,0.0);
496:   VecGetArray(y,&yy);
497:   aj  = bmat->j;
498:   aa  = bmat->a;
499:   ii  = bmat->i;
500:   for (i=0; i<m; i++) {
501:     jrow = ii[i];
502:     VecPlaceArray(bmat->left,yy + bs*i);
503:     n    = ii[i+1] - jrow;
504:     for (j=0; j<n; j++) {
505:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
506:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
507:       VecResetArray(bmat->right);
508:       jrow++;
509:     }
510:     VecResetArray(bmat->left);
511:   }
512:   VecRestoreArray(x,&xx);
513:   VecRestoreArray(y,&yy);
514:   CHKMEMQ;
515:   return(0);
516: }

520: PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
521: {
522:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
524:   PetscScalar    *xx,*yy;
525:   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
526:   Mat            *aa;

529:   CHKMEMQ;
530:   /*
531:      Standard CSR multiply except each entry is a Mat
532:   */
533:   VecGetArray(x,&xx);

535:   VecSet(y,0.0);
536:   VecGetArray(y,&yy);
537:   aj  = bmat->j;
538:   aa  = bmat->a;
539:   ii  = bmat->i;
540:   for (i=0; i<m; i++) {
541:     jrow = ii[i];
542:     n    = ii[i+1] - jrow;
543:     VecPlaceArray(bmat->left,yy + bs*i);
544:     VecPlaceArray(bmat->middle,xx + bs*i);
545:     /* if we ALWAYS required a diagonal entry then could remove this if test */
546:     if (aj[jrow] == i) {
547:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);
548:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
549:       VecResetArray(bmat->right);
550:       jrow++;
551:       n--;
552:     }
553:     for (j=0; j<n; j++) {
554:       VecPlaceArray(bmat->right,xx + bs*aj[jrow]);            /* upper triangular part */
555:       MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left);
556:       VecResetArray(bmat->right);

558:       VecPlaceArray(bmat->right,yy + bs*aj[jrow]);            /* lower triangular part */
559:       MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right);
560:       VecResetArray(bmat->right);
561:       jrow++;
562:     }
563:     VecResetArray(bmat->left);
564:     VecResetArray(bmat->middle);
565:   }
566:   VecRestoreArray(x,&xx);
567:   VecRestoreArray(y,&yy);
568:   CHKMEMQ;
569:   return(0);
570: }

574: PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
575: {
577:   return(0);
578: }

582: PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
583: {
585:   return(0);
586: }

590: PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
591: {
593:   return(0);
594: }

596: /*
597:      Adds diagonal pointers to sparse matrix structure.
598: */
601: PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
602: {
603:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
605:   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;

608:   if (!a->diag) {
609:     PetscMalloc(mbs*sizeof(PetscInt),&a->diag);
610:   }
611:   for (i=0; i<mbs; i++) {
612:     a->diag[i] = a->i[i+1];
613:     for (j=a->i[i]; j<a->i[i+1]; j++) {
614:       if (a->j[j] == i) {
615:         a->diag[i] = j;
616:         break;
617:       }
618:     }
619:   }
620:   return(0);
621: }

625: PetscErrorCode MatGetSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
626: {
627:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
628:   Mat_SeqAIJ     *c;
630:   PetscInt       i,k,first,step,lensi,nrows,ncols;
631:   PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
632:   PetscScalar    *a_new;
633:   Mat            C,*aa = a->a;
634:   PetscBool      stride,equal;

637:   ISEqual(isrow,iscol,&equal);
638:   if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for idential column and row indices");
639:   PetscTypeCompare((PetscObject)iscol,ISSTRIDE,&stride);
640:   if (!stride) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
641:   ISStrideGetInfo(iscol,&first,&step);
642:   if (step != A->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

644:   ISGetLocalSize(isrow,&nrows);
645:   ncols = nrows;

647:   /* create submatrix */
648:   if (scall == MAT_REUSE_MATRIX) {
649:     PetscInt n_cols,n_rows;
650:     C = *B;
651:     MatGetSize(C,&n_rows,&n_cols);
652:     if (n_rows != nrows || n_cols != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
653:     MatZeroEntries(C);
654:   } else {
655:     MatCreate(((PetscObject)A)->comm,&C);
656:     MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE);
657:     if (A->symmetric) {
658:       MatSetType(C,MATSEQSBAIJ);
659:     } else {
660:       MatSetType(C,MATSEQAIJ);
661:     }
662:     MatSeqAIJSetPreallocation(C,0,ailen);
663:     MatSeqSBAIJSetPreallocation(C,1,0,ailen);
664:   }
665:   c = (Mat_SeqAIJ*)C->data;
666: 
667:   /* loop over rows inserting into submatrix */
668:   a_new    = c->a;
669:   j_new    = c->j;
670:   i_new    = c->i;
671: 
672:   for (i=0; i<nrows; i++) {
673:     lensi = ailen[i];
674:     for (k=0; k<lensi; k++) {
675:       *j_new++ = *aj++;
676:       MatGetValue(*aa++,first,first,a_new++);
677:     }
678:     i_new[i+1]  = i_new[i] + lensi;
679:     c->ilen[i]  = lensi;
680:   }

682:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
683:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
684:   *B = C;
685:   return(0);
686: }

690: PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
691: {
692:   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
694:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
695:   PetscInt       m = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
696:   Mat            *aa = a->a,*ap;

699:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

701:   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
702:   for (i=1; i<m; i++) {
703:     /* move each row back by the amount of empty slots (fshift) before it*/
704:     fshift += imax[i-1] - ailen[i-1];
705:     rmax   = PetscMax(rmax,ailen[i]);
706:     if (fshift) {
707:       ip = aj + ai[i] ;
708:       ap = aa + ai[i] ;
709:       N  = ailen[i];
710:       for (j=0; j<N; j++) {
711:         ip[j-fshift] = ip[j];
712:         ap[j-fshift] = ap[j];
713:       }
714:     }
715:     ai[i] = ai[i-1] + ailen[i-1];
716:   }
717:   if (m) {
718:     fshift += imax[m-1] - ailen[m-1];
719:     ai[m]  = ai[m-1] + ailen[m-1];
720:   }
721:   /* reset ilen and imax for each row */
722:   for (i=0; i<m; i++) {
723:     ailen[i] = imax[i] = ai[i+1] - ai[i];
724:   }
725:   a->nz = ai[m];
726:   for (i=0; i<a->nz; i++) {
727: #if defined(PETSC_USE_DEBUG)
728:     if (!aa[i]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %D column %D nz %D",i,aj[i],a->nz);
729: #endif
730:     MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY);
731:     MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY);
732:   }
733:   CHKMEMQ;
734:   PetscInfo4(A,"Matrix size: %D X %D; storage space: %D unneeded,%D used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz);
735:   PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
736:   PetscInfo1(A,"Maximum nonzeros in any row is %D\n",rmax);
737:   A->info.mallocs     += a->reallocs;
738:   a->reallocs          = 0;
739:   A->info.nz_unneeded  = (double)fshift;
740:   a->rmax              = rmax;

742:   A->same_nonzero = PETSC_TRUE;
743:   MatMarkDiagonal_BlockMat(A);
744:   return(0);
745: }

749: PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool  flg)
750: {
752:   if (opt == MAT_SYMMETRIC && flg) {
753:     A->ops->sor = MatSOR_BlockMat_Symmetric;
754:     A->ops->mult  = MatMult_BlockMat_Symmetric;
755:   } else {
756:     PetscInfo1(A,"Unused matrix option %s\n",MatOptions[opt]);
757:   }
758:   return(0);
759: }


762: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
763:        0,
764:        0,
765:        MatMult_BlockMat,
766: /* 4*/ MatMultAdd_BlockMat,
767:        MatMultTranspose_BlockMat,
768:        MatMultTransposeAdd_BlockMat,
769:        0,
770:        0,
771:        0,
772: /*10*/ 0,
773:        0,
774:        0,
775:        MatSOR_BlockMat,
776:        0,
777: /*15*/ 0,
778:        0,
779:        0,
780:        0,
781:        0,
782: /*20*/ 0,
783:        MatAssemblyEnd_BlockMat,
784:        MatSetOption_BlockMat,
785:        0,
786: /*24*/ 0,
787:        0,
788:        0,
789:        0,
790:        0,
791: /*29*/ 0,
792:        0,
793:        0,
794:        0,
795:        0,
796: /*34*/ 0,
797:        0,
798:        0,
799:        0,
800:        0,
801: /*39*/ 0,
802:        0,
803:        0,
804:        0,
805:        0,
806: /*44*/ 0,
807:        0,
808:        0,
809:        0,
810:        0,
811: /*49*/ 0,
812:        0,
813:        0,
814:        0,
815:        0,
816: /*54*/ 0,
817:        0,
818:        0,
819:        0,
820:        0,
821: /*59*/ MatGetSubMatrix_BlockMat,
822:        MatDestroy_BlockMat,
823:        MatView_BlockMat,
824:        0,
825:        0,
826: /*64*/ 0,
827:        0,
828:        0,
829:        0,
830:        0,
831: /*69*/ 0,
832:        0,
833:        0,
834:        0,
835:        0,
836: /*74*/ 0,
837:        0,
838:        0,
839:        0,
840:        0,
841: /*79*/ 0,
842:        0,
843:        0,
844:        0,
845:        MatLoad_BlockMat,
846: /*84*/ 0,
847:        0,
848:        0,
849:        0,
850:        0,
851: /*89*/ 0,
852:        0,
853:        0,
854:        0,
855:        0,
856: /*94*/ 0,
857:        0,
858:        0,
859:        0,
860:        0,
861: /*99*/ 0,
862:        0,
863:        0,
864:        0,
865:        0,
866: /*104*/0,
867:        0,
868:        0,
869:        0,
870:        0,
871: /*109*/0,
872:        0,
873:        0,
874:        0,
875:        0,
876: /*114*/0,
877:        0,
878:        0,
879:        0,
880:        0,
881: /*119*/0,
882:        0,
883:        0,
884:        0
885: };

889: /*@C
890:    MatBlockMatSetPreallocation - For good matrix assembly performance
891:    the user should preallocate the matrix storage by setting the parameter nz
892:    (or the array nnz).  By setting these parameters accurately, performance
893:    during matrix assembly can be increased by more than a factor of 50.

895:    Collective on MPI_Comm

897:    Input Parameters:
898: +  B - The matrix
899: .  bs - size of each block in matrix
900: .  nz - number of nonzeros per block row (same for all rows)
901: -  nnz - array containing the number of nonzeros in the various block rows 
902:          (possibly different for each row) or PETSC_NULL

904:    Notes:
905:      If nnz is given then nz is ignored

907:    Specify the preallocated storage with either nz or nnz (not both).
908:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
909:    allocation.  For large problems you MUST preallocate memory or you 
910:    will get TERRIBLE performance, see the users' manual chapter on matrices.

912:    Level: intermediate

914: .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()

916: @*/
917: PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
918: {

922:   PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
923:   return(0);
924: }

929: PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
930: {
931:   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
933:   PetscInt       i;

936:   PetscLayoutSetBlockSize(A->rmap,1);
937:   PetscLayoutSetBlockSize(A->cmap,1);
938:   PetscLayoutSetUp(A->rmap);
939:   PetscLayoutSetUp(A->cmap);

941:   if (bs < 1) SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Block size given %D must be great than zero",bs);
942:   if (A->rmap->n % bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of rows %D",bs,A->rmap->n);
943:   if (A->cmap->n % bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Blocksize %D does not divide number of columns %D",bs,A->cmap->n);
944:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
945:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
946:   if (nnz) {
947:     for (i=0; i<A->rmap->n/bs; i++) {
948:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
949:       if (nnz[i] > A->cmap->n/bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %d value %d rowlength %d",i,nnz[i],A->cmap->n/bs);
950:     }
951:   }
952:   A->rmap->bs = A->cmap->bs = bs;
953:   bmat->mbs  = A->rmap->n/bs;

955:   VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->right);
956:   VecCreateSeqWithArray(PETSC_COMM_SELF,bs,PETSC_NULL,&bmat->middle);
957:   VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left);

959:   if (!bmat->imax) {
960:     PetscMalloc2(A->rmap->n,PetscInt,&bmat->imax,A->rmap->n,PetscInt,&bmat->ilen);
961:     PetscLogObjectMemory(A,2*A->rmap->n*sizeof(PetscInt));
962:   }
963:   if (nnz) {
964:     nz = 0;
965:     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
966:       bmat->imax[i] = nnz[i];
967:       nz           += nnz[i];
968:     }
969:   } else {
970:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");
971:   }

973:   /* bmat->ilen will count nonzeros in each row so far. */
974:   for (i=0; i<bmat->mbs; i++) { bmat->ilen[i] = 0;}

976:   /* allocate the matrix space */
977:   MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i);
978:   PetscMalloc3(nz,Mat,&bmat->a,nz,PetscInt,&bmat->j,A->rmap->n+1,PetscInt,&bmat->i);
979:   PetscLogObjectMemory(A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt)));
980:   bmat->i[0] = 0;
981:   for (i=1; i<bmat->mbs+1; i++) {
982:     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
983:   }
984:   bmat->singlemalloc = PETSC_TRUE;
985:   bmat->free_a       = PETSC_TRUE;
986:   bmat->free_ij      = PETSC_TRUE;

988:   bmat->nz                = 0;
989:   bmat->maxnz             = nz;
990:   A->info.nz_unneeded  = (double)bmat->maxnz;

992:   return(0);
993: }

996: /*MC
997:    MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
998:                  consisting of (usually) sparse blocks.

1000:   Level: advanced

1002: .seealso: MatCreateBlockMat()

1004: M*/

1009: PetscErrorCode  MatCreate_BlockMat(Mat A)
1010: {
1011:   Mat_BlockMat   *b;

1015:   PetscNewLog(A,Mat_BlockMat,&b);
1016:   A->data = (void*)b;
1017:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));

1019:   A->assembled     = PETSC_TRUE;
1020:   A->preallocated  = PETSC_FALSE;
1021:   PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT);

1023:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatBlockMatSetPreallocation_C",
1024:                                      "MatBlockMatSetPreallocation_BlockMat",
1025:                                       MatBlockMatSetPreallocation_BlockMat);

1027:   return(0);
1028: }

1033: /*@C
1034:    MatCreateBlockMat - Creates a new matrix based sparse Mat storage

1036:   Collective on MPI_Comm

1038:    Input Parameters:
1039: +  comm - MPI communicator
1040: .  m - number of rows
1041: .  n  - number of columns
1042: .  bs - size of each submatrix
1043: .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
1044: -  nnz - expected number of nonzers per block row if known (use PETSC_NULL otherwise)


1047:    Output Parameter:
1048: .  A - the matrix

1050:    Level: intermediate

1052:    PETSc requires that matrices and vectors being used for certain
1053:    operations are partitioned accordingly.  For example, when
1054:    creating a bmat matrix, A, that supports parallel matrix-vector
1055:    products using MatMult(A,x,y) the user should set the number
1056:    of local matrix rows to be the number of local elements of the
1057:    corresponding result vector, y. Note that this is information is
1058:    required for use of the matrix interface routines, even though
1059:    the bmat matrix may not actually be physically partitioned.
1060:    For example,

1062: .keywords: matrix, bmat, create

1064: .seealso: MATBLOCKMAT
1065: @*/
1066: PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
1067: {

1071:   MatCreate(comm,A);
1072:   MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
1073:   MatSetType(*A,MATBLOCKMAT);
1074:   MatBlockMatSetPreallocation(*A,bs,nz,nnz);
1075:   return(0);
1076: }