Actual source code: mpibaij.c

  2: #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
  3: #include <petscblaslapack.h>


 16: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
 17: {
 18:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
 20:   PetscInt       i,*idxb = 0;
 21:   PetscScalar    *va,*vb;
 22:   Vec            vtmp;

 25:   MatGetRowMaxAbs(a->A,v,idx);
 26:   VecGetArray(v,&va);
 27:   if (idx) {
 28:     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
 29:   }

 31:   VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
 32:   if (idx) {PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);}
 33:   MatGetRowMaxAbs(a->B,vtmp,idxb);
 34:   VecGetArray(vtmp,&vb);

 36:   for (i=0; i<A->rmap->n; i++){
 37:     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
 38:   }

 40:   VecRestoreArray(v,&va);
 41:   VecRestoreArray(vtmp,&vb);
 42:   PetscFree(idxb);
 43:   VecDestroy(&vtmp);
 44:   return(0);
 45: }

 50: PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
 51: {
 52:   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;

 56:   MatStoreValues(aij->A);
 57:   MatStoreValues(aij->B);
 58:   return(0);
 59: }

 65: PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
 66: {
 67:   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;

 71:   MatRetrieveValues(aij->A);
 72:   MatRetrieveValues(aij->B);
 73:   return(0);
 74: }

 77: /* 
 78:      Local utility routine that creates a mapping from the global column 
 79:    number to the local number in the off-diagonal part of the local 
 80:    storage of the matrix.  This is done in a non-scalable way since the
 81:    length of colmap equals the global matrix length. 
 82: */
 85: PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
 86: {
 87:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
 88:   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
 90:   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

 93: #if defined (PETSC_USE_CTABLE)
 94:   PetscTableCreate(baij->nbs,&baij->colmap);
 95:   for (i=0; i<nbs; i++){
 96:     PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);
 97:   }
 98: #else
 99:   PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);
100:   PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
101:   PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));
102:   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103: #endif
104:   return(0);
105: }
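/*
   Illustrative sketch, not part of the original source: how the colmap built
   above is typically consulted when a global column gcol owned by another
   process must be translated to a local column of the off-diagonal block B
   (compare MatSetValues_MPIBAIJ() below).  The stored indices are shifted by
   +1 so that 0 (CTABLE) or -1 (array) can mean "block column not present".
   The helper name and its lack of error checking are hypothetical.
*/
#if 0
static PetscInt ColmapLookup(Mat_MPIBAIJ *baij,PetscInt bs,PetscInt gcol)
{
  PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);
  lcol--;                                 /* lcol < 0: block column gcol/bs is not in B */
#else
  lcol = baij->colmap[gcol/bs] - 1;       /* -1: block column gcol/bs is not in B */
#endif
  return (lcol < 0) ? -1 : lcol + gcol % bs;
}
#endif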

107: #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108: { \
109:  \
110:     brow = row/bs;  \
111:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112:     rmax = aimax[brow]; nrow = ailen[brow]; \
113:       bcol = col/bs; \
114:       ridx = row % bs; cidx = col % bs; \
115:       low = 0; high = nrow; \
116:       while (high-low > 3) { \
117:         t = (low+high)/2; \
118:         if (rp[t] > bcol) high = t; \
119:         else              low  = t; \
120:       } \
121:       for (_i=low; _i<high; _i++) { \
122:         if (rp[_i] > bcol) break; \
123:         if (rp[_i] == bcol) { \
124:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125:           if (addv == ADD_VALUES) *bap += value;  \
126:           else                    *bap  = value;  \
127:           goto a_noinsert; \
128:         } \
129:       } \
130:       if (a->nonew == 1) goto a_noinsert; \
131:       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133:       N = nrow++ - 1;  \
134:       /* shift up all the later entries in this row */ \
135:       for (ii=N; ii>=_i; ii--) { \
136:         rp[ii+1] = rp[ii]; \
137:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
138:       } \
139:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); }  \
140:       rp[_i]                      = bcol;  \
141:       ap[bs2*_i + bs*cidx + ridx] = value;  \
142:       a_noinsert:; \
143:     ailen[brow] = nrow; \
144: } 

146: #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147: { \
148:     brow = row/bs;  \
149:     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150:     rmax = bimax[brow]; nrow = bilen[brow]; \
151:       bcol = col/bs; \
152:       ridx = row % bs; cidx = col % bs; \
153:       low = 0; high = nrow; \
154:       while (high-low > 3) { \
155:         t = (low+high)/2; \
156:         if (rp[t] > bcol) high = t; \
157:         else              low  = t; \
158:       } \
159:       for (_i=low; _i<high; _i++) { \
160:         if (rp[_i] > bcol) break; \
161:         if (rp[_i] == bcol) { \
162:           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163:           if (addv == ADD_VALUES) *bap += value;  \
164:           else                    *bap  = value;  \
165:           goto b_noinsert; \
166:         } \
167:       } \
168:       if (b->nonew == 1) goto b_noinsert; \
169:       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170:       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171:       CHKMEMQ;\
172:       N = nrow++ - 1;  \
173:       /* shift up all the later entries in this row */ \
174:       for (ii=N; ii>=_i; ii--) { \
175:         rp[ii+1] = rp[ii]; \
176:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
177:       } \
178:       if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));}  \
179:       rp[_i]                      = bcol;  \
180:       ap[bs2*_i + bs*cidx + ridx] = value;  \
181:       b_noinsert:; \
182:     bilen[brow] = nrow; \
183: } 
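/*
   Layout note added for clarity (not in the original source): within block
   slot _i the macros above store entry (ridx,cidx) of the bs x bs block at
   ap[bs2*_i + bs*cidx + ridx], i.e. each block is stored column-major.  For
   example, with bs = 2 (bs2 = 4) block slot _i holds

       ap[4*_i + 0] = block(0,0)    ap[4*_i + 2] = block(0,1)
       ap[4*_i + 1] = block(1,0)    ap[4*_i + 3] = block(1,1)
*/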

187: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188: {
189:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190:   MatScalar      value;
191:   PetscBool      roworiented = baij->roworiented;
193:   PetscInt       i,j,row,col;
194:   PetscInt       rstart_orig=mat->rmap->rstart;
195:   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196:   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

198:   /* Some Variables required in the macro */
199:   Mat            A = baij->A;
200:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201:   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202:   MatScalar      *aa=a->a;

204:   Mat            B = baij->B;
205:   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206:   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207:   MatScalar      *ba=b->a;

209:   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210:   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211:   MatScalar      *ap,*bap;

215:   for (i=0; i<m; i++) {
216:     if (im[i] < 0) continue;
217: #if defined(PETSC_USE_DEBUG)
218:     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219: #endif
220:     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221:       row = im[i] - rstart_orig;
222:       for (j=0; j<n; j++) {
223:         if (in[j] >= cstart_orig && in[j] < cend_orig){
224:           col = in[j] - cstart_orig;
225:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226:           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227:           /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
228:         } else if (in[j] < 0) continue;
229: #if defined(PETSC_USE_DEBUG)
230:         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231: #endif
232:         else {
233:           if (mat->was_assembled) {
234:             if (!baij->colmap) {
235:               CreateColmap_MPIBAIJ_Private(mat);
236:             }
237: #if defined (PETSC_USE_CTABLE)
238:             PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
239:             col  = col - 1;
240: #else
241:             col = baij->colmap[in[j]/bs] - 1;
242: #endif
243:             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244:               DisAssemble_MPIBAIJ(mat);
245:               col =  in[j];
246:               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247:               B = baij->B;
248:               b = (Mat_SeqBAIJ*)(B)->data;
249:               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250:               ba=b->a;
251:             } else col += in[j]%bs;
252:           } else col = in[j];
253:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254:           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255:           /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
256:         }
257:       }
258:     } else {
259:       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260:       if (!baij->donotstash) {
261:         if (roworiented) {
262:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
263:         } else {
264:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
265:         }
266:       }
267:     }
268:   }
269:   return(0);
270: }

274: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
275: {
276:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
277:   const PetscScalar *value;
278:   MatScalar         *barray=baij->barray;
279:   PetscBool         roworiented = baij->roworiented;
280:   PetscErrorCode    ierr;
281:   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
282:   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
283:   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
284: 
286:   if(!barray) {
287:     PetscMalloc(bs2*sizeof(MatScalar),&barray);
288:     baij->barray = barray;
289:   }

291:   if (roworiented) {
292:     stepval = (n-1)*bs;
293:   } else {
294:     stepval = (m-1)*bs;
295:   }
296:   for (i=0; i<m; i++) {
297:     if (im[i] < 0) continue;
298: #if defined(PETSC_USE_DEBUG)
299:     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
300: #endif
301:     if (im[i] >= rstart && im[i] < rend) {
302:       row = im[i] - rstart;
303:       for (j=0; j<n; j++) {
 304:         /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
305:         if ((roworiented) && (n == 1)) {
306:           barray = (MatScalar*)v + i*bs2;
307:         } else if((!roworiented) && (m == 1)) {
308:           barray = (MatScalar*)v + j*bs2;
309:         } else { /* Here a copy is required */
310:           if (roworiented) {
311:             value = v + (i*(stepval+bs) + j)*bs;
312:           } else {
313:             value = v + (j*(stepval+bs) + i)*bs;
314:           }
315:           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
316:             for (jj=0; jj<bs; jj++) {
317:               barray[jj]  = value[jj];
318:             }
319:             barray += bs;
320:           }
321:           barray -= bs2;
322:         }
323: 
324:         if (in[j] >= cstart && in[j] < cend){
325:           col  = in[j] - cstart;
326:           MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
327:         }
328:         else if (in[j] < 0) continue;
329: #if defined(PETSC_USE_DEBUG)
330:         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331: #endif
332:         else {
333:           if (mat->was_assembled) {
334:             if (!baij->colmap) {
335:               CreateColmap_MPIBAIJ_Private(mat);
336:             }

338: #if defined(PETSC_USE_DEBUG)
339: #if defined (PETSC_USE_CTABLE)
340:             { PetscInt data;
341:               PetscTableFind(baij->colmap,in[j]+1,&data);
342:               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343:             }
344: #else
345:             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346: #endif
347: #endif
348: #if defined (PETSC_USE_CTABLE)
349:             PetscTableFind(baij->colmap,in[j]+1,&col);
350:             col  = (col - 1)/bs;
351: #else
352:             col = (baij->colmap[in[j]] - 1)/bs;
353: #endif
354:             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
355:               DisAssemble_MPIBAIJ(mat);
356:               col =  in[j];
357:             }
358:           }
359:           else col = in[j];
360:           MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
361:         }
362:       }
363:     } else {
364:       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
365:       if (!baij->donotstash) {
366:         if (roworiented) {
367:           MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
368:         } else {
369:           MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
370:         }
371:       }
372:     }
373:   }
374:   return(0);
375: }

377: #define HASH_KEY 0.6180339887
378: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
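/*
   Worked example added for clarity (not in the original source): HASH() is
   multiplicative hashing with the golden-ratio constant.  For size = 11 and
   key = 7:

       tmp  = 7*0.6180339887      = 4.3262379209
       frac = tmp - (PetscInt)tmp = 0.3262379209
       h1   = (PetscInt)(11*frac) = 3

   so probing for key 7 starts at slot 3 and, on a collision, scans forward
   (wrapping to slot 0) as in the routines below.
*/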
383: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384: {
385:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386:   PetscBool      roworiented = baij->roworiented;
388:   PetscInt       i,j,row,col;
389:   PetscInt       rstart_orig=mat->rmap->rstart;
390:   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391:   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392:   PetscReal      tmp;
393:   MatScalar      **HD = baij->hd,value;
394: #if defined(PETSC_USE_DEBUG)
395:   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396: #endif

400:   for (i=0; i<m; i++) {
401: #if defined(PETSC_USE_DEBUG)
402:     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403:     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404: #endif
 405:     row = im[i];
406:     if (row >= rstart_orig && row < rend_orig) {
407:       for (j=0; j<n; j++) {
408:         col = in[j];
409:         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
 410:         /* Look up into the hash table */
411:         key = (row/bs)*Nbs+(col/bs)+1;
412:         h1  = HASH(size,key,tmp);

414: 
415:         idx = h1;
416: #if defined(PETSC_USE_DEBUG)
417:         insert_ct++;
418:         total_ct++;
419:         if (HT[idx] != key) {
420:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421:           if (idx == size) {
422:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423:             if (idx == h1) {
424:               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425:             }
426:           }
427:         }
428: #else
429:         if (HT[idx] != key) {
430:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431:           if (idx == size) {
432:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433:             if (idx == h1) {
434:               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435:             }
436:           }
437:         }
438: #endif
439:         /* A HASH table entry is found, so insert the values at the correct address */
440:         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441:         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442:       }
443:     } else {
444:       if (!baij->donotstash) {
445:         if (roworiented) {
446:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
447:         } else {
448:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
449:         }
450:       }
451:     }
452:   }
453: #if defined(PETSC_USE_DEBUG)
454:   baij->ht_total_ct = total_ct;
455:   baij->ht_insert_ct = insert_ct;
456: #endif
457:   return(0);
458: }

462: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463: {
464:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465:   PetscBool         roworiented = baij->roworiented;
466:   PetscErrorCode    ierr;
467:   PetscInt          i,j,ii,jj,row,col;
468:   PetscInt          rstart=baij->rstartbs;
469:   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470:   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471:   PetscReal         tmp;
472:   MatScalar         **HD = baij->hd,*baij_a;
473:   const PetscScalar *v_t,*value;
474: #if defined(PETSC_USE_DEBUG)
475:   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476: #endif
477: 

480:   if (roworiented) {
481:     stepval = (n-1)*bs;
482:   } else {
483:     stepval = (m-1)*bs;
484:   }
485:   for (i=0; i<m; i++) {
486: #if defined(PETSC_USE_DEBUG)
487:     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488:     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489: #endif
490:     row   = im[i];
491:     v_t   = v + i*nbs2;
492:     if (row >= rstart && row < rend) {
493:       for (j=0; j<n; j++) {
494:         col = in[j];

496:         /* Look up into the Hash Table */
497:         key = row*Nbs+col+1;
498:         h1  = HASH(size,key,tmp);
499: 
500:         idx = h1;
501: #if defined(PETSC_USE_DEBUG)
502:         total_ct++;
503:         insert_ct++;
 504:         if (HT[idx] != key) {
505:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506:           if (idx == size) {
507:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508:             if (idx == h1) {
509:               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510:             }
511:           }
512:         }
513: #else  
514:         if (HT[idx] != key) {
515:           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516:           if (idx == size) {
517:             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518:             if (idx == h1) {
519:               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520:             }
521:           }
522:         }
523: #endif
524:         baij_a = HD[idx];
525:         if (roworiented) {
526:           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527:           /* value = v + (i*(stepval+bs)+j)*bs; */
528:           value = v_t;
529:           v_t  += bs;
530:           if (addv == ADD_VALUES) {
531:             for (ii=0; ii<bs; ii++,value+=stepval) {
532:               for (jj=ii; jj<bs2; jj+=bs) {
533:                 baij_a[jj]  += *value++;
534:               }
535:             }
536:           } else {
537:             for (ii=0; ii<bs; ii++,value+=stepval) {
538:               for (jj=ii; jj<bs2; jj+=bs) {
539:                 baij_a[jj]  = *value++;
540:               }
541:             }
542:           }
543:         } else {
544:           value = v + j*(stepval+bs)*bs + i*bs;
545:           if (addv == ADD_VALUES) {
546:             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547:               for (jj=0; jj<bs; jj++) {
548:                 baij_a[jj]  += *value++;
549:               }
550:             }
551:           } else {
552:             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553:               for (jj=0; jj<bs; jj++) {
554:                 baij_a[jj]  = *value++;
555:               }
556:             }
557:           }
558:         }
559:       }
560:     } else {
561:       if (!baij->donotstash) {
562:         if (roworiented) {
563:           MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
564:         } else {
565:           MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
566:         }
567:       }
568:     }
569:   }
570: #if defined(PETSC_USE_DEBUG)
571:   baij->ht_total_ct = total_ct;
572:   baij->ht_insert_ct = insert_ct;
573: #endif
574:   return(0);
575: }

579: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580: {
581:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583:   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584:   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

587:   for (i=0; i<m; i++) {
588:     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589:     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590:     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591:       row = idxm[i] - bsrstart;
592:       for (j=0; j<n; j++) {
593:         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594:         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595:         if (idxn[j] >= bscstart && idxn[j] < bscend){
596:           col = idxn[j] - bscstart;
597:           MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
598:         } else {
599:           if (!baij->colmap) {
600:             CreateColmap_MPIBAIJ_Private(mat);
601:           }
602: #if defined (PETSC_USE_CTABLE)
603:           PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
604:           data --;
605: #else
606:           data = baij->colmap[idxn[j]/bs]-1;
607: #endif
608:           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609:           else {
610:             col  = data + idxn[j]%bs;
611:             MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
612:           }
613:         }
614:       }
615:     } else {
616:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
617:     }
618:   }
619:  return(0);
620: }

624: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625: {
626:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627:   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629:   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630:   PetscReal      sum = 0.0;
631:   MatScalar      *v;

634:   if (baij->size == 1) {
635:      MatNorm(baij->A,type,nrm);
636:   } else {
637:     if (type == NORM_FROBENIUS) {
638:       v = amat->a;
639:       nz = amat->nz*bs2;
640:       for (i=0; i<nz; i++) {
641: #if defined(PETSC_USE_COMPLEX)
642:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643: #else
644:         sum += (*v)*(*v); v++;
645: #endif
646:       }
647:       v = bmat->a;
648:       nz = bmat->nz*bs2;
649:       for (i=0; i<nz; i++) {
650: #if defined(PETSC_USE_COMPLEX)
651:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652: #else
653:         sum += (*v)*(*v); v++;
654: #endif
655:       }
656:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
657:       *nrm = PetscSqrtReal(*nrm);
658:     } else if (type == NORM_1) { /* max column sum */
659:       PetscReal *tmp,*tmp2;
660:       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661:       PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);
662:       PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
663:       v = amat->a; jj = amat->j;
664:       for (i=0; i<amat->nz; i++) {
665:         for (j=0; j<bs; j++){
666:           col = bs*(cstart + *jj) + j; /* column index */
667:           for (row=0; row<bs; row++){
668:             tmp[col] += PetscAbsScalar(*v);  v++;
669:           }
670:         }
671:         jj++;
672:       }
673:       v = bmat->a; jj = bmat->j;
674:       for (i=0; i<bmat->nz; i++) {
675:         for (j=0; j<bs; j++){
676:           col = bs*garray[*jj] + j;
677:           for (row=0; row<bs; row++){
678:             tmp[col] += PetscAbsScalar(*v); v++;
679:           }
680:         }
681:         jj++;
682:       }
683:       MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
684:       *nrm = 0.0;
685:       for (j=0; j<mat->cmap->N; j++) {
686:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
687:       }
688:       PetscFree2(tmp,tmp2);
689:     } else if (type == NORM_INFINITY) { /* max row sum */
690:       PetscReal *sums;
691:       PetscMalloc(bs*sizeof(PetscReal),&sums);
692:       sum = 0.0;
693:       for (j=0; j<amat->mbs; j++) {
694:         for (row=0; row<bs; row++) sums[row] = 0.0;
695:         v = amat->a + bs2*amat->i[j];
696:         nz = amat->i[j+1]-amat->i[j];
697:         for (i=0; i<nz; i++) {
698:           for (col=0; col<bs; col++){
699:             for (row=0; row<bs; row++){
700:               sums[row] += PetscAbsScalar(*v); v++;
701:             }
702:           }
703:         }
704:         v = bmat->a + bs2*bmat->i[j];
705:         nz = bmat->i[j+1]-bmat->i[j];
706:         for (i=0; i<nz; i++) {
707:           for (col=0; col<bs; col++){
708:             for (row=0; row<bs; row++){
709:               sums[row] += PetscAbsScalar(*v); v++;
710:             }
711:           }
712:         }
713:         for (row=0; row<bs; row++){
714:           if (sums[row] > sum) sum = sums[row];
715:         }
716:       }
717:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);
718:       PetscFree(sums);
719:     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
720:   }
721:   return(0);
722: }

724: /*
 725:   Creates and fills the hash table.
 726:   This table is created only once.
 727:   If new entries need to be added to the matrix
728:   then the hash table has to be destroyed and
729:   recreated.
730: */
733: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
734: {
735:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
736:   Mat            A = baij->A,B=baij->B;
737:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
738:   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
740:   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
741:   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
742:   PetscInt       *HT,key;
743:   MatScalar      **HD;
744:   PetscReal      tmp;
745: #if defined(PETSC_USE_INFO)
746:   PetscInt       ct=0,max=0;
747: #endif

750:   if (baij->ht) return(0);

752:   baij->ht_size = (PetscInt)(factor*nz);
753:   ht_size       = baij->ht_size;
754: 
755:   /* Allocate Memory for Hash Table */
756:   PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);
757:   PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));
758:   PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));
759:   HD   = baij->hd;
760:   HT   = baij->ht;

762:   /* Loop Over A */
763:   for (i=0; i<a->mbs; i++) {
764:     for (j=ai[i]; j<ai[i+1]; j++) {
765:       row = i+rstart;
766:       col = aj[j]+cstart;
767: 
768:       key = row*Nbs + col + 1;
769:       h1  = HASH(ht_size,key,tmp);
770:       for (k=0; k<ht_size; k++){
771:         if (!HT[(h1+k)%ht_size]) {
772:           HT[(h1+k)%ht_size] = key;
773:           HD[(h1+k)%ht_size] = a->a + j*bs2;
774:           break;
775: #if defined(PETSC_USE_INFO)
776:         } else {
777:           ct++;
778: #endif
779:         }
780:       }
781: #if defined(PETSC_USE_INFO)
782:       if (k> max) max = k;
783: #endif
784:     }
785:   }
786:   /* Loop Over B */
787:   for (i=0; i<b->mbs; i++) {
788:     for (j=bi[i]; j<bi[i+1]; j++) {
789:       row = i+rstart;
790:       col = garray[bj[j]];
791:       key = row*Nbs + col + 1;
792:       h1  = HASH(ht_size,key,tmp);
793:       for (k=0; k<ht_size; k++){
794:         if (!HT[(h1+k)%ht_size]) {
795:           HT[(h1+k)%ht_size] = key;
796:           HD[(h1+k)%ht_size] = b->a + j*bs2;
797:           break;
798: #if defined(PETSC_USE_INFO)
799:         } else {
800:           ct++;
801: #endif
802:         }
803:       }
804: #if defined(PETSC_USE_INFO)
805:       if (k> max) max = k;
806: #endif
807:     }
808:   }
809: 
810:   /* Print Summary */
811: #if defined(PETSC_USE_INFO)
812:   for (i=0,j=0; i<ht_size; i++) {
813:     if (HT[i]) {j++;}
814:   }
815:   PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
816: #endif
817:   return(0);
818: }
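/*
   Usage sketch added for clarity, not part of the original source: this table
   is only built when the user requests the hash-table insertion path, e.g. by
   the (assumed) calls below; 1.6 is just an example factor and ends up as the
   'factor' argument of this routine.  MatAssemblyEnd_MPIBAIJ() then installs
   MatSetValues_MPIBAIJ_HT() and MatSetValuesBlocked_MPIBAIJ_HT().
*/
#if 0
  MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);   /* or the -mat_use_hash_table option */
  MatMPIBAIJSetHashTableFactor(A,1.6);
#endif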

822: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
823: {
824:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
826:   PetscInt       nstash,reallocs;
827:   InsertMode     addv;

830:   if (baij->donotstash || mat->nooffprocentries) {
831:     return(0);
832:   }

 834:   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
835:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
836:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
837:   mat->insertmode = addv; /* in case this processor had no cache */

839:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
840:   MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
841:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
842:   PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
843:   MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
844:   PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
845:   return(0);
846: }

850: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
851: {
852:   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
853:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
855:   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
856:   PetscInt       *row,*col;
857:   PetscBool      r1,r2,r3,other_disassembled;
858:   MatScalar      *val;
859:   InsertMode     addv = mat->insertmode;
860:   PetscMPIInt    n;

862:   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
864:   if (!baij->donotstash && !mat->nooffprocentries) {
865:     while (1) {
866:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
867:       if (!flg) break;

869:       for (i=0; i<n;) {
870:         /* Now identify the consecutive vals belonging to the same row */
871:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
872:         if (j < n) ncols = j-i;
873:         else       ncols = n-i;
874:         /* Now assemble all these values with a single function call */
875:         MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
876:         i = j;
877:       }
878:     }
879:     MatStashScatterEnd_Private(&mat->stash);
880:     /* Now process the block-stash. Since the values are stashed column-oriented,
 881:        temporarily clear the roworiented flags, and restore the original
 882:        flags after MatSetValues() */
883:     r1 = baij->roworiented;
884:     r2 = a->roworiented;
885:     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
886:     baij->roworiented = PETSC_FALSE;
887:     a->roworiented    = PETSC_FALSE;
888:     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
889:     while (1) {
890:       MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
891:       if (!flg) break;
892: 
893:       for (i=0; i<n;) {
894:         /* Now identify the consecutive vals belonging to the same row */
895:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
896:         if (j < n) ncols = j-i;
897:         else       ncols = n-i;
898:         MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
899:         i = j;
900:       }
901:     }
902:     MatStashScatterEnd_Private(&mat->bstash);
903:     baij->roworiented = r1;
904:     a->roworiented    = r2;
905:     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
906:   }
907: 
908:   MatAssemblyBegin(baij->A,mode);
909:   MatAssemblyEnd(baij->A,mode);

 911:   /* determine if any processor has disassembled; if so we must 
 912:      also disassemble ourselves, in order that we may reassemble. */
 913:   /*
 914:      if the nonzero structure of submatrix B cannot change then we know that
 915:      no processor disassembled, thus we can skip this step
 916:   */
917:   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
918:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
919:     if (mat->was_assembled && !other_disassembled) {
920:       DisAssemble_MPIBAIJ(mat);
921:     }
922:   }

924:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
925:     MatSetUpMultiply_MPIBAIJ(mat);
926:   }
927:   MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
928:   MatAssemblyBegin(baij->B,mode);
929:   MatAssemblyEnd(baij->B,mode);
930: 
931: #if defined(PETSC_USE_INFO)
932:   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
933:     PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
934:     baij->ht_total_ct  = 0;
935:     baij->ht_insert_ct = 0;
936:   }
937: #endif
938:   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
939:     MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
940:     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
941:     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
942:   }

944:   PetscFree2(baij->rowvalues,baij->rowindices);
945:   baij->rowvalues = 0;
946:   return(0);
947: }
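/*
   Usage sketch added for clarity, not part of the original source: the stash
   machinery above is what allows a process to set entries in rows it does not
   own; those values are communicated during the two assembly calls.  A
   minimal sketch of the calling pattern (i, j may be off-process):
*/
#if 0
  MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
#endif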

951: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
952: {
953:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
954:   PetscErrorCode    ierr;
955:   PetscMPIInt       size = baij->size,rank = baij->rank;
956:   PetscInt          bs = mat->rmap->bs;
957:   PetscBool         iascii,isdraw;
958:   PetscViewer       sviewer;
959:   PetscViewerFormat format;

962:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
963:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
964:   if (iascii) {
965:     PetscViewerGetFormat(viewer,&format);
966:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
967:       MatInfo info;
968:       MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
969:       MatGetInfo(mat,MAT_LOCAL,&info);
970:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
971:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
972:              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);
973:       MatGetInfo(baij->A,MAT_LOCAL,&info);
974:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
975:       MatGetInfo(baij->B,MAT_LOCAL,&info);
976:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
977:       PetscViewerFlush(viewer);
978:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
979:       PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
980:       VecScatterView(baij->Mvctx,viewer);
981:       return(0);
982:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983:       PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
984:       return(0);
985:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986:       return(0);
987:     }
988:   }

990:   if (isdraw) {
991:     PetscDraw       draw;
992:     PetscBool  isnull;
993:     PetscViewerDrawGetDraw(viewer,0,&draw);
994:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
995:   }

997:   if (size == 1) {
998:     PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);
999:     MatView(baij->A,viewer);
1000:   } else {
1001:     /* assemble the entire matrix onto the first processor. */
1002:     Mat         A;
1003:     Mat_SeqBAIJ *Aloc;
1004:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005:     MatScalar   *a;

1007:     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1008:     /* Perhaps this should be the type of mat? */
1009:     MatCreate(((PetscObject)mat)->comm,&A);
1010:     if (!rank) {
1011:       MatSetSizes(A,M,N,M,N);
1012:     } else {
1013:       MatSetSizes(A,0,0,M,N);
1014:     }
1015:     MatSetType(A,MATMPIBAIJ);
1016:     MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
1017:     PetscLogObjectParent(mat,A);

1019:     /* copy over the A part */
1020:     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1021:     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1022:     PetscMalloc(bs*sizeof(PetscInt),&rvals);

1024:     for (i=0; i<mbs; i++) {
1025:       rvals[0] = bs*(baij->rstartbs + i);
1026:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1027:       for (j=ai[i]; j<ai[i+1]; j++) {
1028:         col = (baij->cstartbs+aj[j])*bs;
1029:         for (k=0; k<bs; k++) {
1030:           MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1031:           col++; a += bs;
1032:         }
1033:       }
1034:     }
1035:     /* copy over the B part */
1036:     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1037:     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1038:     for (i=0; i<mbs; i++) {
1039:       rvals[0] = bs*(baij->rstartbs + i);
1040:       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1041:       for (j=ai[i]; j<ai[i+1]; j++) {
1042:         col = baij->garray[aj[j]]*bs;
1043:         for (k=0; k<bs; k++) {
1044:           MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1045:           col++; a += bs;
1046:         }
1047:       }
1048:     }
1049:     PetscFree(rvals);
1050:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1051:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1052:     /* 
1053:        Everyone has to participate in the call that draws the matrix since the graphics
1054:        waits are synchronized across all processors that share the PetscDraw object
1055:     */
1056:     PetscViewerGetSingleton(viewer,&sviewer);
1057:     if (!rank) {
1058:       PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);
1059:     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1060:       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1061:       MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1062:     }
1063:     PetscViewerRestoreSingleton(viewer,&sviewer);
1064:     MatDestroy(&A);
1065:   }
1066:   return(0);
1067: }

1071: static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1072: {
1073:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1074:   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1075:   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1077:   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1078:   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1079:   int            fd;
1080:   PetscScalar    *column_values;
1081:   FILE           *file;
1082:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1083:   PetscInt       message_count,flowcontrolcount;

1086:   MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1087:   MPI_Comm_size(((PetscObject)mat)->comm,&size);
1088:   nz   = bs2*(A->nz + B->nz);
1089:   rlen = mat->rmap->n;
1090:   if (!rank) {
1091:     header[0] = MAT_FILE_CLASSID;
1092:     header[1] = mat->rmap->N;
1093:     header[2] = mat->cmap->N;
1094:     MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1095:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1096:     PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1097:     /* get the largest number of rows any processor has */
1098:     range = mat->rmap->range;
1099:     for (i=1; i<size; i++) {
1100:       rlen = PetscMax(rlen,range[i+1] - range[i]);
1101:     }
1102:   } else {
1103:     MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1104:   }

1106:   PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);
1107:   /* compute the number of nonzero blocks in each block row  */
1108:   for (i=0; i<a->mbs; i++) {
1109:     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1110:   }
1111:   /* store the row lengths to the file */
1112:   PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1113:   if (!rank) {
1114:     MPI_Status status;
1115:     PetscMalloc(rlen*sizeof(PetscInt),&row_lens);
1116:     rlen  = (range[1] - range[0])/bs;
1117:     for (i=0; i<rlen; i++) {
1118:       for (j=0; j<bs; j++) {
1119:         row_lens[i*bs+j] = bs*crow_lens[i];
1120:       }
1121:     }
1122:     PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1123:     for (i=1; i<size; i++) {
1124:       rlen = (range[i+1] - range[i])/bs;
1125:       PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1126:       MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1127:       for (k=0; k<rlen; k++) {
1128:         for (j=0; j<bs; j++) {
1129:           row_lens[k*bs+j] = bs*crow_lens[k];
1130:         }
1131:       }
1132:       PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1133:     }
1134:     PetscViewerFlowControlEndMaster(viewer,message_count);
1135:     PetscFree(row_lens);
1136:   } else {
1137:     PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1138:     MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1139:     PetscViewerFlowControlEndWorker(viewer,message_count);
1140:   }
1141:   PetscFree(crow_lens);

1143:   /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
1144:      information needed to expand a block row into its individual rows. This does require more communication, but still no more than
1145:      the communication needed for the nonzero values  */
1146:   nzmax = nz; /*  space the largest processor needs */
1147:   MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
1148:   PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);
1149:   cnt  = 0;
1150:   for (i=0; i<a->mbs; i++) {
1151:     pcnt = cnt;
1152:     for (j=B->i[i]; j<B->i[i+1]; j++) {
1153:       if ( (col = garray[B->j[j]]) > cstart) break;
1154:       for (l=0; l<bs; l++) {
1155:         column_indices[cnt++] = bs*col+l;
1156:       }
1157:     }
1158:     for (k=A->i[i]; k<A->i[i+1]; k++) {
1159:       for (l=0; l<bs; l++) {
1160:         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1161:       }
1162:     }
1163:     for (; j<B->i[i+1]; j++) {
1164:       for (l=0; l<bs; l++) {
1165:         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1166:       }
1167:     }
1168:     len = cnt - pcnt;
1169:     for (k=1; k<bs; k++) {
1170:       PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));
1171:       cnt += len;
1172:     }
1173:   }
1174:   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

1176:   /* store the columns to the file */
1177:   PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1178:   if (!rank) {
1179:     MPI_Status status;
1180:     PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1181:     for (i=1; i<size; i++) {
1182:       PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1183:       MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1184:       MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1185:       PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);
1186:     }
1187:     PetscViewerFlowControlEndMaster(viewer,message_count);
1188:   } else {
1189:     PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1190:     MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1191:     MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1192:     PetscViewerFlowControlEndWorker(viewer,message_count);
1193:   }
1194:   PetscFree(column_indices);

1196:   /* load up the numerical values */
1197:   PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);
1198:   cnt = 0;
1199:   for (i=0; i<a->mbs; i++) {
1200:     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1201:     for (j=B->i[i]; j<B->i[i+1]; j++) {
1202:       if ( garray[B->j[j]] > cstart) break;
1203:       for (l=0; l<bs; l++) {
1204:         for (ll=0; ll<bs; ll++) {
1205:           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1206:         }
1207:       }
1208:       cnt += bs;
1209:     }
1210:     for (k=A->i[i]; k<A->i[i+1]; k++) {
1211:       for (l=0; l<bs; l++) {
1212:         for (ll=0; ll<bs; ll++) {
1213:           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1214:         }
1215:       }
1216:       cnt += bs;
1217:     }
1218:     for (; j<B->i[i+1]; j++) {
1219:       for (l=0; l<bs; l++) {
1220:         for (ll=0; ll<bs; ll++) {
1221:           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1222:         }
1223:       }
1224:       cnt += bs;
1225:     }
1226:     cnt += (bs-1)*rlen;
1227:   }
1228:   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

1230:   /* store the column values to the file */
1231:   PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1232:   if (!rank) {
1233:     MPI_Status status;
1234:     PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1235:     for (i=1; i<size; i++) {
1236:       PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1237:       MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1238:       MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);
1239:       PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);
1240:     }
1241:     PetscViewerFlowControlEndMaster(viewer,message_count);
1242:   } else {
1243:     PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1244:     MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1245:     MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
1246:     PetscViewerFlowControlEndWorker(viewer,message_count);
1247:   }
1248:   PetscFree(column_values);

1250:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1251:   if (file) {
1252:     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1253:   }
1254:   return(0);
1255: }
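/*
   Usage sketch added for clarity, not part of the original source: the binary
   writer above is reached through the generic viewer interface; "matrix.dat"
   is just an example file name.
*/
#if 0
  PetscViewer viewer;
  PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);
  MatView(mat,viewer);                     /* dispatches to MatView_MPIBAIJ_Binary() */
  PetscViewerDestroy(&viewer);
#endif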

1259: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1260: {
1262:   PetscBool      iascii,isdraw,issocket,isbinary;

1265:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1266:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1267:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1268:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1269:   if (iascii || isdraw || issocket) {
1270:     MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1271:   } else if (isbinary) {
1272:     MatView_MPIBAIJ_Binary(mat,viewer);
1273:   } else {
1274:     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1275:   }
1276:   return(0);
1277: }

1281: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1282: {
1283:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;

1287: #if defined(PETSC_USE_LOG)
1288:   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1289: #endif
1290:   MatStashDestroy_Private(&mat->stash);
1291:   MatStashDestroy_Private(&mat->bstash);
1292:   MatDestroy(&baij->A);
1293:   MatDestroy(&baij->B);
1294: #if defined (PETSC_USE_CTABLE)
1295:   PetscTableDestroy(&baij->colmap);
1296: #else
1297:   PetscFree(baij->colmap);
1298: #endif
1299:   PetscFree(baij->garray);
1300:   VecDestroy(&baij->lvec);
1301:   VecScatterDestroy(&baij->Mvctx);
1302:   PetscFree2(baij->rowvalues,baij->rowindices);
1303:   PetscFree(baij->barray);
1304:   PetscFree2(baij->hd,baij->ht);
1305:   PetscFree(baij->rangebs);
1306:   PetscFree(mat->data);

1308:   PetscObjectChangeTypeName((PetscObject)mat,0);
1309:   PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1310:   PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1311:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1312:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);
1313:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);
1314:   PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1315:   PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);
1316:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);
1317:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);
1318:   return(0);
1319: }

1323: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1324: {
1325:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1327:   PetscInt       nt;

1330:   VecGetLocalSize(xx,&nt);
1331:   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1332:   VecGetLocalSize(yy,&nt);
1333:   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1334:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1335:   (*a->A->ops->mult)(a->A,xx,yy);
1336:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1337:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1338:   return(0);
1339: }

1343: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1344: {
1345:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1349:   VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1350:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
1351:   VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1352:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1353:   return(0);
1354: }

1358: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1359: {
1360:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1362:   PetscBool      merged;

1365:   VecScatterGetMerged(a->Mvctx,&merged);
1366:   /* do nondiagonal part */
1367:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1368:   if (!merged) {
1369:     /* send it on its way */
1370:     VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1371:     /* do local part */
1372:     (*a->A->ops->multtranspose)(a->A,xx,yy);
1373:     /* receive remote parts: note this assumes the values are not actually */
1374:     /* inserted in yy until the next line */
1375:     VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1376:   } else {
1377:     /* do local part */
1378:     (*a->A->ops->multtranspose)(a->A,xx,yy);
1379:     /* send it on its way */
1380:     VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1381:     /* values actually were received in the Begin() but we need to call this nop */
1382:     VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1383:   }
1384:   return(0);
1385: }

1389: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1390: {
1391:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1395:   /* do nondiagonal part */
1396:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1397:   /* send it on its way */
1398:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1399:   /* do local part */
1400:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1401:   /* receive remote parts: note this assumes the values are not actually */
1402:   /* inserted in zz until the next line, which is true for my implementation */
1403:   /* but is perhaps not always true. */
1404:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1405:   return(0);
1406: }

1408: /*
1409:   This only works correctly for square matrices where the subblock A->A is the 
1410:    diagonal block
1411: */
1414: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1415: {
1416:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1420:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1421:   MatGetDiagonal(a->A,v);
1422:   return(0);
1423: }

1427: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1428: {
1429:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1433:   MatScale(a->A,aa);
1434:   MatScale(a->B,aa);
1435:   return(0);
1436: }

1440: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1441: {
1442:   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1443:   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1445:   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1446:   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1447:   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

1450:   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1451:   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1452:   mat->getrowactive = PETSC_TRUE;

1454:   if (!mat->rowvalues && (idx || v)) {
1455:     /*
1456:         allocate enough space to hold information from the longest row.
1457:     */
1458:     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1459:     PetscInt     max = 1,mbs = mat->mbs,tmp;
1460:     for (i=0; i<mbs; i++) {
1461:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1462:       if (max < tmp) { max = tmp; }
1463:     }
1464:     PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);
1465:   }
1466:   lrow = row - brstart;

1468:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1469:   if (!v)   {pvA = 0; pvB = 0;}
1470:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1471:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1472:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1473:   nztot = nzA + nzB;

1475:   cmap  = mat->garray;
1476:   if (v  || idx) {
1477:     if (nztot) {
1478:       /* Sort by increasing column numbers, assuming A and B already sorted */
1479:       PetscInt imark = -1;
1480:       if (v) {
1481:         *v = v_p = mat->rowvalues;
1482:         for (i=0; i<nzB; i++) {
1483:           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1484:           else break;
1485:         }
1486:         imark = i;
1487:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1488:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1489:       }
1490:       if (idx) {
1491:         *idx = idx_p = mat->rowindices;
1492:         if (imark > -1) {
1493:           for (i=0; i<imark; i++) {
1494:             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1495:           }
1496:         } else {
1497:           for (i=0; i<nzB; i++) {
1498:             if (cmap[cworkB[i]/bs] < cstart)
1499:               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1500:             else break;
1501:           }
1502:           imark = i;
1503:         }
1504:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1505:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1506:       }
1507:     } else {
1508:       if (idx) *idx = 0;
1509:       if (v)   *v   = 0;
1510:     }
1511:   }
1512:   *nz = nztot;
1513:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1514:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1515:   return(0);
1516: }

1520: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521: {
1522:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

1525:   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1526:   baij->getrowactive = PETSC_FALSE;
1527:   return(0);
1528: }
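/*
   A minimal caller-side usage sketch (not part of this file; the matrix name A
   and the ownership-range variables are assumptions): every MatGetRow() must be
   paired with MatRestoreRow() before the next row is requested, and only
   locally owned rows may be accessed, as enforced above.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       ... use ncols, cols[] and vals[] for this global row ...
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/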

1532: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1533: {
1534:   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;

1538:   MatZeroEntries(l->A);
1539:   MatZeroEntries(l->B);
1540:   return(0);
1541: }

1545: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1546: {
1547:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1548:   Mat            A = a->A,B = a->B;
1550:   PetscReal      isend[5],irecv[5];

1553:   info->block_size     = (PetscReal)matin->rmap->bs;
1554:   MatGetInfo(A,MAT_LOCAL,info);
1555:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1556:   isend[3] = info->memory;  isend[4] = info->mallocs;
1557:   MatGetInfo(B,MAT_LOCAL,info);
1558:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1559:   isend[3] += info->memory;  isend[4] += info->mallocs;
1560:   if (flag == MAT_LOCAL) {
1561:     info->nz_used      = isend[0];
1562:     info->nz_allocated = isend[1];
1563:     info->nz_unneeded  = isend[2];
1564:     info->memory       = isend[3];
1565:     info->mallocs      = isend[4];
1566:   } else if (flag == MAT_GLOBAL_MAX) {
1567:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);
1568:     info->nz_used      = irecv[0];
1569:     info->nz_allocated = irecv[1];
1570:     info->nz_unneeded  = irecv[2];
1571:     info->memory       = irecv[3];
1572:     info->mallocs      = irecv[4];
1573:   } else if (flag == MAT_GLOBAL_SUM) {
1574:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);
1575:     info->nz_used      = irecv[0];
1576:     info->nz_allocated = irecv[1];
1577:     info->nz_unneeded  = irecv[2];
1578:     info->memory       = irecv[3];
1579:     info->mallocs      = irecv[4];
1580:   } else {
1581:     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582:   }
1583:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1584:   info->fill_ratio_needed = 0;
1585:   info->factor_mallocs    = 0;
1586:   return(0);
1587: }
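/*
   A minimal caller-side usage sketch (assumed code, not part of this file):
   collecting globally summed matrix statistics; MAT_LOCAL and MAT_GLOBAL_MAX
   behave analogously.

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                 info.nz_used,info.nz_allocated,info.mallocs);
*/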

1591: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1592: {
1593:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

1597:   switch (op) {
1598:   case MAT_NEW_NONZERO_LOCATIONS:
1599:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1600:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1601:   case MAT_KEEP_NONZERO_PATTERN:
1602:   case MAT_NEW_NONZERO_LOCATION_ERR:
1603:     MatSetOption(a->A,op,flg);
1604:     MatSetOption(a->B,op,flg);
1605:     break;
1606:   case MAT_ROW_ORIENTED:
1607:     a->roworiented = flg;
1608:     MatSetOption(a->A,op,flg);
1609:     MatSetOption(a->B,op,flg);
1610:     break;
1611:   case MAT_NEW_DIAGONALS:
1612:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1613:     break;
1614:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1615:     a->donotstash = flg;
1616:     break;
1617:   case MAT_USE_HASH_TABLE:
1618:     a->ht_flag = flg;
1619:     break;
1620:   case MAT_SYMMETRIC:
1621:   case MAT_STRUCTURALLY_SYMMETRIC:
1622:   case MAT_HERMITIAN:
1623:   case MAT_SYMMETRY_ETERNAL:
1624:     MatSetOption(a->A,op,flg);
1625:     break;
1626:   default:
1627:     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1628:   }
1629:   return(0);
1630: }

1634: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1635: {
1636:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1637:   Mat_SeqBAIJ    *Aloc;
1638:   Mat            B;
1640:   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1641:   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1642:   MatScalar      *a;
1643: 
1645:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1646:   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647:     MatCreate(((PetscObject)A)->comm,&B);
1648:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1649:     MatSetType(B,((PetscObject)A)->type_name);
1650:     MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
1651:   } else {
1652:     B = *matout;
1653:   }

1655:   /* copy over the A part */
1656:   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1657:   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1658:   PetscMalloc(bs*sizeof(PetscInt),&rvals);
1659: 
1660:   for (i=0; i<mbs; i++) {
1661:     rvals[0] = bs*(baij->rstartbs + i);
1662:     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1663:     for (j=ai[i]; j<ai[i+1]; j++) {
1664:       col = (baij->cstartbs+aj[j])*bs;
1665:       for (k=0; k<bs; k++) {
1666:         MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1667:         col++; a += bs;
1668:       }
1669:     }
1670:   }
1671:   /* copy over the B part */
1672:   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1673:   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1674:   for (i=0; i<mbs; i++) {
1675:     rvals[0] = bs*(baij->rstartbs + i);
1676:     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1677:     for (j=ai[i]; j<ai[i+1]; j++) {
1678:       col = baij->garray[aj[j]]*bs;
1679:       for (k=0; k<bs; k++) {
1680:         MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1681:         col++; a += bs;
1682:       }
1683:     }
1684:   }
1685:   PetscFree(rvals);
1686:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1687:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1688: 
1689:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1690:     *matout = B;
1691:   } else {
1692:     MatHeaderMerge(A,B);
1693:   }
1694:   return(0);
1695: }
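/*
   A minimal caller-side usage sketch (assumed code, not part of this file):
   out-of-place transposition creates a new matrix; in-place transposition
   (passing the matrix itself with MAT_REUSE_MATRIX) is restricted to square
   matrices, as checked above.

     Mat At;
     MatTranspose(A,MAT_INITIAL_MATRIX,&At);      creates At = A^T
     MatTranspose(A,MAT_REUSE_MATRIX,&A);         in-place, square A only
     MatDestroy(&At);
*/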

1699: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1700: {
1701:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1702:   Mat            a = baij->A,b = baij->B;
1704:   PetscInt       s1,s2,s3;

1707:   MatGetLocalSize(mat,&s2,&s3);
1708:   if (rr) {
1709:     VecGetLocalSize(rr,&s1);
1710:     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1711:     /* Overlap communication with computation. */
1712:     VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1713:   }
1714:   if (ll) {
1715:     VecGetLocalSize(ll,&s1);
1716:     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1717:     (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1718:   }
1719:   /* scale  the diagonal block */
1720:   (*a->ops->diagonalscale)(a,ll,rr);

1722:   if (rr) {
1723:     /* Do a scatter end and then right scale the off-diagonal block */
1724:     VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1725:     (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1726:   }
1727: 
1728:   return(0);
1729: }

1733: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1734: {
1735:   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1736:   PetscErrorCode    ierr;
1737:   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1738:   PetscInt          i,*owners = A->rmap->range;
1739:   PetscInt          *nprocs,j,idx,nsends,row;
1740:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1741:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1742:   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1743:   MPI_Comm          comm = ((PetscObject)A)->comm;
1744:   MPI_Request       *send_waits,*recv_waits;
1745:   MPI_Status        recv_status,*send_status;
1746:   const PetscScalar *xx;
1747:   PetscScalar       *bb;
1748: #if defined(PETSC_DEBUG)
1749:   PetscBool         found = PETSC_FALSE;
1750: #endif
1751: 
1753:   /*  first count number of contributors to each processor */
1754:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
1755:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
1756:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
1757:   j = 0;
1758:   for (i=0; i<N; i++) {
1759:     if (lastidx > (idx = rows[i])) j = 0;
1760:     lastidx = idx;
1761:     for (; j<size; j++) {
1762:       if (idx >= owners[j] && idx < owners[j+1]) {
1763:         nprocs[2*j]++;
1764:         nprocs[2*j+1] = 1;
1765:         owner[i] = j;
1766: #if defined(PETSC_DEBUG)
1767:         found = PETSC_TRUE;
1768: #endif
1769:         break;
1770:       }
1771:     }
1772: #if defined(PETSC_DEBUG)
1773:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1774:     found = PETSC_FALSE;
1775: #endif
1776:   }
1777:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1778: 
1779:   if (A->nooffproczerorows) {
1780:     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1781:     nrecvs = nsends;
1782:     nmax   = N;
1783:   } else {
1784:     /* inform other processors of number of messages and max length*/
1785:     PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1786:   }
1787: 
1788:   /* post receives:   */
1789:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
1790:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1791:   for (i=0; i<nrecvs; i++) {
1792:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1793:   }
1794: 
1795:   /* do sends:
1796:      1) starts[i] gives the starting index in svalues for stuff going to 
1797:      the ith processor
1798:   */
1799:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
1800:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1801:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
1802:   starts[0]  = 0;
1803:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1804:   for (i=0; i<N; i++) {
1805:     svalues[starts[owner[i]]++] = rows[i];
1806:   }
1807: 
1808:   starts[0] = 0;
1809:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1810:   count = 0;
1811:   for (i=0; i<size; i++) {
1812:     if (nprocs[2*i+1]) {
1813:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
1814:     }
1815:   }
1816:   PetscFree(starts);

1818:   base = owners[rank];
1819: 
1820:   /*  wait on receives */
1821:   PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);
1822:   count  = nrecvs;
1823:   slen = 0;
1824:   while (count) {
1825:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1826:     /* unpack receives into our local space */
1827:     MPI_Get_count(&recv_status,MPIU_INT,&n);
1828:     source[imdex]  = recv_status.MPI_SOURCE;
1829:     lens[imdex]    = n;
1830:     slen          += n;
1831:     count--;
1832:   }
1833:   PetscFree(recv_waits);
1834: 
1835:   /* move the data into the send scatter */
1836:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
1837:   count = 0;
1838:   for (i=0; i<nrecvs; i++) {
1839:     values = rvalues + i*nmax;
1840:     for (j=0; j<lens[i]; j++) {
1841:       lrows[count++] = values[j] - base;
1842:     }
1843:   }
1844:   PetscFree(rvalues);
1845:   PetscFree2(lens,source);
1846:   PetscFree(owner);
1847:   PetscFree(nprocs);
1848: 
1849:   /* fix right hand side if needed */
1850:   if (x && b) {
1851:     VecGetArrayRead(x,&xx);
1852:     VecGetArray(b,&bb);
1853:     for (i=0; i<slen; i++) {
1854:       bb[lrows[i]] = diag*xx[lrows[i]];
1855:     }
1856:     VecRestoreArrayRead(x,&xx);
1857:     VecRestoreArray(b,&bb);
1858:   }

1860:   /* actually zap the local rows */
1861:   /*
1862:         Zero the required rows. If the "diagonal block" of the matrix
1863:      is square and the user wishes to set the diagonal we use separate
1864:      code so that MatSetValues() is not called for each diagonal allocating
1865:      new memory, thus calling lots of mallocs and slowing things down.

1867:   */
1868:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1869:   MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);
1870:   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1871:     MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);
1872:   } else if (diag != 0.0) {
1873:     MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);
1874:     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1875:        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1876:     for (i=0; i<slen; i++) {
1877:       row  = lrows[i] + rstart_bs;
1878:       MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1879:     }
1880:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1881:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1882:   } else {
1883:     MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);
1884:   }

1886:   PetscFree(lrows);

1888:   /* wait on sends */
1889:   if (nsends) {
1890:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1891:     MPI_Waitall(nsends,send_waits,send_status);
1892:     PetscFree(send_status);
1893:   }
1894:   PetscFree(send_waits);
1895:   PetscFree(svalues);

1897:   return(0);
1898: }
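/*
   A minimal caller-side usage sketch (assumed code, not part of this file):
   each process lists the global rows it wants zeroed (they need not be owned
   locally), a value is placed on the diagonal of those rows, and, when x and
   b are supplied, b is adjusted so the solution retains the values in x.

     PetscInt rows[2] = {0,5};
     MatZeroRows(A,2,rows,1.0,x,b);
*/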

1902: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1903: {
1904:   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;

1908:   MatSetUnfactored(a->A);
1909:   return(0);
1910: }

1912: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

1916: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1917: {
1918:   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1919:   Mat            a,b,c,d;
1920:   PetscBool      flg;

1924:   a = matA->A; b = matA->B;
1925:   c = matB->A; d = matB->B;

1927:   MatEqual(a,c,&flg);
1928:   if (flg) {
1929:     MatEqual(b,d,&flg);
1930:   }
1931:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
1932:   return(0);
1933: }

1937: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1938: {
1940:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1941:   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;

1944:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1945:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1946:     MatCopy_Basic(A,B,str);
1947:   } else {
1948:     MatCopy(a->A,b->A,str);
1949:     MatCopy(a->B,b->B,str);
1950:   }
1951:   return(0);
1952: }

1956: PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1957: {

1961:   MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1962:   return(0);
1963: }

1967: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1968: {
1970:   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1971:   PetscBLASInt   bnz,one=1;
1972:   Mat_SeqBAIJ    *x,*y;

1975:   if (str == SAME_NONZERO_PATTERN) {
1976:     PetscScalar alpha = a;
1977:     x = (Mat_SeqBAIJ *)xx->A->data;
1978:     y = (Mat_SeqBAIJ *)yy->A->data;
1979:     bnz = PetscBLASIntCast(x->nz);
1980:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1981:     x = (Mat_SeqBAIJ *)xx->B->data;
1982:     y = (Mat_SeqBAIJ *)yy->B->data;
1983:     bnz = PetscBLASIntCast(x->nz);
1984:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1985:   } else {
1986:     MatAXPY_Basic(Y,a,X,str);
1987:   }
1988:   return(0);
1989: }

1993: PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1994: {
1995:   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1996:   PetscInt rbs,cbs;

2000:   MatSetBlockSize(a->A,bs);
2001:   MatSetBlockSize(a->B,bs);
2002:   PetscLayoutGetBlockSize(A->rmap,&rbs);
2003:   PetscLayoutGetBlockSize(A->cmap,&cbs);
2004:   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot change block size to %d, matrix row layout already has block size %d",bs,rbs);
2005:   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot change block size to %d, matrix column layout already has block size %d",bs,cbs);
2006:   return(0);
2007: }

2011: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2012: {
2013:   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;

2017:   MatRealPart(a->A);
2018:   MatRealPart(a->B);
2019:   return(0);
2020: }

2024: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2025: {
2026:   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;

2030:   MatImaginaryPart(a->A);
2031:   MatImaginaryPart(a->B);
2032:   return(0);
2033: }

2037: PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2038: {
2040:   IS             iscol_local;
2041:   PetscInt       csize;

2044:   ISGetLocalSize(iscol,&csize);
2045:   if (call == MAT_REUSE_MATRIX) {
2046:     PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
2047:     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2048:   } else {
2049:     ISAllGather(iscol,&iscol_local);
2050:   }
2051:   MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
2052:   if (call == MAT_INITIAL_MATRIX) {
2053:     PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
2054:     ISDestroy(&iscol_local);
2055:   }
2056:   return(0);
2057: }
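/*
   A minimal caller-side usage sketch (assumed code, not part of this file;
   nlocalrows/firstrow/nlocalcols/firstcol are placeholders): each process
   supplies the rows and columns it wants to own in the extracted parallel
   submatrix.

     IS  isrow,iscol;
     Mat sub;
     ISCreateStride(PETSC_COMM_WORLD,nlocalrows,firstrow,1,&isrow);
     ISCreateStride(PETSC_COMM_WORLD,nlocalcols,firstcol,1,&iscol);
     MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
     ... a later call with MAT_REUSE_MATRIX refills sub with new values ...
     ISDestroy(&isrow);
     ISDestroy(&iscol);
*/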

2061: /*
2062:     Not great since it makes two copies of the submatrix: first a SeqBAIJ 
2063:   on each process, and then the final parallel matrix by concatenating the
2064:   local pieces. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2065: */
2066: PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2067: {
2069:   PetscMPIInt    rank,size;
2070:   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2071:   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2072:   Mat            *local,M,Mreuse;
2073:   MatScalar      *vwork,*aa;
2074:   MPI_Comm       comm = ((PetscObject)mat)->comm;
2075:   Mat_SeqBAIJ    *aij;


2079:   MPI_Comm_rank(comm,&rank);
2080:   MPI_Comm_size(comm,&size);

2082:   if (call ==  MAT_REUSE_MATRIX) {
2083:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2084:     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2085:     local = &Mreuse;
2086:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2087:   } else {
2088:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2089:     Mreuse = *local;
2090:     PetscFree(local);
2091:   }

2093:   /* 
2094:       m - number of local rows
2095:       n - number of columns (same on all processors)
2096:       rstart - first row in new global matrix generated
2097:   */
2098:   MatGetBlockSize(mat,&bs);
2099:   MatGetSize(Mreuse,&m,&n);
2100:   m    = m/bs;
2101:   n    = n/bs;
2102: 
2103:   if (call == MAT_INITIAL_MATRIX) {
2104:     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2105:     ii  = aij->i;
2106:     jj  = aij->j;

2108:     /*
2109:         Determine the number of non-zeros in the diagonal and off-diagonal 
2110:         portions of the matrix in order to do correct preallocation
2111:     */

2113:     /* first get start and end of "diagonal" columns */
2114:     if (csize == PETSC_DECIDE) {
2115:       ISGetSize(isrow,&mglobal);
2116:       if (mglobal == n*bs) { /* square matrix */
2117:         nlocal = m;
2118:       } else {
2119:         nlocal = n/size + ((n % size) > rank);
2120:       }
2121:     } else {
2122:       nlocal = csize/bs;
2123:     }
2124:     MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2125:     rstart = rend - nlocal;
2126:     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

2128:     /* next, compute all the lengths */
2129:     PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
2130:     olens = dlens + m;
2131:     for (i=0; i<m; i++) {
2132:       jend = ii[i+1] - ii[i];
2133:       olen = 0;
2134:       dlen = 0;
2135:       for (j=0; j<jend; j++) {
2136:         if (*jj < rstart || *jj >= rend) olen++;
2137:         else dlen++;
2138:         jj++;
2139:       }
2140:       olens[i] = olen;
2141:       dlens[i] = dlen;
2142:     }
2143:     MatCreate(comm,&M);
2144:     MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);
2145:     MatSetType(M,((PetscObject)mat)->type_name);
2146:     MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);
2147:     PetscFree(dlens);
2148:   } else {
2149:     PetscInt ml,nl;

2151:     M = *newmat;
2152:     MatGetLocalSize(M,&ml,&nl);
2153:     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2154:     MatZeroEntries(M);
2155:     /*
2156:          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2157:        rather than the slower MatSetValues().
2158:     */
2159:     M->was_assembled = PETSC_TRUE;
2160:     M->assembled     = PETSC_FALSE;
2161:   }
2162:   MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);
2163:   MatGetOwnershipRange(M,&rstart,&rend);
2164:   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2165:   ii  = aij->i;
2166:   jj  = aij->j;
2167:   aa  = aij->a;
2168:   for (i=0; i<m; i++) {
2169:     row   = rstart/bs + i;
2170:     nz    = ii[i+1] - ii[i];
2171:     cwork = jj;     jj += nz;
2172:     vwork = aa;     aa += nz;
2173:     MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2174:   }

2176:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2177:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2178:   *newmat = M;

2180:   /* save submatrix used in processor for next request */
2181:   if (call ==  MAT_INITIAL_MATRIX) {
2182:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2183:     PetscObjectDereference((PetscObject)Mreuse);
2184:   }

2186:   return(0);
2187: }

2191: PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2192: {
2193:   MPI_Comm       comm,pcomm;
2194:   PetscInt       first,local_size,nrows;
2195:   const PetscInt *rows;
2196:   PetscMPIInt    size;
2197:   IS             crowp,growp,irowp,lrowp,lcolp,icolp;

2201:   PetscObjectGetComm((PetscObject)A,&comm);
2202:   /* make a collective version of 'rowp' */
2203:   PetscObjectGetComm((PetscObject)rowp,&pcomm);
2204:   if (pcomm==comm) {
2205:     crowp = rowp;
2206:   } else {
2207:     ISGetSize(rowp,&nrows);
2208:     ISGetIndices(rowp,&rows);
2209:     ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);
2210:     ISRestoreIndices(rowp,&rows);
2211:   }
2212:   /* collect the global row permutation and invert it */
2213:   ISAllGather(crowp,&growp);
2214:   ISSetPermutation(growp);
2215:   if (pcomm!=comm) {
2216:     ISDestroy(&crowp);
2217:   }
2218:   ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
2219:   /* get the local target indices */
2220:   MatGetOwnershipRange(A,&first,PETSC_NULL);
2221:   MatGetLocalSize(A,&local_size,PETSC_NULL);
2222:   ISGetIndices(irowp,&rows);
2223:   ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);
2224:   ISRestoreIndices(irowp,&rows);
2225:   ISDestroy(&irowp);
2226:   /* the column permutation is so much easier;
2227:      make a local version of 'colp' and invert it */
2228:   PetscObjectGetComm((PetscObject)colp,&pcomm);
2229:   MPI_Comm_size(pcomm,&size);
2230:   if (size==1) {
2231:     lcolp = colp;
2232:   } else {
2233:     ISGetSize(colp,&nrows);
2234:     ISGetIndices(colp,&rows);
2235:     ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);
2236:   }
2237:   ISSetPermutation(lcolp);
2238:   ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
2239:   ISSetPermutation(icolp);
2240:   if (size>1) {
2241:     ISRestoreIndices(colp,&rows);
2242:     ISDestroy(&lcolp);
2243:   }
2244:   /* now we just get the submatrix */
2245:   MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
2246:   /* clean up */
2247:   ISDestroy(&lrowp);
2248:   ISDestroy(&icolp);
2249:   return(0);
2250: }
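/*
   A minimal caller-side usage sketch (assumed code, not part of this file):
   rowp and colp are index sets marked as permutations; the permuted matrix is
   returned as a new matrix.

     IS  rowp,colp;
     Mat Aperm;
     ... build rowp and colp (e.g. with ISCreateGeneral()) and mark them
         with ISSetPermutation() ...
     MatPermute(A,rowp,colp,&Aperm);
*/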

2254: PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2255: {
2256:   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2257:   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;

2260:   if (nghosts) { *nghosts = B->nbs;}
2261:   if (ghosts) {*ghosts = baij->garray;}
2262:   return(0);
2263: }
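/*
   A minimal caller-side usage sketch (assumed code, not part of this file):
   for a BAIJ matrix the returned ghost list contains the global block
   indices of the off-process columns referenced by this process (the garray
   returned above).

     PetscInt       nghosts;
     const PetscInt *ghosts;
     MatGetGhosts(A,&nghosts,&ghosts);
*/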


2269: /*
2270:     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2271: */
2272: PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2273: {
2274:   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2275:   PetscErrorCode        ierr;
2276:   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2277:   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2278:   const PetscInt        *is;
2279:   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2280:   PetscInt              *rowhit,M,cstart,cend,colb;
2281:   PetscInt              *columnsforrow,l;
2282:   IS                    *isa;
2283:   PetscBool              done,flg;
2284:   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2285:   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;

2288:   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2289:   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");

2291:   ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);
2292:   MatGetBlockSize(mat,&bs);
2293:   M                = mat->rmap->n/bs;
2294:   cstart           = mat->cmap->rstart/bs;
2295:   cend             = mat->cmap->rend/bs;
2296:   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2297:   c->N             = mat->cmap->N/bs;
2298:   c->m             = mat->rmap->n/bs;
2299:   c->rstart        = mat->rmap->rstart/bs;

2301:   c->ncolors       = nis;
2302:   PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);
2303:   PetscMalloc(nis*sizeof(PetscInt*),&c->columns);
2304:   PetscMalloc(nis*sizeof(PetscInt),&c->nrows);
2305:   PetscMalloc(nis*sizeof(PetscInt*),&c->rows);
2306:   PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);
2307:   PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));

2309:   /* Allow access to data structures of local part of matrix */
2310:   if (!baij->colmap) {
2311:     CreateColmap_MPIBAIJ_Private(mat);
2312:   }
2313:   MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);
2314:   MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);
2315: 
2316:   PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);
2317:   PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);

2319:   for (i=0; i<nis; i++) {
2320:     ISGetLocalSize(isa[i],&n);
2321:     ISGetIndices(isa[i],&is);
2322:     c->ncolumns[i] = n;
2323:     if (n) {
2324:       PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);
2325:       PetscLogObjectMemory(c,n*sizeof(PetscInt));
2326:       PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));
2327:     } else {
2328:       c->columns[i]  = 0;
2329:     }

2331:     if (ctype == IS_COLORING_GLOBAL){
2332:       /* Determine the total (parallel) number of columns of this color */
2333:       MPI_Comm_size(((PetscObject)mat)->comm,&size);
2334:       PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);

2336:       nn   = PetscMPIIntCast(n);
2337:       MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);
2338:       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2339:       if (!nctot) {
2340:         PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");
2341:       }

2343:       disp[0] = 0;
2344:       for (j=1; j<size; j++) {
2345:         disp[j] = disp[j-1] + ncolsonproc[j-1];
2346:       }

2348:       /* Get complete list of columns for color on each processor */
2349:       PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);
2350:       MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);
2351:       PetscFree2(ncolsonproc,disp);
2352:     } else if (ctype == IS_COLORING_GHOSTED){
2353:       /* Determine local number of columns of this color on this process, including ghost points */
2354:       nctot = n;
2355:       PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);
2356:       PetscMemcpy(cols,is,n*sizeof(PetscInt));
2357:     } else {
2358:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2359:     }

2361:     /*
2362:        Mark all rows affected by these columns
2363:     */
2364:     /* Temporary option to allow for debugging/testing */
2365:     flg  = PETSC_FALSE;
2366:     PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);
2367:     if (!flg) {/*-----------------------------------------------------------------------------*/
2368:       /* crude, fast version */
2369:       PetscMemzero(rowhit,M*sizeof(PetscInt));
2370:       /* loop over columns*/
2371:       for (j=0; j<nctot; j++) {
2372:         if (ctype == IS_COLORING_GHOSTED) {
2373:           col = ltog[cols[j]];
2374:         } else {
2375:           col  = cols[j];
2376:         }
2377:         if (col >= cstart && col < cend) {
2378:           /* column is in diagonal block of matrix */
2379:           rows = A_cj + A_ci[col-cstart];
2380:           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2381:         } else {
2382: #if defined (PETSC_USE_CTABLE)
2383:           PetscTableFind(baij->colmap,col+1,&colb);
2384:           colb --;
2385: #else
2386:           colb = baij->colmap[col] - 1;
2387: #endif
2388:           if (colb == -1) {
2389:             m = 0;
2390:           } else {
2391:             colb = colb/bs;
2392:             rows = B_cj + B_ci[colb];
2393:             m    = B_ci[colb+1] - B_ci[colb];
2394:           }
2395:         }
2396:         /* loop over the rows of this column, marking them in rowhit */
2397:         for (k=0; k<m; k++) {
2398:           rowhit[*rows++] = col + 1;
2399:         }
2400:       }

2402:       /* count the number of hits */
2403:       nrows = 0;
2404:       for (j=0; j<M; j++) {
2405:         if (rowhit[j]) nrows++;
2406:       }
2407:       c->nrows[i]         = nrows;
2408:       PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);
2409:       PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);
2410:       PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));
2411:       nrows = 0;
2412:       for (j=0; j<M; j++) {
2413:         if (rowhit[j]) {
2414:           c->rows[i][nrows]           = j;
2415:           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2416:           nrows++;
2417:         }
2418:       }
2419:     } else {/*-------------------------------------------------------------------------------*/
2420:       /* slow version, using rowhit as a linked list */
2421:       PetscInt currentcol,fm,mfm;
2422:       rowhit[M] = M;
2423:       nrows     = 0;
2424:       /* loop over columns*/
2425:       for (j=0; j<nctot; j++) {
2426:         if (ctype == IS_COLORING_GHOSTED) {
2427:           col = ltog[cols[j]];
2428:         } else {
2429:           col  = cols[j];
2430:         }
2431:         if (col >= cstart && col < cend) {
2432:           /* column is in diagonal block of matrix */
2433:           rows = A_cj + A_ci[col-cstart];
2434:           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2435:         } else {
2436: #if defined (PETSC_USE_CTABLE)
2437:           PetscTableFind(baij->colmap,col+1,&colb);
2438:           colb --;
2439: #else
2440:           colb = baij->colmap[col] - 1;
2441: #endif
2442:           if (colb == -1) {
2443:             m = 0;
2444:           } else {
2445:             colb = colb/bs;
2446:             rows = B_cj + B_ci[colb];
2447:             m    = B_ci[colb+1] - B_ci[colb];
2448:           }
2449:         }

2451:         /* loop over the rows of this column, marking them in rowhit */
2452:         fm    = M; /* fm points to first entry in linked list */
2453:         for (k=0; k<m; k++) {
2454:           currentcol = *rows++;
2455:           /* is it already in the list? */
2456:           do {
2457:             mfm  = fm;
2458:             fm   = rowhit[fm];
2459:           } while (fm < currentcol);
2460:           /* not in list so add it */
2461:           if (fm != currentcol) {
2462:             nrows++;
2463:             columnsforrow[currentcol] = col;
2464:             /* next three lines insert new entry into linked list */
2465:             rowhit[mfm]               = currentcol;
2466:             rowhit[currentcol]        = fm;
2467:             fm                        = currentcol;
2468:             /* fm points to present position in list since we know the columns are sorted */
2469:           } else {
2470:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2471:           }
2472:         }
2473:       }
2474:       c->nrows[i]         = nrows;
2475:       PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);
2476:       PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);
2477:       PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));
2478:       /* now store the linked list of rows into c->rows[i] */
2479:       nrows = 0;
2480:       fm    = rowhit[M];
2481:       do {
2482:         c->rows[i][nrows]            = fm;
2483:         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2484:         fm                           = rowhit[fm];
2485:       } while (fm < M);
2486:     } /* ---------------------------------------------------------------------------------------*/
2487:     PetscFree(cols);
2488:   }

2490:   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2491:   /*
2492:        vscale will contain the on-processor ("diagonal" block) scalings followed by the off-processor scalings
2493:   */
2494:   if (ctype == IS_COLORING_GLOBAL) {
2495:     PetscInt *garray;
2496:     PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);
2497:     for (i=0; i<baij->B->cmap->n/bs; i++) {
2498:       for (j=0; j<bs; j++) {
2499:         garray[i*bs+j] = bs*baij->garray[i]+j;
2500:       }
2501:     }
2502:     VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);
2503:     PetscFree(garray);
2504:     CHKMEMQ;
2505:     PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);
2506:     for (k=0; k<c->ncolors; k++) {
2507:       PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);
2508:       for (l=0; l<c->nrows[k]; l++) {
2509:         col = c->columnsforrow[k][l];
2510:         if (col >= cstart && col < cend) {
2511:           /* column is in diagonal block of matrix */
2512:           colb = col - cstart;
2513:         } else {
2514:           /* column  is in "off-processor" part */
2515: #if defined (PETSC_USE_CTABLE)
2516:           PetscTableFind(baij->colmap,col+1,&colb);
2517:           colb --;
2518: #else
2519:           colb = baij->colmap[col] - 1;
2520: #endif
2521:           colb = colb/bs;
2522:           colb += cend - cstart;
2523:         }
2524:         c->vscaleforrow[k][l] = colb;
2525:       }
2526:     }
2527:   } else if (ctype == IS_COLORING_GHOSTED) {
2528:     /* Get gtol mapping */
2529:     PetscInt N = mat->cmap->N, *gtol;
2530:     PetscMalloc((N+1)*sizeof(PetscInt),&gtol);
2531:     for (i=0; i<N; i++) gtol[i] = -1;
2532:     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2533: 
2534:     c->vscale = 0; /* will be created in MatFDColoringApply() */
2535:     PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);
2536:     for (k=0; k<c->ncolors; k++) {
2537:       PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);
2538:       for (l=0; l<c->nrows[k]; l++) {
2539:         col = c->columnsforrow[k][l];      /* global column index */
2540:         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2541:       }
2542:     }
2543:     PetscFree(gtol);
2544:   }
2545:   ISColoringRestoreIS(iscoloring,&isa);

2547:   PetscFree(rowhit);
2548:   PetscFree(columnsforrow);
2549:   MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);
2550:   MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);
2551:   CHKMEMQ;
2552:   return(0);
2553: }
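/*
   A minimal caller-side usage sketch (assumed code, not part of this file;
   "sl" is just one available coloring): create a coloring of the Jacobian J
   and then the finite-difference coloring context used to compute J.

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;
     MatGetColoring(J,MATCOLORINGSL,&iscoloring);
     MatFDColoringCreate(J,iscoloring,&fdcoloring);
     MatFDColoringSetFromOptions(fdcoloring);
     ISColoringDestroy(&iscoloring);
*/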

2557: PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2558: {
2559:   Mat            B;
2560:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2561:   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2562:   Mat_SeqAIJ     *b;
2564:   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2565:   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2566:   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

2569:   MPI_Comm_size(((PetscObject)A)->comm,&size);
2570:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);

2572:   /* ----------------------------------------------------------------
2573:      Tell every processor the number of nonzeros per row
2574:   */
2575:   PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);
2576:   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2577:     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2578:   }
2579:   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2580:   PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);
2581:   displs     = recvcounts + size;
2582:   for (i=0; i<size; i++) {
2583:     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2584:     displs[i]     = A->rmap->range[i]/bs;
2585:   }
2586: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2587:   MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2588: #else
2589:   MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2590: #endif
2591:   /* ---------------------------------------------------------------
2592:      Create the sequential matrix of the same type as the local block diagonal
2593:   */
2594:   MatCreate(PETSC_COMM_SELF,&B);
2595:   MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);
2596:   MatSetType(B,MATSEQAIJ);
2597:   MatSeqAIJSetPreallocation(B,0,lens);
2598:   b = (Mat_SeqAIJ *)B->data;

2600:   /*--------------------------------------------------------------------
2601:     Copy my part of matrix column indices over
2602:   */
2603:   sendcount  = ad->nz + bd->nz;
2604:   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2605:   a_jsendbuf = ad->j;
2606:   b_jsendbuf = bd->j;
2607:   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2608:   cnt        = 0;
2609:   for (i=0; i<n; i++) {

2611:     /* put in lower diagonal portion */
2612:     m = bd->i[i+1] - bd->i[i];
2613:     while (m > 0) {
2614:       /* is it above diagonal (in bd (compressed) numbering) */
2615:       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2616:       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2617:       m--;
2618:     }

2620:     /* put in diagonal portion */
2621:     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2622:       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2623:     }

2625:     /* put in upper diagonal portion */
2626:     while (m-- > 0) {
2627:       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2628:     }
2629:   }
2630:   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

2632:   /*--------------------------------------------------------------------
2633:     Gather all column indices to all processors
2634:   */
2635:   for (i=0; i<size; i++) {
2636:     recvcounts[i] = 0;
2637:     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2638:       recvcounts[i] += lens[j];
2639:     }
2640:   }
2641:   displs[0]  = 0;
2642:   for (i=1; i<size; i++) {
2643:     displs[i] = displs[i-1] + recvcounts[i-1];
2644:   }
2645: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2646:   MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2647: #else
2648:   MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2649: #endif
2650:   /*--------------------------------------------------------------------
2651:     Assemble the matrix into usable form (note: numerical values are not yet set)
2652:   */
2653:   /* set the b->ilen (length of each row) values */
2654:   PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));
2655:   /* set the b->i indices */
2656:   b->i[0] = 0;
2657:   for (i=1; i<=A->rmap->N/bs; i++) {
2658:     b->i[i] = b->i[i-1] + lens[i-1];
2659:   }
2660:   PetscFree(lens);
2661:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2662:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2663:   PetscFree(recvcounts);

2665:   if (A->symmetric){
2666:     MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
2667:   } else if (A->hermitian) {
2668:     MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
2669:   } else if (A->structurally_symmetric) {
2670:     MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
2671:   }
2672:   *newmat = B;
2673:   return(0);
2674: }

2678: PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2679: {
2680:   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2682:   Vec            bb1 = 0;

2685:   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2686:     VecDuplicate(bb,&bb1);
2687:   }

2689:   if (flag == SOR_APPLY_UPPER) {
2690:     (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2691:     return(0);
2692:   }

2694:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2695:     if (flag & SOR_ZERO_INITIAL_GUESS) {
2696:       (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2697:       its--;
2698:     }
2699: 
2700:     while (its--) {
2701:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2702:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

2704:       /* update rhs: bb1 = bb - B*x */
2705:       VecScale(mat->lvec,-1.0);
2706:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

2708:       /* local sweep */
2709:       (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
2710:     }
2711:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2712:     if (flag & SOR_ZERO_INITIAL_GUESS) {
2713:       (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2714:       its--;
2715:     }
2716:     while (its--) {
2717:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2718:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

2720:       /* update rhs: bb1 = bb - B*x */
2721:       VecScale(mat->lvec,-1.0);
2722:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

2724:       /* local sweep */
2725:       (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
2726:     }
2727:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2728:     if (flag & SOR_ZERO_INITIAL_GUESS) {
2729:       (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2730:       its--;
2731:     }
2732:     while (its--) {
2733:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2734:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

2736:       /* update rhs: bb1 = bb - B*x */
2737:       VecScale(mat->lvec,-1.0);
2738:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

2740:       /* local sweep */
2741:       (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
2742:     }
2743:   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR sweep type is not supported");

2745:   VecDestroy(&bb1);
2746:   return(0);
2747: }
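/*
   A minimal caller-side usage sketch (assumed code, not part of this file):
   MatSOR() is normally invoked through the SOR preconditioner rather than
   called directly; the local sweep types above correspond to PCSOR options.

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);
     PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);
     KSPSolve(ksp,b,x);
*/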


2753: PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2754: {
2755:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;

2759:   MatInvertBlockDiagonal(a->A,values);
2760:   return(0);
2761: }


2764: /* -------------------------------------------------------------------*/
2765: static struct _MatOps MatOps_Values = {
2766:        MatSetValues_MPIBAIJ,
2767:        MatGetRow_MPIBAIJ,
2768:        MatRestoreRow_MPIBAIJ,
2769:        MatMult_MPIBAIJ,
2770: /* 4*/ MatMultAdd_MPIBAIJ,
2771:        MatMultTranspose_MPIBAIJ,
2772:        MatMultTransposeAdd_MPIBAIJ,
2773:        0,
2774:        0,
2775:        0,
2776: /*10*/ 0,
2777:        0,
2778:        0,
2779:        MatSOR_MPIBAIJ,
2780:        MatTranspose_MPIBAIJ,
2781: /*15*/ MatGetInfo_MPIBAIJ,
2782:        MatEqual_MPIBAIJ,
2783:        MatGetDiagonal_MPIBAIJ,
2784:        MatDiagonalScale_MPIBAIJ,
2785:        MatNorm_MPIBAIJ,
2786: /*20*/ MatAssemblyBegin_MPIBAIJ,
2787:        MatAssemblyEnd_MPIBAIJ,
2788:        MatSetOption_MPIBAIJ,
2789:        MatZeroEntries_MPIBAIJ,
2790: /*24*/ MatZeroRows_MPIBAIJ,
2791:        0,
2792:        0,
2793:        0,
2794:        0,
2795: /*29*/ MatSetUpPreallocation_MPIBAIJ,
2796:        0,
2797:        0,
2798:        0,
2799:        0,
2800: /*34*/ MatDuplicate_MPIBAIJ,
2801:        0,
2802:        0,
2803:        0,
2804:        0,
2805: /*39*/ MatAXPY_MPIBAIJ,
2806:        MatGetSubMatrices_MPIBAIJ,
2807:        MatIncreaseOverlap_MPIBAIJ,
2808:        MatGetValues_MPIBAIJ,
2809:        MatCopy_MPIBAIJ,
2810: /*44*/ 0,
2811:        MatScale_MPIBAIJ,
2812:        0,
2813:        0,
2814:        0,
2815: /*49*/ MatSetBlockSize_MPIBAIJ,
2816:        0,
2817:        0,
2818:        0,
2819:        0,
2820: /*54*/ MatFDColoringCreate_MPIBAIJ,
2821:        0,
2822:        MatSetUnfactored_MPIBAIJ,
2823:        MatPermute_MPIBAIJ,
2824:        MatSetValuesBlocked_MPIBAIJ,
2825: /*59*/ MatGetSubMatrix_MPIBAIJ,
2826:        MatDestroy_MPIBAIJ,
2827:        MatView_MPIBAIJ,
2828:        0,
2829:        0,
2830: /*64*/ 0,
2831:        0,
2832:        0,
2833:        0,
2834:        0,
2835: /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2836:        0,
2837:        0,
2838:        0,
2839:        0,
2840: /*74*/ 0,
2841:        MatFDColoringApply_BAIJ,
2842:        0,
2843:        0,
2844:        0,
2845: /*79*/ 0,
2846:        0,
2847:        0,
2848:        0,
2849:        MatLoad_MPIBAIJ,
2850: /*84*/ 0,
2851:        0,
2852:        0,
2853:        0,
2854:        0,
2855: /*89*/ 0,
2856:        0,
2857:        0,
2858:        0,
2859:        0,
2860: /*94*/ 0,
2861:        0,
2862:        0,
2863:        0,
2864:        0,
2865: /*99*/ 0,
2866:        0,
2867:        0,
2868:        0,
2869:        0,
2870: /*104*/0,
2871:        MatRealPart_MPIBAIJ,
2872:        MatImaginaryPart_MPIBAIJ,
2873:        0,
2874:        0,
2875: /*109*/0,
2876:        0,
2877:        0,
2878:        0,
2879:        0,
2880: /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2881:        0,
2882:        MatGetGhosts_MPIBAIJ,
2883:        0,
2884:        0,
2885: /*119*/0,
2886:        0,
2887:        0,
2888:        0,
2889:        0,
2890: /*124*/0,
2891:        0,
2892:        MatInvertBlockDiagonal_MPIBAIJ
2893: };

2898: PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2899: {
2901:   *a = ((Mat_MPIBAIJ *)A->data)->A;
2902:   return(0);
2903: }


2913: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2914: {
2915:   PetscInt       m,rstart,cstart,cend;
2916:   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2917:   const PetscInt *JJ=0;
2918:   PetscScalar    *values=0;


2923:   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2924:   PetscLayoutSetBlockSize(B->rmap,bs);
2925:   PetscLayoutSetBlockSize(B->cmap,bs);
2926:   PetscLayoutSetUp(B->rmap);
2927:   PetscLayoutSetUp(B->cmap);
2928:   m      = B->rmap->n/bs;
2929:   rstart = B->rmap->rstart/bs;
2930:   cstart = B->cmap->rstart/bs;
2931:   cend   = B->cmap->rend/bs;

2933:   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2934:   PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
2935:   for (i=0; i<m; i++) {
2936:     nz = ii[i+1] - ii[i];
2937:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2938:     nz_max = PetscMax(nz_max,nz);
2939:     JJ  = jj + ii[i];
2940:     for (j=0; j<nz; j++) {
2941:       if (*JJ >= cstart) break;
2942:       JJ++;
2943:     }
2944:     d = 0;
2945:     for (; j<nz; j++) {
2946:       if (*JJ++ >= cend) break;
2947:       d++;
2948:     }
2949:     d_nnz[i] = d;
2950:     o_nnz[i] = nz - d;
2951:   }
2952:   MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2953:   PetscFree2(d_nnz,o_nnz);

2955:   values = (PetscScalar*)V;
2956:   if (!values) {
2957:     PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);
2958:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2959:   }
2960:   for (i=0; i<m; i++) {
2961:     PetscInt          row    = i + rstart;
2962:     PetscInt          ncols  = ii[i+1] - ii[i];
2963:     const PetscInt    *icols = jj + ii[i];
2964:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2965:     MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2966:   }

2968:   if (!V) { PetscFree(values); }
2969:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2970:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2972:   return(0);
2973: }

2978: /*@C
2979:    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2980:    (block compressed row storage) using local block CSR arrays.  

2982:    Collective on MPI_Comm

2984:    Input Parameters:
2985: +  B - the matrix 
2986: .  bs - the block size
2987: .  i - the indices into j for the start of each local block row (starts with zero)
2988: .  j - the global block column indices for each local block row (starts with zero); these must be sorted within each row
2989: -  v - optional values in the matrix, stored as bs*bs values per block

2991:    Level: developer

2993: .keywords: matrix, baij, block, compressed row, sparse, parallel

2995: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2996: @*/
2997: PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2998: {

3002:   PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3003:   return(0);
3004: }
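/*
   A minimal caller-side usage sketch (assumed code, not part of this file;
   the arrays describe two local block rows and are purely illustrative):
   i[] holds block-row pointers, j[] holds sorted global block column indices,
   and v (optional, may be PETSC_NULL) holds bs*bs values per block.

     Mat         B;
     PetscInt    bs  = 2;
     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,1,1};
     PetscScalar v[3*2*2];

     ... fill v with the 3*bs*bs block entries ...
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIBAIJ);
     MatMPIBAIJSetPreallocationCSR(B,bs,i,j,v);
*/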

3009: PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3010: {
3011:   Mat_MPIBAIJ    *b;
3013:   PetscInt       i, newbs = PetscAbs(bs);

3016:   if (bs < 0) {
3017:     PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");
3018:       PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
3019:     PetscOptionsEnd();
3020:     bs   = PetscAbs(bs);
3021:   }
3022:   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3023:   bs = newbs;


3026:   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3027:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3028:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3029:   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3030:   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3031: 
3032:   PetscLayoutSetBlockSize(B->rmap,bs);
3033:   PetscLayoutSetBlockSize(B->cmap,bs);
3034:   PetscLayoutSetUp(B->rmap);
3035:   PetscLayoutSetUp(B->cmap);

3037:   if (d_nnz) {
3038:     for (i=0; i<B->rmap->n/bs; i++) {
3039:       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3040:     }
3041:   }
3042:   if (o_nnz) {
3043:     for (i=0; i<B->rmap->n/bs; i++) {
3044:       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3045:     }
3046:   }

3048:   b = (Mat_MPIBAIJ*)B->data;
3049:   b->bs2 = bs*bs;
3050:   b->mbs = B->rmap->n/bs;
3051:   b->nbs = B->cmap->n/bs;
3052:   b->Mbs = B->rmap->N/bs;
3053:   b->Nbs = B->cmap->N/bs;

3055:   for (i=0; i<=b->size; i++) {
3056:     b->rangebs[i] = B->rmap->range[i]/bs;
3057:   }
3058:   b->rstartbs = B->rmap->rstart/bs;
3059:   b->rendbs   = B->rmap->rend/bs;
3060:   b->cstartbs = B->cmap->rstart/bs;
3061:   b->cendbs   = B->cmap->rend/bs;

3063:   if (!B->preallocated) {
3064:     MatCreate(PETSC_COMM_SELF,&b->A);
3065:     MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3066:     MatSetType(b->A,MATSEQBAIJ);
3067:     PetscLogObjectParent(B,b->A);
3068:     MatCreate(PETSC_COMM_SELF,&b->B);
3069:     MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3070:     MatSetType(b->B,MATSEQBAIJ);
3071:     PetscLogObjectParent(B,b->B);
3072:     MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);
3073:   }

3075:   MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
3076:   MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
3077:   B->preallocated = PETSC_TRUE;
3078:   return(0);
3079: }
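/*
   A minimal caller-side usage sketch (assumed code, not part of this file;
   m and n are placeholder local sizes, which must be divisible by the block
   size): preallocate roughly 5 blocks per block row in the diagonal part and
   2 in the off-diagonal part, or pass per-block-row arrays instead of
   PETSC_NULL.

     Mat B;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(B,3,5,PETSC_NULL,2,PETSC_NULL);
*/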



3091: PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3092: {
3093:   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3095:   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3096:   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3097:   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

3100:   PetscMalloc((M+1)*sizeof(PetscInt),&ii);
3101:   ii[0] = 0;
3102:   CHKMEMQ;
3103:   for (i=0; i<M; i++) {
3104:     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3105:     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3106:     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3107:     /* remove one from the count if this row has a diagonal entry (MPIAdj does not store the diagonal) */
3108:     for (j=id[i]; j<id[i+1]; j++) {
3109:       if (jd[j] == i) {ii[i+1]--;break;}
3110:     }
3111:     CHKMEMQ;
3112:   }
3113:   PetscMalloc(ii[M]*sizeof(PetscInt),&jj);
3114:   cnt = 0;
3115:   for (i=0; i<M; i++) {
3116:     for (j=io[i]; j<io[i+1]; j++) {
3117:       if (garray[jo[j]] > rstart) break;
3118:       jj[cnt++] = garray[jo[j]];
3119:       CHKMEMQ;
3120:     }
3121:     for (k=id[i]; k<id[i+1]; k++) {
3122:       if (jd[k] != i) {
3123:         jj[cnt++] = rstart + jd[k];
3124:         CHKMEMQ;
3125:       }
3126:     }
3127:     for (;j<io[i+1]; j++) {
3128:       jj[cnt++] = garray[jo[j]];
3129:       CHKMEMQ;
3130:     }
3131:   }
3132:   MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);
3133:   return(0);
3134: }
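/*
   A minimal caller-side usage sketch (assumed code, not part of this file):
   this conversion is reached through the generic MatConvert() interface,
   e.g. to hand the block nonzero structure to a partitioner.

     Mat adj;
     MatConvert(B,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);
*/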

3137: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3139: PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);

3145: PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3146: {
3148:   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3149:   Mat            B;
3150:   Mat_MPIAIJ     *b;

3153:   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");

3155:   MatCreate(((PetscObject)A)->comm,&B);
3156:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3157:   MatSetType(B,MATMPIAIJ);
3158:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
3159:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
3160:   b = (Mat_MPIAIJ*) B->data;

3162:   MatDestroy(&b->A);
3163:   MatDestroy(&b->B);
3164:   DisAssemble_MPIBAIJ(A);
3165:   MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);
3166:   MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);
3167:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3168:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3169:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
3170:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
3171:   if (reuse == MAT_REUSE_MATRIX) {
3172:     MatHeaderReplace(A,B);
3173:   } else {
3174:    *newmat = B;
3175:   }
3176:   return(0);
3177: }
3179: 
3181: #if defined(PETSC_HAVE_MUMPS)
3183: #endif

3186: /*MC
3187:    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

3189:    Options Database Keys:
3190: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3191: . -mat_block_size <bs> - set the blocksize used to store the matrix
3191: - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix

3194:   Level: beginner

3196: .seealso: MatCreateMPIBAIJ
3197: M*/
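/*
   Example (illustrative sketch, not part of the PETSc source): a typical way to obtain an
   MPIBAIJ matrix is to set the type explicitly, or rely on -mat_type mpibaij together with
   MatSetFromOptions(), and then preallocate.  The global size 100 and block size 2 below are
   placeholders chosen only for the example.

.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetType(A,MATMPIBAIJ);
   MatSetFromOptions(A);
   MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
.ve
*/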


3206: PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3207: {
3208:   Mat_MPIBAIJ    *b;
3210:   PetscBool      flg;

3213:   PetscNewLog(B,Mat_MPIBAIJ,&b);
3214:   B->data = (void*)b;

3216:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3217:   B->assembled  = PETSC_FALSE;

3219:   B->insertmode = NOT_SET_VALUES;
3220:   MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
3221:   MPI_Comm_size(((PetscObject)B)->comm,&b->size);

3223:   /* build local table of row and column ownerships */
3224:   PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);

3226:   /* build cache for off array entries formed */
3227:   MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
3228:   b->donotstash  = PETSC_FALSE;
3229:   b->colmap      = PETSC_NULL;
3230:   b->garray      = PETSC_NULL;
3231:   b->roworiented = PETSC_TRUE;

3233:   /* stuff used in block assembly */
3234:   b->barray       = 0;

3236:   /* stuff used for matrix vector multiply */
3237:   b->lvec         = 0;
3238:   b->Mvctx        = 0;

3240:   /* stuff for MatGetRow() */
3241:   b->rowindices   = 0;
3242:   b->rowvalues    = 0;
3243:   b->getrowactive = PETSC_FALSE;

3245:   /* hash table stuff */
3246:   b->ht           = 0;
3247:   b->hd           = 0;
3248:   b->ht_size      = 0;
3249:   b->ht_flag      = PETSC_FALSE;
3250:   b->ht_fact      = 0;
3251:   b->ht_total_ct  = 0;
3252:   b->ht_insert_ct = 0;

3254:   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3255:   b->ijonly       = PETSC_FALSE;

3257:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");
3258:     PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);
3259:     if (flg) {
3260:       PetscReal fact = 1.39;
3261:       MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
3262:       PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);
3263:       if (fact <= 1.0) fact = 1.39;
3264:       MatMPIBAIJSetHashTableFactor(B,fact);
3265:       PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
3266:     }
3267:   PetscOptionsEnd();

3269: #if defined(PETSC_HAVE_MUMPS)
3270:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);
3271: #endif
3272:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3273:                                      "MatConvert_MPIBAIJ_MPIAdj",
3274:                                       MatConvert_MPIBAIJ_MPIAdj);
3275:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3276:                                      "MatConvert_MPIBAIJ_MPIAIJ",
3277:                                       MatConvert_MPIBAIJ_MPIAIJ);
3278:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3279:                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3280:                                       MatConvert_MPIBAIJ_MPISBAIJ);
3281:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3282:                                      "MatStoreValues_MPIBAIJ",
3283:                                      MatStoreValues_MPIBAIJ);
3284:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3285:                                      "MatRetrieveValues_MPIBAIJ",
3286:                                      MatRetrieveValues_MPIBAIJ);
3287:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3288:                                      "MatGetDiagonalBlock_MPIBAIJ",
3289:                                      MatGetDiagonalBlock_MPIBAIJ);
3290:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3291:                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3292:                                      MatMPIBAIJSetPreallocation_MPIBAIJ);
3293:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3294:                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3295:                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
3296:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3297:                                      "MatDiagonalScaleLocal_MPIBAIJ",
3298:                                      MatDiagonalScaleLocal_MPIBAIJ);
3299:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3300:                                      "MatSetHashTableFactor_MPIBAIJ",
3301:                                      MatSetHashTableFactor_MPIBAIJ);
3302:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3303:                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3304:                                       MatConvert_MPIBAIJ_MPIBSTRM);
3305:   PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
3306:   return(0);
3307: }

3310: /*MC
3311:    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

3313:    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3314:    and MATMPIBAIJ otherwise.

3316:    Options Database Keys:
3317: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

3319:   Level: beginner

3321: .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3322: M*/

3326: /*@C
3327:    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3328:    (block compressed row).  For good matrix assembly performance
3329:    the user should preallocate the matrix storage by setting the parameters 
3330:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3331:    performance can be increased by more than a factor of 50.

3333:    Collective on Mat

3335:    Input Parameters:
3336: +  A - the matrix 
3337: .  bs   - size of block
3338: .  d_nz  - number of block nonzeros per block row in diagonal portion of local 
3339:            submatrix  (same for all local rows)
3340: .  d_nnz - array containing the number of block nonzeros in the various block rows 
3341:            of the diagonal portion of the local submatrix (possibly different for each block
3342:            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3343: .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3344:            submatrix (same for all local rows).
3345: -  o_nnz - array containing the number of nonzeros in the various block rows of the
3346:            off-diagonal portion of the local submatrix (possibly different for
3347:            each block row) or PETSC_NULL.

3349:    If the *_nnz parameter is given then the *_nz parameter is ignored

3351:    Options Database Keys:
3352: +   -mat_block_size - size of the blocks to use
3353: -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix

3355:    Notes:
3356:    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3357:    then it must be used on all processors that share the object for that argument.

3359:    Storage Information:
3360:    For a square global matrix we define each processor's diagonal portion 
3361:    to be its local rows and the corresponding columns (a square submatrix);  
3362:    each processor's off-diagonal portion encompasses the remainder of the
3363:    local matrix (a rectangular submatrix). 

3365:    The user can specify preallocated storage for the diagonal part of
3366:    the local submatrix with either d_nz or d_nnz (not both).  Set 
3367:    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3368:    memory allocation.  Likewise, specify preallocated storage for the
3369:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

3371:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3372:    the figure below we depict these three local rows and all columns (0-11).

3374: .vb
3375:            0 1 2 3 4 5 6 7 8 9 10 11
3376:           -------------------
3377:    row 3  |  o o o d d d o o o o o o
3378:    row 4  |  o o o d d d o o o o o o
3379:    row 5  |  o o o d d d o o o o o o
3380:           -------------------
3381: .ve
3382:   
3383:    Thus, any entries in the d locations are stored in the d (diagonal) 
3384:    submatrix, and any entries in the o locations are stored in the
3385:    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3386:    stored simply in the MATSEQBAIJ format for compressed row storage.

3388:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3389:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3390:    In general, for PDE problems in which most nonzeros are near the diagonal,
3391:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3392:    or you will get TERRIBLE performance; see the users' manual chapter on
3393:    matrices.

3395:    You can call MatGetInfo() to get information on how effective the preallocation was;
3396:    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3397:    You can also run with the option -info and look for messages with the string 
3398:    malloc in them to see if additional memory allocation was needed.

3400:    Level: intermediate

3402: .keywords: matrix, block, aij, compressed row, sparse, parallel

3404: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3405: @*/
3406: PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3407: {

3411:   PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
3412:   return(0);
3413: }
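/*
   Example (illustrative sketch, not part of the PETSc source): preallocating with per-block-row
   counts instead of the single d_nz/o_nz values.  The counts below are made-up numbers for a
   process that owns 3 block rows of a previously created MPIBAIJ matrix A with block size bs.

.vb
   PetscInt d_nnz[3] = {3,2,3};
   PetscInt o_nnz[3] = {1,0,1};
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve

   As stated in the manual page above, when the *_nnz arrays are supplied the corresponding
   *_nz values are ignored.
*/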

3417: /*@C
3418:    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3419:    (block compressed row).  For good matrix assembly performance
3420:    the user should preallocate the matrix storage by setting the parameters 
3421:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3422:    performance can be increased by more than a factor of 50.

3424:    Collective on MPI_Comm

3426:    Input Parameters:
3427: +  comm - MPI communicator
3428: .  bs   - size of block
3429: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3430:            This value should be the same as the local size used in creating the 
3431:            y vector for the matrix-vector product y = Ax.
3432: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3433:            This value should be the same as the local size used in creating the 
3434:            x vector for the matrix-vector product y = Ax.
3435: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3436: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3437: .  d_nz  - number of nonzero blocks per block row in diagonal portion of local 
3438:            submatrix  (same for all local rows)
3439: .  d_nnz - array containing the number of nonzero blocks in the various block rows 
3440:            of the diagonal portion of the local submatrix (possibly different for each block
3441:            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3442: .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3443:            submatrix (same for all local rows).
3444: -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3445:            off-diagonal portion of the local submatrix (possibly different for
3446:            each block row) or PETSC_NULL.

3448:    Output Parameter:
3449: .  A - the matrix 

3451:    Options Database Keys:
3452: +   -mat_block_size - size of the blocks to use
3453: -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix

3455:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3456:    MatXXXXSetPreallocation() paradigm instead of this routine directly. 
3457:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3459:    Notes:
3460:    If the *_nnz parameter is given then the *_nz parameter is ignored

3462:    A nonzero block is any block that has 1 or more nonzeros in it

3464:    The user MUST specify either the local or global matrix dimensions
3465:    (possibly both).

3467:    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3468:    then it must be used on all processors that share the object for that argument.

3470:    Storage Information:
3471:    For a square global matrix we define each processor's diagonal portion 
3472:    to be its local rows and the corresponding columns (a square submatrix);  
3473:    each processor's off-diagonal portion encompasses the remainder of the
3474:    local matrix (a rectangular submatrix). 

3476:    The user can specify preallocated storage for the diagonal part of
3477:    the local submatrix with either d_nz or d_nnz (not both).  Set 
3478:    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3479:    memory allocation.  Likewise, specify preallocated storage for the
3480:    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

3482:    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3483:    the figure below we depict these three local rows and all columns (0-11).

3485: .vb
3486:            0 1 2 3 4 5 6 7 8 9 10 11
3487:           -------------------
3488:    row 3  |  o o o d d d o o o o o o
3489:    row 4  |  o o o d d d o o o o o o
3490:    row 5  |  o o o d d d o o o o o o
3491:           -------------------
3492: .ve
3493:   
3494:    Thus, any entries in the d locations are stored in the d (diagonal) 
3495:    submatrix, and any entries in the o locations are stored in the
3496:    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3497:    stored simply in the MATSEQBAIJ format for compressed row storage.

3499:    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3500:    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3501:    In general, for PDE problems in which most nonzeros are near the diagonal,
3502:    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3503:    or you will get TERRIBLE performance; see the users' manual chapter on
3504:    matrices.

3506:    Level: intermediate

3508: .keywords: matrix, block, aij, compressed row, sparse, parallel

3510: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3511: @*/
3512: PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3513: {
3515:   PetscMPIInt    size;

3518:   MatCreate(comm,A);
3519:   MatSetSizes(*A,m,n,M,N);
3520:   MPI_Comm_size(comm,&size);
3521:   if (size > 1) {
3522:     MatSetType(*A,MATMPIBAIJ);
3523:     MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
3524:   } else {
3525:     MatSetType(*A,MATSEQBAIJ);
3526:     MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
3527:   }
3528:   return(0);
3529: }
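/*
   Example (illustrative sketch, not part of the PETSc source): creating a matrix with the
   convenience routine above, inserting one 2x2 block, and assembling.  The sizes,
   preallocation values, and block entries are placeholders; in a real code each process
   would insert blocks for the block rows it owns.

.vb
   Mat         A;
   PetscInt    row = 0,col = 0;
   PetscScalar vals[4] = {1.0,2.0,3.0,4.0};

   MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);
   MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
*/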

3533: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3534: {
3535:   Mat            mat;
3536:   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3538:   PetscInt       len=0;

3541:   *newmat       = 0;
3542:   MatCreate(((PetscObject)matin)->comm,&mat);
3543:   MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3544:   MatSetType(mat,((PetscObject)matin)->type_name);
3545:   PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));

3547:   mat->factortype   = matin->factortype;
3548:   mat->preallocated = PETSC_TRUE;
3549:   mat->assembled    = PETSC_TRUE;
3550:   mat->insertmode   = NOT_SET_VALUES;

3552:   a      = (Mat_MPIBAIJ*)mat->data;
3553:   mat->rmap->bs  = matin->rmap->bs;
3554:   a->bs2   = oldmat->bs2;
3555:   a->mbs   = oldmat->mbs;
3556:   a->nbs   = oldmat->nbs;
3557:   a->Mbs   = oldmat->Mbs;
3558:   a->Nbs   = oldmat->Nbs;
3559: 
3560:   PetscLayoutReference(matin->rmap,&mat->rmap);
3561:   PetscLayoutReference(matin->cmap,&mat->cmap);

3563:   a->size         = oldmat->size;
3564:   a->rank         = oldmat->rank;
3565:   a->donotstash   = oldmat->donotstash;
3566:   a->roworiented  = oldmat->roworiented;
3567:   a->rowindices   = 0;
3568:   a->rowvalues    = 0;
3569:   a->getrowactive = PETSC_FALSE;
3570:   a->barray       = 0;
3571:   a->rstartbs     = oldmat->rstartbs;
3572:   a->rendbs       = oldmat->rendbs;
3573:   a->cstartbs     = oldmat->cstartbs;
3574:   a->cendbs       = oldmat->cendbs;

3576:   /* hash table stuff */
3577:   a->ht           = 0;
3578:   a->hd           = 0;
3579:   a->ht_size      = 0;
3580:   a->ht_flag      = oldmat->ht_flag;
3581:   a->ht_fact      = oldmat->ht_fact;
3582:   a->ht_total_ct  = 0;
3583:   a->ht_insert_ct = 0;

3585:   PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));
3586:   if (oldmat->colmap) {
3587: #if defined (PETSC_USE_CTABLE)
3588:   PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3589: #else
3590:   PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);
3591:   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
3592:   PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
3593: #endif
3594:   } else a->colmap = 0;

3596:   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3597:     PetscMalloc(len*sizeof(PetscInt),&a->garray);
3598:     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3599:     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
3600:   } else a->garray = 0;
3601: 
3602:   MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);
3603:   VecDuplicate(oldmat->lvec,&a->lvec);
3604:   PetscLogObjectParent(mat,a->lvec);
3605:   VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3606:   PetscLogObjectParent(mat,a->Mvctx);

3608:   MatDuplicate(oldmat->A,cpvalues,&a->A);
3609:   PetscLogObjectParent(mat,a->A);
3610:   MatDuplicate(oldmat->B,cpvalues,&a->B);
3611:   PetscLogObjectParent(mat,a->B);
3612:   PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3613:   *newmat = mat;

3615:   return(0);
3616: }

3620: PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3621: {
3623:   int            fd;
3624:   PetscInt       i,nz,j,rstart,rend;
3625:   PetscScalar    *vals,*buf;
3626:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3627:   MPI_Status     status;
3628:   PetscMPIInt    rank,size,maxnz;
3629:   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3630:   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3631:   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3632:   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3633:   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3634:   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;

3637:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");
3638:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
3639:   PetscOptionsEnd();

3641:   MPI_Comm_size(comm,&size);
3642:   MPI_Comm_rank(comm,&rank);
3643:   if (!rank) {
3644:     PetscViewerBinaryGetDescriptor(viewer,&fd);
3645:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3646:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3647:   }

3649:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

3651:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3652:   M = header[1]; N = header[2];

3654:   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3655:   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3656:   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3657: 
3658:   /* If global sizes are set, check if they are consistent with that given in the file */
3659:   if (sizesset) {
3660:     MatGetSize(newmat,&grows,&gcols);
3661:   }
3662:   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3663:   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

3665:   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");

3667:   /* 
3668:      This code adds extra rows to make sure the number of rows is 
3669:      divisible by the blocksize
3670:   */
3671:   Mbs        = M/bs;
3672:   extra_rows = bs - M + bs*Mbs;
3673:   if (extra_rows == bs) extra_rows = 0;
3674:   else                  Mbs++;
3675:   if (extra_rows && !rank) {
3676:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3677:   }

3679:   /* determine ownership of all rows */
3680:   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3681:     mbs        = Mbs/size + ((Mbs % size) > rank);
3682:     m          = mbs*bs;
3683:   } else { /* User set */
3684:     m          = newmat->rmap->n;
3685:     mbs        = m/bs;
3686:   }
3687:   PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);
3688:   MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);

3690:   /* process 0 needs enough room for process with most rows */
3691:   if (!rank) {
3692:     mmax = rowners[1];
3693:     for (i=2; i<size; i++) {
3694:       mmax = PetscMax(mmax,rowners[i]);
3695:     }
3696:     mmax*=bs;
3697:   } else mmax = m;

3699:   rowners[0] = 0;
3700:   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3701:   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3702:   rstart = rowners[rank];
3703:   rend   = rowners[rank+1];

3705:   /* distribute row lengths to all processors */
3706:   PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);
3707:   if (!rank) {
3708:     mend = m;
3709:     if (size == 1) mend = mend - extra_rows;
3710:     PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);
3711:     for (j=mend; j<m; j++) locrowlens[j] = 1;
3712:     PetscMalloc(m*sizeof(PetscInt),&rowlengths);
3713:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
3714:     PetscMemzero(procsnz,size*sizeof(PetscInt));
3715:     for (j=0; j<m; j++) {
3716:       procsnz[0] += locrowlens[j];
3717:     }
3718:     for (i=1; i<size; i++) {
3719:       mend = browners[i+1] - browners[i];
3720:       if (i == size-1) mend = mend - extra_rows;
3721:       PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);
3722:       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3723:       /* calculate the number of nonzeros on each processor */
3724:       for (j=0; j<browners[i+1]-browners[i]; j++) {
3725:         procsnz[i] += rowlengths[j];
3726:       }
3727:       MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);
3728:     }
3729:     PetscFree(rowlengths);
3730:   } else {
3731:     MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);
3732:   }

3734:   if (!rank) {
3735:     /* determine max buffer needed and allocate it */
3736:     maxnz = procsnz[0];
3737:     for (i=1; i<size; i++) {
3738:       maxnz = PetscMax(maxnz,procsnz[i]);
3739:     }
3740:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

3742:     /* read in my part of the matrix column indices  */
3743:     nz     = procsnz[0];
3744:     PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
3745:     mycols = ibuf;
3746:     if (size == 1)  nz -= extra_rows;
3747:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3748:     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

3750:     /* read in everyone else's rows (except the last process) and ship them off */
3751:     for (i=1; i<size-1; i++) {
3752:       nz   = procsnz[i];
3753:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
3754:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
3755:     }
3756:     /* read in the stuff for the last proc */
3757:     if (size != 1) {
3758:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3759:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
3760:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3761:       MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
3762:     }
3763:     PetscFree(cols);
3764:   } else {
3765:     /* determine buffer space needed for message */
3766:     nz = 0;
3767:     for (i=0; i<m; i++) {
3768:       nz += locrowlens[i];
3769:     }
3770:     PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
3771:     mycols = ibuf;
3772:     /* receive message of column indices*/
3773:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
3774:     MPI_Get_count(&status,MPIU_INT,&maxnz);
3775:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3776:   }
3777: 
3778:   /* loop over local rows, determining number of off diagonal entries */
3779:   PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);
3780:   PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);
3781:   PetscMemzero(mask,Mbs*sizeof(PetscInt));
3782:   PetscMemzero(masked1,Mbs*sizeof(PetscInt));
3783:   PetscMemzero(masked2,Mbs*sizeof(PetscInt));
3784:   rowcount = 0; nzcount = 0;
3785:   for (i=0; i<mbs; i++) {
3786:     dcount  = 0;
3787:     odcount = 0;
3788:     for (j=0; j<bs; j++) {
3789:       kmax = locrowlens[rowcount];
3790:       for (k=0; k<kmax; k++) {
3791:         tmp = mycols[nzcount++]/bs;
3792:         if (!mask[tmp]) {
3793:           mask[tmp] = 1;
3794:           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3795:           else masked1[dcount++] = tmp;
3796:         }
3797:       }
3798:       rowcount++;
3799:     }
3800: 
3801:     dlens[i]  = dcount;
3802:     odlens[i] = odcount;

3804:     /* zero out the mask elements we set */
3805:     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3806:     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3807:   }

3809: 
3810:   if (!sizesset) {
3811:     MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);
3812:   }
3813:   MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);

3815:   if (!rank) {
3816:     PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);
3817:     /* read in my part of the matrix numerical values  */
3818:     nz = procsnz[0];
3819:     vals = buf;
3820:     mycols = ibuf;
3821:     if (size == 1)  nz -= extra_rows;
3822:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3823:     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

3825:     /* insert into matrix */
3826:     jj      = rstart*bs;
3827:     for (i=0; i<m; i++) {
3828:       MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3829:       mycols += locrowlens[i];
3830:       vals   += locrowlens[i];
3831:       jj++;
3832:     }
3833:     /* read in other processors (except the last one) and ship out */
3834:     for (i=1; i<size-1; i++) {
3835:       nz   = procsnz[i];
3836:       vals = buf;
3837:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3838:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
3839:     }
3840:     /* the last proc */
3841:     if (size != 1){
3842:       nz   = procsnz[i] - extra_rows;
3843:       vals = buf;
3844:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3845:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3846:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);
3847:     }
3848:     PetscFree(procsnz);
3849:   } else {
3850:     /* receive numeric values */
3851:     PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);

3853:     /* receive message of values*/
3854:     vals   = buf;
3855:     mycols = ibuf;
3856:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
3857:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
3858:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

3860:     /* insert into matrix */
3861:     jj      = rstart*bs;
3862:     for (i=0; i<m; i++) {
3863:       MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3864:       mycols += locrowlens[i];
3865:       vals   += locrowlens[i];
3866:       jj++;
3867:     }
3868:   }
3869:   PetscFree(locrowlens);
3870:   PetscFree(buf);
3871:   PetscFree(ibuf);
3872:   PetscFree2(rowners,browners);
3873:   PetscFree2(dlens,odlens);
3874:   PetscFree3(mask,masked1,masked2);
3875:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3876:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

3878:   return(0);
3879: }
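/*
   Example (illustrative sketch, not part of the PETSc source): loading a binary matrix into
   an MPIBAIJ matrix.  The file name "matrix.dat" is a placeholder; the block size can be
   chosen at load time with -matload_block_size <bs>, which is read in the options block at
   the top of MatLoad_MPIBAIJ().

.vb
   Mat         A;
   PetscViewer viewer;
   PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetType(A,MATMPIBAIJ);
   MatLoad(A,viewer);
   PetscViewerDestroy(&viewer);
.ve
*/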

3883: /*@
3884:    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

3886:    Input Parameters:
3887: .  mat  - the matrix
3888: .  fact - factor

3890:    Not Collective, each process can use a different factor

3892:    Level: advanced

3894:   Notes:
3895:    This can also be set by the command line option: -mat_use_hash_table <fact>

3897: .keywords: matrix, hashtable, factor, HT

3899: .seealso: MatSetOption()
3900: @*/
3901: PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3902: {

3906:   PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
3907:   return(0);
3908: }
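/*
   Example (illustrative sketch, not part of the PETSc source): enabling the hash-table
   assembly path and choosing a larger table factor; the value 1.6 is arbitrary.

.vb
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.6);
.ve
*/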

3913: PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3914: {
3915:   Mat_MPIBAIJ *baij;

3918:   baij = (Mat_MPIBAIJ*)mat->data;
3919:   baij->ht_fact = fact;
3920:   return(0);
3921: }

3926: PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3927: {
3928:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3930:   *Ad     = a->A;
3931:   *Ao     = a->B;
3932:   *colmap = a->garray;
3933:   return(0);
3934: }
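/*
   Example (illustrative sketch, not part of the PETSc source): accessing the local diagonal
   and off-diagonal blocks of an assembled MPIBAIJ matrix A.  Ad and Ao are references to the
   internal SeqBAIJ matrices (a->A and a->B) and should not be destroyed by the caller; colmap
   is the garray mapping block columns of Ao to global block-column numbers.

.vb
   Mat      Ad,Ao;
   PetscInt *colmap;
   MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);
.ve
*/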

3936: /*
3937:     Special version for direct calls from Fortran (to eliminate two function call overheads)
3938: */
3939: #if defined(PETSC_HAVE_FORTRAN_CAPS)
3940: #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3941: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3942: #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3943: #endif

3947: /*@C
3948:   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

3950:   Collective on Mat

3952:   Input Parameters:
3953: + mat - the matrix
3954: . min - number of input rows
3955: . im - input rows
3956: . nin - number of input columns
3957: . in - input columns
3958: . v - numerical values input
3959: - addvin - INSERT_VALUES or ADD_VALUES

3961:   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

3963:   Level: advanced

3965: .seealso:   MatSetValuesBlocked()
3966: @*/
3967: PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3968: {
3969:   /* convert input arguments to C version */
3970:   Mat             mat = *matin;
3971:   PetscInt        m = *min, n = *nin;
3972:   InsertMode      addv = *addvin;

3974:   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3975:   const MatScalar *value;
3976:   MatScalar       *barray=baij->barray;
3977:   PetscBool       roworiented = baij->roworiented;
3978:   PetscErrorCode  ierr;
3979:   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3980:   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3981:   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3982: 
3984:   /* tasks normally handled by MatSetValuesBlocked() */
3985:   if (mat->insertmode == NOT_SET_VALUES) {
3986:     mat->insertmode = addv;
3987:   }
3988: #if defined(PETSC_USE_DEBUG) 
3989:   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3990:   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3991: #endif
3992:   if (mat->assembled) {
3993:     mat->was_assembled = PETSC_TRUE;
3994:     mat->assembled     = PETSC_FALSE;
3995:   }
3996:   PetscLogEventBegin(MAT_SetValues,mat,0,0,0);


3999:   if(!barray) {
4000:     PetscMalloc(bs2*sizeof(MatScalar),&barray);
4001:     baij->barray = barray;
4002:   }

4004:   if (roworiented) {
4005:     stepval = (n-1)*bs;
4006:   } else {
4007:     stepval = (m-1)*bs;
4008:   }
4009:   for (i=0; i<m; i++) {
4010:     if (im[i] < 0) continue;
4011: #if defined(PETSC_USE_DEBUG)
4012:     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4013: #endif
4014:     if (im[i] >= rstart && im[i] < rend) {
4015:       row = im[i] - rstart;
4016:       for (j=0; j<n; j++) {
4017:         /* If NumCol = 1 then a copy is not required */
4018:         if ((roworiented) && (n == 1)) {
4019:           barray = (MatScalar*)v + i*bs2;
4020:         } else if((!roworiented) && (m == 1)) {
4021:           barray = (MatScalar*)v + j*bs2;
4022:         } else { /* Here a copy is required */
4023:           if (roworiented) {
4024:             value = v + i*(stepval+bs)*bs + j*bs;
4025:           } else {
4026:             value = v + j*(stepval+bs)*bs + i*bs;
4027:           }
4028:           for (ii=0; ii<bs; ii++,value+=stepval) {
4029:             for (jj=0; jj<bs; jj++) {
4030:               *barray++  = *value++;
4031:             }
4032:           }
4033:           barray -=bs2;
4034:         }
4035: 
4036:         if (in[j] >= cstart && in[j] < cend){
4037:           col  = in[j] - cstart;
4038:           MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
4039:         }
4040:         else if (in[j] < 0) continue;
4041: #if defined(PETSC_USE_DEBUG)
4042:         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4043: #endif
4044:         else {
4045:           if (mat->was_assembled) {
4046:             if (!baij->colmap) {
4047:               CreateColmap_MPIBAIJ_Private(mat);
4048:             }

4050: #if defined(PETSC_USE_DEBUG)
4051: #if defined (PETSC_USE_CTABLE)
4052:             { PetscInt data;
4053:               PetscTableFind(baij->colmap,in[j]+1,&data);
4054:               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4055:             }
4056: #else
4057:             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4058: #endif
4059: #endif
4060: #if defined (PETSC_USE_CTABLE)
4061:             PetscTableFind(baij->colmap,in[j]+1,&col);
4062:             col  = (col - 1)/bs;
4063: #else
4064:             col = (baij->colmap[in[j]] - 1)/bs;
4065: #endif
4066:             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4067:               DisAssemble_MPIBAIJ(mat);
4068:               col =  in[j];
4069:             }
4070:           }
4071:           else col = in[j];
4072:           MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
4073:         }
4074:       }
4075:     } else {
4076:       if (!baij->donotstash) {
4077:         if (roworiented) {
4078:           MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
4079:         } else {
4080:           MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
4081:         }
4082:       }
4083:     }
4084:   }
4085: 
4086:   /* task normally handled by MatSetValuesBlocked() */
4087:   PetscLogEventEnd(MAT_SetValues,mat,0,0,0);
4088:   return(0);
4089: }

4093: /*@
4094:      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4095:          CSR format the local rows. 

4097:    Collective on MPI_Comm

4099:    Input Parameters:
4100: +  comm - MPI communicator
4101: .  bs - the block size, only a block size of 1 is supported
4102: .  m - number of local rows (Cannot be PETSC_DECIDE)
4103: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
4104:        This value should be the same as the local size used in creating the 
4105:        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4106: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4107: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4108: .   i - row indices
4109: .   j - column indices
4110: -   a - matrix values

4112:    Output Parameter:
4113: .   mat - the matrix

4115:    Level: intermediate

4117:    Notes:
4118:        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4119:      thus you CANNOT change the matrix entries by changing the values of a[] after you have 
4120:      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

4122:        The i and j indices are 0 based, and the i array holds offsets into the local j array
4123:      (row r uses entries i[r] through i[r+1]-1 of j).

4124: .keywords: matrix, aij, compressed row, sparse, parallel

4126: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4127:           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4128: @*/
4129: PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4130: {


4135:   if (i[0]) {
4136:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4137:   }
4138:   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4139:   MatCreate(comm,mat);
4140:   MatSetSizes(*mat,m,n,M,N);
4141:   MatSetType(*mat,MATMPIBAIJ);
4142:   MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);
4143:   return(0);
4144: }
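/*
   Example (illustrative sketch, not part of the PETSc source): building a matrix from local
   CSR arrays with block size 1, the only block size this routine documents as supported.
   The arrays below are placeholders describing two local rows with two nonzeros each; every
   process passes the rows it owns.

.vb
   PetscInt    ii[3] = {0,2,4};
   PetscInt    jj[4] = {0,1,1,2};
   PetscScalar aa[4] = {1.0,2.0,3.0,4.0};
   Mat         A;
   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,3,ii,jj,aa,&A);
.ve
*/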