Actual source code: baij.c

  2: /*
  3:     Defines the basic matrix operations for the BAIJ (block compressed row)
  4:   matrix storage format.
  5: */
  6: #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
  7: #include <petscblaslapack.h>
  8: #include <../src/mat/blockinvert.h>


 13: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values)
 14: {
 15:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 17:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 18:   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
 19:   PetscReal      shift = 0.0;

 22:   if (a->idiagvalid) {
 23:     if (values)*values = a->idiag;
 24:     return(0);
 25:   }
 26:   MatMarkDiagonal_SeqBAIJ(A);
 27:   diag_offset = a->diag;
 28:   if (!a->idiag) {
 29:     PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);
 30:     PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));
 31:   }
 32:   diag    = a->idiag;
 33:   mdiag   = a->idiag+bs2*mbs;
 34:   if (values) *values = a->idiag;
 35:   /* factor and invert each block */
 36:   switch (bs){
 37:     case 1:
 38:       for (i=0; i<mbs; i++) {
 39:         odiag = v + 1*diag_offset[i];
 40:         diag[0]  = odiag[0];
 41:         mdiag[0] = odiag[0];
 42:         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 43:         diag    += 1;
 44:         mdiag   += 1;
 45:       }
 46:       break;
 47:     case 2:
 48:       for (i=0; i<mbs; i++) {
 49:         odiag   = v + 4*diag_offset[i];
 50:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 51:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 52:         Kernel_A_gets_inverse_A_2(diag,shift);
 53:         diag    += 4;
 54:         mdiag   += 4;
 55:       }
 56:       break;
 57:     case 3:
 58:       for (i=0; i<mbs; i++) {
 59:         odiag    = v + 9*diag_offset[i];
 60:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 61:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 62:         diag[8]  = odiag[8];
 63:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 64:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 65:         mdiag[8] = odiag[8];
 66:         Kernel_A_gets_inverse_A_3(diag,shift);
 67:         diag    += 9;
 68:         mdiag   += 9;
 69:       }
 70:       break;
 71:     case 4:
 72:       for (i=0; i<mbs; i++) {
 73:         odiag  = v + 16*diag_offset[i];
 74:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 75:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 76:         Kernel_A_gets_inverse_A_4(diag,shift);
 77:         diag  += 16;
 78:         mdiag += 16;
 79:       }
 80:       break;
 81:     case 5:
 82:       for (i=0; i<mbs; i++) {
 83:         odiag = v + 25*diag_offset[i];
 84:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
 85:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
 86:         Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
 87:         diag  += 25;
 88:         mdiag += 25;
 89:       }
 90:       break;
 91:     case 6:
 92:       for (i=0; i<mbs; i++) {
 93:         odiag = v + 36*diag_offset[i];
 94:         PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
 95:         PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
 96:         Kernel_A_gets_inverse_A_6(diag,shift);
 97:         diag  += 36;
 98:         mdiag += 36;
 99:       }
100:       break;
101:     case 7:
102:       for (i=0; i<mbs; i++) {
103:         odiag = v + 49*diag_offset[i];
104:         PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
105:         PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
106:         Kernel_A_gets_inverse_A_7(diag,shift);
107:         diag  += 49;
108:         mdiag += 49;
109:       }
110:       break;
111:     default:
112:       PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);
113:       for (i=0; i<mbs; i++) {
114:         odiag = v + bs2*diag_offset[i];
115:         PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
116:         PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
117:         Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);
118:         diag  += bs2;
119:         mdiag += bs2;
120:       }
121:       PetscFree2(v_work,v_pivots);
122:   }
123:   a->idiagvalid = PETSC_TRUE;
124:   return(0);
125: }

129: PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132:   PetscScalar        *x,x1,s1;
133:   const PetscScalar  *b;
134:   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135:   PetscErrorCode     ierr;
136:   PetscInt           m = a->mbs,i,i2,nz,j;
137:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

140:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141:   its = its*lits;
142:   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

148:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

150:   diag  = a->diag;
151:   idiag = a->idiag;
152:   VecGetArray(xx,&x);
153:   VecGetArrayRead(bb,&b);

155:   if (flag & SOR_ZERO_INITIAL_GUESS) {
156:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157:       x[0] = b[0]*idiag[0];
158:       i2     = 1;
159:       idiag += 1;
160:       for (i=1; i<m; i++) {
161:         v     = aa + ai[i];
162:         vi    = aj + ai[i];
163:         nz    = diag[i] - ai[i];
164:         s1    = b[i2];
165:         for (j=0; j<nz; j++) {
166:           s1 -= v[j]*x[vi[j]];
167:         }
168:         x[i2]   = idiag[0]*s1;
169:         idiag   += 1;
170:         i2      += 1;
171:       }
172:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173:       PetscLogFlops(a->nz);
174:     }
175:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177:       i2    = 0;
178:       mdiag = a->idiag+a->mbs;
179:       for (i=0; i<m; i++) {
180:         x1      = x[i2];
181:         x[i2]   = mdiag[0]*x1;
182:         mdiag  += 1;
183:         i2     += 1;
184:       }
185:       PetscLogFlops(m);
186:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
188:     }
189:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190:       idiag   = a->idiag+a->mbs - 1;
191:       i2      = m - 1;
192:       x1      = x[i2];
193:       x[i2]   = idiag[0]*x1;
194:       idiag -= 1;
195:       i2    -= 1;
196:       for (i=m-2; i>=0; i--) {
197:         v     = aa + (diag[i]+1);
198:         vi    = aj + diag[i] + 1;
199:         nz    = ai[i+1] - diag[i] - 1;
200:         s1    = x[i2];
201:         for (j=0; j<nz; j++) {
202:           s1 -= v[j]*x[vi[j]];
203:         }
204:         x[i2]   = idiag[0]*s1;
205:         idiag   -= 1;
206:         i2      -= 1;
207:       }
208:       PetscLogFlops(a->nz);
209:     }
210:   } else {
211:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212:   }
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArrayRead(bb,&b);
215:   return(0);
216: }

220: PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221: {
222:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223:   PetscScalar        *x,x1,x2,s1,s2;
224:   const PetscScalar  *b;
225:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226:   PetscErrorCode     ierr;
227:   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

231:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232:   its = its*lits;
233:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

239:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

241:   diag  = a->diag;
242:   idiag = a->idiag;
243:   VecGetArray(xx,&x);
244:   VecGetArrayRead(bb,&b);

246:   if (flag & SOR_ZERO_INITIAL_GUESS) {
247:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250:       i2     = 2;
251:       idiag += 4;
252:       for (i=1; i<m; i++) {
253:         v     = aa + 4*ai[i];
254:         vi    = aj + ai[i];
255:         nz    = diag[i] - ai[i];
256:         s1    = b[i2]; s2 = b[i2+1];
257:         for (j=0; j<nz; j++) {
258:           idx  = 2*vi[j];
259:           it   = 4*j;
260:           x1   = x[idx]; x2 = x[1+idx];
261:           s1  -= v[it]*x1 + v[it+2]*x2;
262:           s2  -= v[it+1]*x1 + v[it+3]*x2;
263:         }
264:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266:         idiag   += 4;
267:         i2      += 2;
268:       }
269:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270:       PetscLogFlops(4.0*(a->nz));
271:     }
272:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274:       i2    = 0;
275:       mdiag = a->idiag+4*a->mbs;
276:       for (i=0; i<m; i++) {
277:         x1      = x[i2]; x2 = x[i2+1];
278:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280:         mdiag  += 4;
281:         i2     += 2;
282:       }
283:       PetscLogFlops(6.0*m);
284:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
286:     }
287:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288:       idiag   = a->idiag+4*a->mbs - 4;
289:       i2      = 2*m - 2;
290:       x1      = x[i2]; x2 = x[i2+1];
291:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293:       idiag -= 4;
294:       i2    -= 2;
295:       for (i=m-2; i>=0; i--) {
296:         v     = aa + 4*(diag[i]+1);
297:         vi    = aj + diag[i] + 1;
298:         nz    = ai[i+1] - diag[i] - 1;
299:         s1    = x[i2]; s2 = x[i2+1];
300:         for (j=0; j<nz; j++) {
301:            idx  = 2*vi[j];
302:           it   = 4*j;
303:           x1   = x[idx]; x2 = x[1+idx];
304:           s1  -= v[it]*x1 + v[it+2]*x2;
305:           s2  -= v[it+1]*x1 + v[it+3]*x2;
306:         }
307:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309:         idiag   -= 4;
310:         i2      -= 2;
311:       }
312:       PetscLogFlops(4.0*(a->nz));
313:     }
314:   } else {
315:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316:   }
317:   VecRestoreArray(xx,&x);
318:   VecRestoreArrayRead(bb,&b);
319:   return(0);
320: }

324: PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325: {
326:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329:   const PetscScalar  *b;
330:   PetscErrorCode     ierr;
331:   PetscInt           m = a->mbs,i,i2,nz,idx;
332:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

335:   its = its*lits;
336:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

343:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

345:   diag  = a->diag;
346:   idiag = a->idiag;
347:   VecGetArray(xx,&x);
348:   VecGetArrayRead(bb,&b);

350:   if (flag & SOR_ZERO_INITIAL_GUESS) {
351:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355:       i2     = 3;
356:       idiag += 9;
357:       for (i=1; i<m; i++) {
358:         v     = aa + 9*ai[i];
359:         vi    = aj + ai[i];
360:         nz    = diag[i] - ai[i];
361:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362:         while (nz--) {
363:           idx  = 3*(*vi++);
364:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368:           v   += 9;
369:         }
370:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373:         idiag   += 9;
374:         i2      += 3;
375:       }
376:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377:       PetscLogFlops(9.0*(a->nz));
378:     }
379:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381:       i2    = 0;
382:       mdiag = a->idiag+9*a->mbs;
383:       for (i=0; i<m; i++) {
384:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388:         mdiag  += 9;
389:         i2     += 3;
390:       }
391:       PetscLogFlops(15.0*m);
392:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
394:     }
395:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396:       idiag   = a->idiag+9*a->mbs - 9;
397:       i2      = 3*m - 3;
398:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402:       idiag -= 9;
403:       i2    -= 3;
404:       for (i=m-2; i>=0; i--) {
405:         v     = aa + 9*(diag[i]+1);
406:         vi    = aj + diag[i] + 1;
407:         nz    = ai[i+1] - diag[i] - 1;
408:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409:         while (nz--) {
410:           idx  = 3*(*vi++);
411:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415:           v   += 9;
416:         }
417:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420:         idiag   -= 9;
421:         i2      -= 3;
422:       }
423:       PetscLogFlops(9.0*(a->nz));
424:     }
425:   } else {
426:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427:   }
428:   VecRestoreArray(xx,&x);
429:   VecRestoreArrayRead(bb,&b);
430:   return(0);
431: }

435: PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436: {
437:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440:   const PetscScalar  *b;
441:   PetscErrorCode     ierr;
442:   PetscInt           m = a->mbs,i,i2,nz,idx;
443:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

446:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447:   its = its*lits;
448:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

454:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

456:   diag  = a->diag;
457:   idiag = a->idiag;
458:   VecGetArray(xx,&x);
459:   VecGetArrayRead(bb,&b);

461:   if (flag & SOR_ZERO_INITIAL_GUESS) {
462:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467:       i2     = 4;
468:       idiag += 16;
469:       for (i=1; i<m; i++) {
470:         v     = aa + 16*ai[i];
471:         vi    = aj + ai[i];
472:         nz    = diag[i] - ai[i];
473:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474:         while (nz--) {
475:           idx  = 4*(*vi++);
476:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481:           v   += 16;
482:         }
483:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487:         idiag   += 16;
488:         i2      += 4;
489:       }
490:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491:       PetscLogFlops(16.0*(a->nz));
492:     }
493:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495:       i2    = 0;
496:       mdiag = a->idiag+16*a->mbs;
497:       for (i=0; i<m; i++) {
498:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503:         mdiag  += 16;
504:         i2     += 4;
505:       }
506:       PetscLogFlops(28.0*m);
507:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
509:     }
510:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511:       idiag   = a->idiag+16*a->mbs - 16;
512:       i2      = 4*m - 4;
513:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518:       idiag -= 16;
519:       i2    -= 4;
520:       for (i=m-2; i>=0; i--) {
521:         v     = aa + 16*(diag[i]+1);
522:         vi    = aj + diag[i] + 1;
523:         nz    = ai[i+1] - diag[i] - 1;
524:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525:         while (nz--) {
526:           idx  = 4*(*vi++);
527:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532:           v   += 16;
533:         }
534:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538:         idiag   -= 16;
539:         i2      -= 4;
540:       }
541:       PetscLogFlops(16.0*(a->nz));
542:     }
543:   } else {
544:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545:   }
546:   VecRestoreArray(xx,&x);
547:   VecRestoreArrayRead(bb,&b);
548:   return(0);
549: }

553: PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554: {
555:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558:   const PetscScalar  *b;
559:   PetscErrorCode     ierr;
560:   PetscInt           m = a->mbs,i,i2,nz,idx;
561:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

564:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565:   its = its*lits;
566:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

572:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

574:   diag  = a->diag;
575:   idiag = a->idiag;
576:   VecGetArray(xx,&x);
577:   VecGetArrayRead(bb,&b);

579:   if (flag & SOR_ZERO_INITIAL_GUESS) {
580:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586:       i2     = 5;
587:       idiag += 25;
588:       for (i=1; i<m; i++) {
589:         v     = aa + 25*ai[i];
590:         vi    = aj + ai[i];
591:         nz    = diag[i] - ai[i];
592:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593:         while (nz--) {
594:           idx  = 5*(*vi++);
595:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601:           v   += 25;
602:         }
603:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608:         idiag   += 25;
609:         i2      += 5;
610:       }
611:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612:       PetscLogFlops(25.0*(a->nz));
613:     }
614:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616:       i2    = 0;
617:       mdiag = a->idiag+25*a->mbs;
618:       for (i=0; i<m; i++) {
619:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625:         mdiag  += 25;
626:         i2     += 5;
627:       }
628:       PetscLogFlops(45.0*m);
629:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
631:     }
632:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633:       idiag   = a->idiag+25*a->mbs - 25;
634:       i2      = 5*m - 5;
635:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641:       idiag -= 25;
642:       i2    -= 5;
643:       for (i=m-2; i>=0; i--) {
644:         v     = aa + 25*(diag[i]+1);
645:         vi    = aj + diag[i] + 1;
646:         nz    = ai[i+1] - diag[i] - 1;
647:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648:         while (nz--) {
649:           idx  = 5*(*vi++);
650:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656:           v   += 25;
657:         }
658:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663:         idiag   -= 25;
664:         i2      -= 5;
665:       }
666:       PetscLogFlops(25.0*(a->nz));
667:     }
668:   } else {
669:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670:   }
671:   VecRestoreArray(xx,&x);
672:   VecRestoreArrayRead(bb,&b);
673:   return(0);
674: }

678: PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679: {
680:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683:   const PetscScalar  *b;
684:   PetscErrorCode     ierr;
685:   PetscInt           m = a->mbs,i,i2,nz,idx;
686:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

689:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690:   its = its*lits;
691:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

697:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

699:   diag  = a->diag;
700:   idiag = a->idiag;
701:   VecGetArray(xx,&x);
702:   VecGetArrayRead(bb,&b);

704:   if (flag & SOR_ZERO_INITIAL_GUESS) {
705:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706:       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707:       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708:       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709:       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710:       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711:       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712:       i2     = 6;
713:       idiag += 36;
714:       for (i=1; i<m; i++) {
715:         v     = aa + 36*ai[i];
716:         vi    = aj + ai[i];
717:         nz    = diag[i] - ai[i];
718:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719:         while (nz--) {
720:           idx  = 6*(*vi++);
721:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728:           v   += 36;
729:         }
730:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736:         idiag   += 36;
737:         i2      += 6;
738:       }
739:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740:       PetscLogFlops(36.0*(a->nz));
741:     }
742:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744:       i2    = 0;
745:       mdiag = a->idiag+36*a->mbs;
746:       for (i=0; i<m; i++) {
747:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748:         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749:         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750:         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751:         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752:         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753:         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754:         mdiag  += 36;
755:         i2     += 6;
756:       }
757:       PetscLogFlops(60.0*m);
758:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
760:     }
761:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762:       idiag   = a->idiag+36*a->mbs - 36;
763:       i2      = 6*m - 6;
764:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765:       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766:       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767:       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768:       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769:       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770:       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771:       idiag -= 36;
772:       i2    -= 6;
773:       for (i=m-2; i>=0; i--) {
774:         v     = aa + 36*(diag[i]+1);
775:         vi    = aj + diag[i] + 1;
776:         nz    = ai[i+1] - diag[i] - 1;
777:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778:         while (nz--) {
779:           idx  = 6*(*vi++);
780:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787:           v   += 36;
788:         }
789:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795:         idiag   -= 36;
796:         i2      -= 6;
797:       }
798:       PetscLogFlops(36.0*(a->nz));
799:     }
800:   } else {
801:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802:   }
803:   VecRestoreArray(xx,&x);
804:   VecRestoreArrayRead(bb,&b);
805:   return(0);
806: }

810: PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811: {
812:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815:   const PetscScalar  *b;
816:   PetscErrorCode     ierr;
817:   PetscInt           m = a->mbs,i,i2,nz,idx;
818:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

821:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822:   its = its*lits;
823:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

829:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

831:   diag  = a->diag;
832:   idiag = a->idiag;
833:   VecGetArray(xx,&x);
834:   VecGetArrayRead(bb,&b);

836:   if (flag & SOR_ZERO_INITIAL_GUESS) {
837:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838:       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839:       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840:       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841:       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842:       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843:       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844:       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845:       i2     = 7;
846:       idiag += 49;
847:       for (i=1; i<m; i++) {
848:         v     = aa + 49*ai[i];
849:         vi    = aj + ai[i];
850:         nz    = diag[i] - ai[i];
851:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852:         while (nz--) {
853:           idx  = 7*(*vi++);
854:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862:           v   += 49;
863:         }
864:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871:         idiag   += 49;
872:         i2      += 7;
873:       }
874:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875:       PetscLogFlops(49.0*(a->nz));
876:     }
877:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879:       i2    = 0;
880:       mdiag = a->idiag+49*a->mbs;
881:       for (i=0; i<m; i++) {
882:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883:         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884:         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885:         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886:         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887:         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888:         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889:         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890:         mdiag  += 49;
891:         i2     += 7;
892:       }
893:       PetscLogFlops(93.0*m);
894:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
896:     }
897:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898:       idiag   = a->idiag+49*a->mbs - 49;
899:       i2      = 7*m - 7;
900:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901:       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902:       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903:       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904:       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905:       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906:       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907:       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908:       idiag -= 49;
909:       i2    -= 7;
910:       for (i=m-2; i>=0; i--) {
911:         v     = aa + 49*(diag[i]+1);
912:         vi    = aj + diag[i] + 1;
913:         nz    = ai[i+1] - diag[i] - 1;
914:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915:         while (nz--) {
916:           idx  = 7*(*vi++);
917:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925:           v   += 49;
926:         }
927:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934:         idiag   -= 49;
935:         i2      -= 7;
936:       }
937:       PetscLogFlops(49.0*(a->nz));
938:     }
939:   } else {
940:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941:   }
942:   VecRestoreArray(xx,&x);
943:   VecRestoreArrayRead(bb,&b);
944:   return(0);
945: }

949: PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950: {
951:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952:   PetscScalar        *x,*work,*w,*workt;
953:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954:   const PetscScalar  *b;
955:   PetscErrorCode     ierr;
956:   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

960:   its = its*lits;
961:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

968:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

970:   diag  = a->diag;
971:   idiag = a->idiag;
972:   if (!a->mult_work) {
973:     k    = PetscMax(A->rmap->n,A->cmap->n);
974:     PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);
975:   }
976:   work = a->mult_work;
977:   if (!a->sor_work) {
978:     PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);
979:   }
980:   w = a->sor_work;

982:   VecGetArray(xx,&x);
983:   VecGetArrayRead(bb,&b);

985:   if (flag & SOR_ZERO_INITIAL_GUESS) {
986:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987:       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988:       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991:       i2     = bs;
992:       idiag += bs2;
993:       for (i=1; i<m; i++) {
994:         v     = aa + bs2*ai[i];
995:         vi    = aj + ai[i];
996:         nz    = diag[i] - ai[i];

998:         PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
999:         /* copy all rows of x that are needed into contiguous space */
1000:         workt = work;
1001:         for (j=0; j<nz; j++) {
1002:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1003:           workt += bs;
1004:         }
1005:         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006:        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007:         while (nz--) {
1008:           idx  = N*(*vi++);
1009:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013:           v   += N2;
1014:           } */

1016:         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017:         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/

1021:         idiag   += bs2;
1022:         i2      += bs;
1023:       }
1024:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025:       PetscLogFlops(1.0*bs2*(a->nz));
1026:     }
1027:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029:       i2    = 0;
1030:       mdiag = a->idiag+bs2*a->mbs;
1031:       PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));
1032:       for (i=0; i<m; i++) {
1033:         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034:         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */

1039:         mdiag  += bs2;
1040:         i2     += bs;
1041:       }
1042:       PetscLogFlops(2.0*bs*(bs-1)*m);
1043:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
1045:     }
1046:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047:       idiag   = a->idiag+bs2*a->mbs - bs2;
1048:       i2      = bs*m - bs;
1049:       PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));
1050:       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051:       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055:       idiag -= bs2;
1056:       i2    -= bs;
1057:       for (i=m-2; i>=0; i--) {
1058:         v     = aa + bs2*(diag[i]+1);
1059:         vi    = aj + diag[i] + 1;
1060:         nz    = ai[i+1] - diag[i] - 1;

1062:         PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));
1063:         /* copy all rows of x that are needed into contiguous space */
1064:         workt = work;
1065:         for (j=0; j<nz; j++) {
1066:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1067:           workt += bs;
1068:         }
1069:         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070:         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 
1071:         while (nz--) {
1072:           idx  = N*(*vi++);
1073:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077:           v   += N2;
1078:           } */

1080:         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081:         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084:         idiag   -= bs2;
1085:         i2      -= bs;
1086:       }
1087:       PetscLogFlops(1.0*bs2*(a->nz));
1088:     }
1089:   } else {
1090:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091:   }
1092:   VecRestoreArray(xx,&x);
1093:   VecRestoreArrayRead(bb,&b);
1094:   return(0);
1095: }

1097: /*
1098:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099: */
1100: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103: #define matsetvaluesblocked4_ matsetvaluesblocked4
1104: #endif

1109: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110: {
1111:   Mat               A = *AA;
1112:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114:   PetscInt          *ai=a->i,*ailen=a->ilen;
1115:   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116:   const PetscScalar *value = v;
1117:   MatScalar         *ap,*aa = a->a,*bap;

1120:   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121:   stepval = (n-1)*4;
1122:   for (k=0; k<m; k++) { /* loop over added rows */
1123:     row  = im[k];
1124:     rp   = aj + ai[row];
1125:     ap   = aa + 16*ai[row];
1126:     nrow = ailen[row];
1127:     low  = 0;
1128:     high = nrow;
1129:     for (l=0; l<n; l++) { /* loop over added columns */
1130:       col = in[l];
1131:       if (col <= lastcol) low = 0; else high = nrow;
1132:       lastcol = col;
1133:       value = v + k*(stepval+4 + l)*4;
1134:       while (high-low > 7) {
1135:         t = (low+high)/2;
1136:         if (rp[t] > col) high = t;
1137:         else             low  = t;
1138:       }
1139:       for (i=low; i<high; i++) {
1140:         if (rp[i] > col) break;
1141:         if (rp[i] == col) {
1142:           bap  = ap +  16*i;
1143:           for (ii=0; ii<4; ii++,value+=stepval) {
1144:             for (jj=ii; jj<16; jj+=4) {
1145:               bap[jj] += *value++;
1146:             }
1147:           }
1148:           goto noinsert2;
1149:         }
1150:       }
1151:       N = nrow++ - 1;
1152:       high++; /* added new column index thus must search to one higher than before */
1153:       /* shift up all the later entries in this row */
1154:       for (ii=N; ii>=i; ii--) {
1155:         rp[ii+1] = rp[ii];
1156:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157:       }
1158:       if (N >= i) {
1159:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160:       }
1161:       rp[i] = col;
1162:       bap   = ap +  16*i;
1163:       for (ii=0; ii<4; ii++,value+=stepval) {
1164:         for (jj=ii; jj<16; jj+=4) {
1165:           bap[jj] = *value++;
1166:         }
1167:       }
1168:       noinsert2:;
1169:       low = i;
1170:     }
1171:     ailen[row] = nrow;
1172:   }
1173:   PetscFunctionReturnVoid();
1174: }

1177: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178: #define matsetvalues4_ MATSETVALUES4
1179: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180: #define matsetvalues4_ matsetvalues4
1181: #endif

1186: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187: {
1188:   Mat         A = *AA;
1189:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191:   PetscInt    *ai=a->i,*ailen=a->ilen;
1192:   PetscInt    *aj=a->j,brow,bcol;
1193:   PetscInt    ridx,cidx,lastcol = -1;
1194:   MatScalar   *ap,value,*aa=a->a,*bap;
1195: 
1197:   for (k=0; k<m; k++) { /* loop over added rows */
1198:     row  = im[k]; brow = row/4;
1199:     rp   = aj + ai[brow];
1200:     ap   = aa + 16*ai[brow];
1201:     nrow = ailen[brow];
1202:     low  = 0;
1203:     high = nrow;
1204:     for (l=0; l<n; l++) { /* loop over added columns */
1205:       col = in[l]; bcol = col/4;
1206:       ridx = row % 4; cidx = col % 4;
1207:       value = v[l + k*n];
1208:       if (col <= lastcol) low = 0; else high = nrow;
1209:       lastcol = col;
1210:       while (high-low > 7) {
1211:         t = (low+high)/2;
1212:         if (rp[t] > bcol) high = t;
1213:         else              low  = t;
1214:       }
1215:       for (i=low; i<high; i++) {
1216:         if (rp[i] > bcol) break;
1217:         if (rp[i] == bcol) {
1218:           bap  = ap +  16*i + 4*cidx + ridx;
1219:           *bap += value;
1220:           goto noinsert1;
1221:         }
1222:       }
1223:       N = nrow++ - 1;
1224:       high++; /* added new column thus must search to one higher than before */
1225:       /* shift up all the later entries in this row */
1226:       for (ii=N; ii>=i; ii--) {
1227:         rp[ii+1] = rp[ii];
1228:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229:       }
1230:       if (N>=i) {
1231:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232:       }
1233:       rp[i]                    = bcol;
1234:       ap[16*i + 4*cidx + ridx] = value;
1235:       noinsert1:;
1236:       low = i;
1237:     }
1238:     ailen[brow] = nrow;
1239:   }
1240:   PetscFunctionReturnVoid();
1241: }

1244: /*
1245:      Checks for missing diagonals
1246: */
1249: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250: {
1251:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1253:   PetscInt       *diag,*jj = a->j,i;

1256:   MatMarkDiagonal_SeqBAIJ(A);
1257:   *missing = PETSC_FALSE;
1258:   if (A->rmap->n > 0 && !jj) {
1259:     *missing  = PETSC_TRUE;
1260:     if (d) *d = 0;
1261:     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1262:   } else {
1263:     diag     = a->diag;
1264:     for (i=0; i<a->mbs; i++) {
1265:       if (jj[diag[i]] != i) {
1266:         *missing  = PETSC_TRUE;
1267:         if (d) *d = i;
1268:         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269:       }
1270:     }
1271:   }
1272:   return(0);
1273: }

1277: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1278: {
1279:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281:   PetscInt       i,j,m = a->mbs;

1284:   if (!a->diag) {
1285:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
1286:     PetscLogObjectMemory(A,m*sizeof(PetscInt));
1287:     a->free_diag = PETSC_TRUE;
1288:   }
1289:   for (i=0; i<m; i++) {
1290:     a->diag[i] = a->i[i+1];
1291:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1292:       if (a->j[j] == i) {
1293:         a->diag[i] = j;
1294:         break;
1295:       }
1296:     }
1297:   }
1298:   return(0);
1299: }



1306: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1307: {
1308:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310:   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1311:   PetscInt       *tia, *tja;

1314:   *nn = n;
1315:   if (!ia) return(0);
1316:   if (symmetric) {
1317:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1318:     nz   = tia[n];
1319:   } else {
1320:     tia = a->i; tja = a->j;
1321:   }
1322: 
1323:   if (!blockcompressed && bs > 1) {
1324:     (*nn) *= bs;
1325:     /* malloc & create the natural set of indices */
1326:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1327:     if (n) {
1328:       (*ia)[0] = 0;
1329:       for (j=1; j<bs; j++) {
1330:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1331:       }
1332:     }

1334:     for (i=1; i<n; i++) {
1335:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1336:       for (j=1; j<bs; j++) {
1337:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1338:       }
1339:     }
1340:     if (n) {
1341:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1342:     }

1344:     if (ja) {
1345:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1346:       cnt = 0;
1347:       for (i=0; i<n; i++) {
1348:         for (j=0; j<bs; j++) {
1349:           for (k=tia[i]; k<tia[i+1]; k++) {
1350:             for (l=0; l<bs; l++) {
1351:               (*ja)[cnt++] = bs*tja[k] + l;
1352:             }
1353:           }
1354:         }
1355:       }
1356:     }

1358:     n     *= bs;
1359:     nz *= bs*bs;
1360:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1361:       PetscFree(tia);
1362:       PetscFree(tja);
1363:     }
1364:   } else if (oshift == 1) {
1365:     if (symmetric) {
1366:       PetscInt nz = tia[A->rmap->n/bs];
1367:       /*  add 1 to i and j indices */
1368:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1369:       *ia = tia;
1370:       if (ja) {
1371:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1372:         *ja = tja;
1373:       }
1374:     } else {
1375:       PetscInt nz = a->i[A->rmap->n/bs];
1376:       /* malloc space and  add 1 to i and j indices */
1377:       PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);
1378:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1379:       if (ja) {
1380:         PetscMalloc(nz*sizeof(PetscInt),ja);
1381:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1382:       }
1383:     }
1384:   } else {
1385:     *ia = tia;
1386:     if (ja) *ja = tja;
1387:   }
1388: 
1389:   return(0);
1390: }

1394: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1395: {

1399:   if (!ia) return(0);
1400:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1401:     PetscFree(*ia);
1402:     if (ja) {PetscFree(*ja);}
1403:   }
1404:   return(0);
1405: }

1409: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1410: {
1411:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1415: #if defined(PETSC_USE_LOG)
1416:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1417: #endif
1418:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1419:   ISDestroy(&a->row);
1420:   ISDestroy(&a->col);
1421:   if (a->free_diag) {PetscFree(a->diag);}
1422:   PetscFree(a->idiag);
1423:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1424:   PetscFree(a->solve_work);
1425:   PetscFree(a->mult_work);
1426:   PetscFree(a->sor_work);
1427:   ISDestroy(&a->icol);
1428:   PetscFree(a->saved_values);
1429:   PetscFree(a->xtoy);
1430:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1432:   MatDestroy(&a->sbaijMat);
1433:   MatDestroy(&a->parent);
1434:   PetscFree(A->data);

1436:   PetscObjectChangeTypeName((PetscObject)A,0);
1437:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);
1438:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1439:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1440:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1441:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1442:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1443:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1444:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1445:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);
1446:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);
1447:   return(0);
1448: }

1452: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1453: {
1454:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1458:   switch (op) {
1459:   case MAT_ROW_ORIENTED:
1460:     a->roworiented    = flg;
1461:     break;
1462:   case MAT_KEEP_NONZERO_PATTERN:
1463:     a->keepnonzeropattern = flg;
1464:     break;
1465:   case MAT_NEW_NONZERO_LOCATIONS:
1466:     a->nonew          = (flg ? 0 : 1);
1467:     break;
1468:   case MAT_NEW_NONZERO_LOCATION_ERR:
1469:     a->nonew          = (flg ? -1 : 0);
1470:     break;
1471:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1472:     a->nonew          = (flg ? -2 : 0);
1473:     break;
1474:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1475:     a->nounused       = (flg ? -1 : 0);
1476:     break;
1477:   case MAT_CHECK_COMPRESSED_ROW:
1478:     a->compressedrow.check = flg;
1479:     break;
1480:   case MAT_NEW_DIAGONALS:
1481:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1482:   case MAT_USE_HASH_TABLE:
1483:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1484:     break;
1485:   case MAT_SYMMETRIC:
1486:   case MAT_STRUCTURALLY_SYMMETRIC:
1487:   case MAT_HERMITIAN:
1488:   case MAT_SYMMETRY_ETERNAL:
1489:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1490:     break;
1491:   default:
1492:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1493:   }
1494:   return(0);
1495: }

1499: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1500: {
1501:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1503:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1504:   MatScalar      *aa,*aa_i;
1505:   PetscScalar    *v_i;

1508:   bs  = A->rmap->bs;
1509:   ai  = a->i;
1510:   aj  = a->j;
1511:   aa  = a->a;
1512:   bs2 = a->bs2;
1513: 
1514:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1515: 
1516:   bn  = row/bs;   /* Block number */
1517:   bp  = row % bs; /* Block Position */
1518:   M   = ai[bn+1] - ai[bn];
1519:   *nz = bs*M;
1520: 
1521:   if (v) {
1522:     *v = 0;
1523:     if (*nz) {
1524:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
1525:       for (i=0; i<M; i++) { /* for each block in the block row */
1526:         v_i  = *v + i*bs;
1527:         aa_i = aa + bs2*(ai[bn] + i);
1528:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1529:       }
1530:     }
1531:   }

1533:   if (idx) {
1534:     *idx = 0;
1535:     if (*nz) {
1536:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1537:       for (i=0; i<M; i++) { /* for each block in the block row */
1538:         idx_i = *idx + i*bs;
1539:         itmp  = bs*aj[ai[bn] + i];
1540:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1541:       }
1542:     }
1543:   }
1544:   return(0);
1545: }

1549: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1550: {

1554:   if (idx) {PetscFree(*idx);}
1555:   if (v)   {PetscFree(*v);}
1556:   return(0);
1557: }


1563: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1564: {
1565:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1566:   Mat            C;
1568:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1569:   PetscInt       *rows,*cols,bs2=a->bs2;
1570:   MatScalar      *array;

1573:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1574:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1575:     PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1576:     PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1578:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1579:     MatCreate(((PetscObject)A)->comm,&C);
1580:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1581:     MatSetType(C,((PetscObject)A)->type_name);
1582:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1583:     PetscFree(col);
1584:   } else {
1585:     C = *B;
1586:   }

1588:   array = a->a;
1589:   PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);
1590:   for (i=0; i<mbs; i++) {
1591:     cols[0] = i*bs;
1592:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1593:     len = ai[i+1] - ai[i];
1594:     for (j=0; j<len; j++) {
1595:       rows[0] = (*aj++)*bs;
1596:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1597:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1598:       array += bs2;
1599:     }
1600:   }
1601:   PetscFree2(rows,cols);
1602: 
1603:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1604:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1605: 
1606:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1607:     *B = C;
1608:   } else {
1609:     MatHeaderMerge(A,C);
1610:   }
1611:   return(0);
1612: }

1617: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1618: {
1620:   Mat            Btrans;

1623:   *f = PETSC_FALSE;
1624:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1625:   MatEqual_SeqBAIJ(B,Btrans,f);
1626:   MatDestroy(&Btrans);
1627:   return(0);
1628: }

1633: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1634: {
1635:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1637:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1638:   int            fd;
1639:   PetscScalar    *aa;
1640:   FILE           *file;

1643:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1644:   PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1645:   col_lens[0] = MAT_FILE_CLASSID;

1647:   col_lens[1] = A->rmap->N;
1648:   col_lens[2] = A->cmap->n;
1649:   col_lens[3] = a->nz*bs2;

1651:   /* store lengths of each row and write (including header) to file */
1652:   count = 0;
1653:   for (i=0; i<a->mbs; i++) {
1654:     for (j=0; j<bs; j++) {
1655:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1656:     }
1657:   }
1658:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1659:   PetscFree(col_lens);

1661:   /* store column indices (zero start index) */
1662:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1663:   count = 0;
1664:   for (i=0; i<a->mbs; i++) {
1665:     for (j=0; j<bs; j++) {
1666:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1667:         for (l=0; l<bs; l++) {
1668:           jj[count++] = bs*a->j[k] + l;
1669:         }
1670:       }
1671:     }
1672:   }
1673:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1674:   PetscFree(jj);

1676:   /* store nonzero values */
1677:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1678:   count = 0;
1679:   for (i=0; i<a->mbs; i++) {
1680:     for (j=0; j<bs; j++) {
1681:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1682:         for (l=0; l<bs; l++) {
1683:           aa[count++] = a->a[bs2*k + l*bs + j];
1684:         }
1685:       }
1686:     }
1687:   }
1688:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1689:   PetscFree(aa);

1691:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1692:   if (file) {
1693:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1694:   }
1695:   return(0);
1696: }

1700: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1701: {
1702:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1703:   PetscErrorCode    ierr;
1704:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1705:   PetscViewerFormat format;

1708:   PetscViewerGetFormat(viewer,&format);
1709:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1710:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1711:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1712:     Mat aij;
1713:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1714:     MatView(aij,viewer);
1715:     MatDestroy(&aij);
1716:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1717:      return(0);
1718:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1719:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1720:     PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1721:     for (i=0; i<a->mbs; i++) {
1722:       for (j=0; j<bs; j++) {
1723:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1724:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1725:           for (l=0; l<bs; l++) {
1726: #if defined(PETSC_USE_COMPLEX)
1727:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1728:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1729:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1730:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1731:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1732:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1733:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1734:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1735:             }
1736: #else
1737:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1738:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1739:             }
1740: #endif
1741:           }
1742:         }
1743:         PetscViewerASCIIPrintf(viewer,"\n");
1744:       }
1745:     }
1746:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1747:   } else {
1748:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1749:     PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1750:     for (i=0; i<a->mbs; i++) {
1751:       for (j=0; j<bs; j++) {
1752:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1753:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1754:           for (l=0; l<bs; l++) {
1755: #if defined(PETSC_USE_COMPLEX)
1756:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1757:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1758:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1759:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1760:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1761:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1762:             } else {
1763:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1764:             }
1765: #else
1766:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1767: #endif
1768:           }
1769:         }
1770:         PetscViewerASCIIPrintf(viewer,"\n");
1771:       }
1772:     }
1773:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1774:   }
1775:   PetscViewerFlush(viewer);
1776:   return(0);
1777: }

1781: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1782: {
1783:   Mat            A = (Mat) Aa;
1784:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1786:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1787:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1788:   MatScalar      *aa;
1789:   PetscViewer    viewer;
1790:   PetscViewerFormat format;

1793:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1794:   PetscViewerGetFormat(viewer,&format);

1796:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1798:   /* loop over matrix elements drawing boxes */

1800:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1801:     color = PETSC_DRAW_BLUE;
1802:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1803:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1804:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1805:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1806:         aa = a->a + j*bs2;
1807:         for (k=0; k<bs; k++) {
1808:           for (l=0; l<bs; l++) {
1809:             if (PetscRealPart(*aa++) >=  0.) continue;
1810:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1811:           }
1812:         }
1813:       }
1814:     }
1815:     color = PETSC_DRAW_CYAN;
1816:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1817:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1818:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1819:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1820:         aa = a->a + j*bs2;
1821:         for (k=0; k<bs; k++) {
1822:           for (l=0; l<bs; l++) {
1823:             if (PetscRealPart(*aa++) != 0.) continue;
1824:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1825:           }
1826:         }
1827:       }
1828:     }
1829:     color = PETSC_DRAW_RED;
1830:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1831:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1832:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1833:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1834:         aa = a->a + j*bs2;
1835:         for (k=0; k<bs; k++) {
1836:           for (l=0; l<bs; l++) {
1837:             if (PetscRealPart(*aa++) <= 0.) continue;
1838:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1839:           }
1840:         }
1841:       }
1842:     }
1843:   } else {
1844:     /* use contour shading to indicate magnitude of values */
1845:     /* first determine max of all nonzero values */
1846:     PetscDraw   popup;
1847:     PetscReal scale,maxv = 0.0;

1849:     for (i=0; i<a->nz*a->bs2; i++) {
1850:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1851:     }
1852:     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1853:     PetscDrawGetPopup(draw,&popup);
1854:     if (popup) {PetscDrawScalePopup(popup,0.0,maxv);}
1855:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1856:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1857:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1858:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1859:         aa = a->a + j*bs2;
1860:         for (k=0; k<bs; k++) {
1861:           for (l=0; l<bs; l++) {
1862:             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1863:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1864:           }
1865:         }
1866:       }
1867:     }
1868:   }
1869:   return(0);
1870: }

1874: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1875: {
1877:   PetscReal      xl,yl,xr,yr,w,h;
1878:   PetscDraw      draw;
1879:   PetscBool      isnull;


1883:   PetscViewerDrawGetDraw(viewer,0,&draw);
1884:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1886:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1887:   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1888:   xr += w;    yr += h;  xl = -w;     yl = -h;
1889:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1890:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1891:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1892:   return(0);
1893: }

1897: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1898: {
1900:   PetscBool      iascii,isbinary,isdraw;

1903:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1904:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1905:   PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1906:   if (iascii){
1907:     MatView_SeqBAIJ_ASCII(A,viewer);
1908:   } else if (isbinary) {
1909:     MatView_SeqBAIJ_Binary(A,viewer);
1910:   } else if (isdraw) {
1911:     MatView_SeqBAIJ_Draw(A,viewer);
1912:   } else {
1913:     Mat B;
1914:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1915:     MatView(B,viewer);
1916:     MatDestroy(&B);
1917:   }
1918:   return(0);
1919: }


1924: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1925: {
1926:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1927:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1928:   PetscInt    *ai = a->i,*ailen = a->ilen;
1929:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1930:   MatScalar   *ap,*aa = a->a;

1933:   for (k=0; k<m; k++) { /* loop over rows */
1934:     row  = im[k]; brow = row/bs;
1935:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1936:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1937:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1938:     nrow = ailen[brow];
1939:     for (l=0; l<n; l++) { /* loop over columns */
1940:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1941:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1942:       col  = in[l] ;
1943:       bcol = col/bs;
1944:       cidx = col%bs;
1945:       ridx = row%bs;
1946:       high = nrow;
1947:       low  = 0; /* assume unsorted */
1948:       while (high-low > 5) {
1949:         t = (low+high)/2;
1950:         if (rp[t] > bcol) high = t;
1951:         else             low  = t;
1952:       }
1953:       for (i=low; i<high; i++) {
1954:         if (rp[i] > bcol) break;
1955:         if (rp[i] == bcol) {
1956:           *v++ = ap[bs2*i+bs*cidx+ridx];
1957:           goto finished;
1958:         }
1959:       }
1960:       *v++ = 0.0;
1961:       finished:;
1962:     }
1963:   }
1964:   return(0);
1965: }

/*
   MatSetValuesBlocked_SeqBAIJ - Inserts or adds an m x n array of bs x bs blocks.

   Input:
     A  - the matrix
     m  - number of block rows, im[] their block indices
     n  - number of block columns, in[] their block indices
     v  - the values, laid out as one dense (m*bs) x (n*bs) array; it is read
          row by row when a->roworiented is set and column by column otherwise
     is - ADD_VALUES adds to existing blocks; any other mode overwrites them

   Negative block row or block column indices are skipped. Indices that are
   too large are only checked when PETSC_USE_DEBUG is defined.

   A block that is not yet stored is handled according to a->nonew:
   1 skips it silently and -1 raises an error. Otherwise it is inserted,
   after MatSeqXAIJReallocateAIJ() has had the chance to act.
   NOTE(review): MatSeqXAIJReallocateAIJ is defined outside this chunk; it is
   handed the local storage pointers by name and presumably enlarges the
   storage when the row is full - confirm before renaming any local here.
*/
1969: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1970: {
1971:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1972:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1973:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1974:   PetscErrorCode    ierr;
1975:   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1976:   PetscBool         roworiented=a->roworiented;
1977:   const PetscScalar *value = v;
1978:   MatScalar         *ap,*aa = a->a,*bap;

  /* stepval is the distance from the end of one line of a block in v to the
     start of the next line of the same block */
1981:   if (roworiented) {
1982:     stepval = (n-1)*bs;
1983:   } else {
1984:     stepval = (m-1)*bs;
1985:   }
1986:   for (k=0; k<m; k++) { /* loop over added rows */
1987:     row  = im[k];
1988:     if (row < 0) continue;
1989: #if defined(PETSC_USE_DEBUG)  
1990:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1991: #endif
    /* rp/ap: block column indices and values of this block row;
       rmax: allocated number of blocks, nrow: number currently in use */
1992:     rp   = aj + ai[row];
1993:     ap   = aa + bs2*ai[row];
1994:     rmax = imax[row];
1995:     nrow = ailen[row];
1996:     low  = 0;
1997:     high = nrow;
1998:     for (l=0; l<n; l++) { /* loop over added columns */
1999:       if (in[l] < 0) continue;
2000: #if defined(PETSC_USE_DEBUG)  
2001:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2002: #endif
2003:       col = in[l];
      /* locate the first entry of block (k,l) inside v */
2004:       if (roworiented) {
2005:         value = v + (k*(stepval+bs) + l)*bs;
2006:       } else {
2007:         value = v + (l*(stepval+bs) + k)*bs;
2008:       }
      /* narrow the search window using the previous column: restart from the
         front when the columns are not increasing, otherwise keep low */
2009:       if (col <= lastcol) low = 0; else high = nrow;
2010:       lastcol = col;
      /* bisection down to a window of at most 7 blocks, then a linear scan */
2011:       while (high-low > 7) {
2012:         t = (low+high)/2;
2013:         if (rp[t] > col) high = t;
2014:         else             low  = t;
2015:       }
2016:       for (i=low; i<high; i++) {
2017:         if (rp[i] > col) break;
2018:         if (rp[i] == col) {
          /* block already present: add to it or overwrite it */
2019:           bap  = ap +  bs2*i;
2020:           if (roworiented) {
            /* input row ii is scattered with stride bs, i.e. the block is
               transposed into its stored layout */
2021:             if (is == ADD_VALUES) {
2022:               for (ii=0; ii<bs; ii++,value+=stepval) {
2023:                 for (jj=ii; jj<bs2; jj+=bs) {
2024:                   bap[jj] += *value++;
2025:                 }
2026:               }
2027:             } else {
2028:               for (ii=0; ii<bs; ii++,value+=stepval) {
2029:                 for (jj=ii; jj<bs2; jj+=bs) {
2030:                   bap[jj] = *value++;
2031:                 }
2032:               }
2033:             }
2034:           } else {
            /* column-oriented input already matches the stored layout */
2035:             if (is == ADD_VALUES) {
2036:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2037:                 for (jj=0; jj<bs; jj++) {
2038:                   bap[jj] += value[jj];
2039:                 }
2040:                 bap += bs;
2041:               }
2042:             } else {
2043:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2044:                 for (jj=0; jj<bs; jj++) {
2045:                   bap[jj]  = value[jj];
2046:                 }
2047:                 bap += bs;
2048:               }
2049:             }
2050:           }
2051:           goto noinsert2;
2052:         }
2053:       }
      /* the block is not stored yet; i is the position where it belongs */
2054:       if (nonew == 1) goto noinsert2;
2055:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2056:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2057:       N = nrow++ - 1; high++;
2058:       /* shift up all the later entries in this row */
2059:       for (ii=N; ii>=i; ii--) {
2060:         rp[ii+1] = rp[ii];
2061:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2062:       }
2063:       if (N >= i) {
2064:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2065:       }
2066:       rp[i] = col;
2067:       bap   = ap +  bs2*i;
      /* a new block is always written, never added to, whatever the insert mode */
2068:       if (roworiented) {
2069:         for (ii=0; ii<bs; ii++,value+=stepval) {
2070:           for (jj=ii; jj<bs2; jj+=bs) {
2071:             bap[jj] = *value++;
2072:           }
2073:         }
2074:       } else {
2075:         for (ii=0; ii<bs; ii++,value+=stepval) {
2076:           for (jj=0; jj<bs; jj++) {
2077:             *bap++  = *value++;
2078:           }
2079:         }
2080:       }
2081:       noinsert2:;
      /* the next column search resumes from the position just visited */
2082:       low = i;
2083:     }
    /* record the new number of blocks in use in this block row */
2084:     ailen[row] = nrow;
2085:   }
2086:   return(0);
2087: }

2091: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2092: {
2093:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2094:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2095:   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2097:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2098:   MatScalar      *aa = a->a,*ap;
2099:   PetscReal      ratio=0.6;

2102:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

2104:   if (m) rmax = ailen[0];
2105:   for (i=1; i<mbs; i++) {
2106:     /* move each row back by the amount of empty slots (fshift) before it*/
2107:     fshift += imax[i-1] - ailen[i-1];
2108:     rmax   = PetscMax(rmax,ailen[i]);
2109:     if (fshift) {
2110:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2111:       N = ailen[i];
2112:       for (j=0; j<N; j++) {
2113:         ip[j-fshift] = ip[j];
2114:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
2115:       }
2116:     }
2117:     ai[i] = ai[i-1] + ailen[i-1];
2118:   }
2119:   if (mbs) {
2120:     fshift += imax[mbs-1] - ailen[mbs-1];
2121:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2122:   }
2123:   /* reset ilen and imax for each row */
2124:   for (i=0; i<mbs; i++) {
2125:     ailen[i] = imax[i] = ai[i+1] - ai[i];
2126:   }
2127:   a->nz = ai[mbs];

2129:   /* diagonals may have moved, so kill the diagonal pointers */
2130:   a->idiagvalid = PETSC_FALSE;
2131:   if (fshift && a->diag) {
2132:     PetscFree(a->diag);
2133:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
2134:     a->diag = 0;
2135:   }
2136:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2137:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
2138:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
2139:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
2140:   A->info.mallocs     += a->reallocs;
2141:   a->reallocs          = 0;
2142:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

2144:   MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
2145:   A->same_nonzero = PETSC_TRUE;
2146:   return(0);
2147: }

2149: /* 
2150:    This function returns an array of flags which indicate the locations of contiguous
2151:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2152:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2153:    Assume: sizes should be long enough to hold all the values.
2154: */
2157: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2158: {
2159:   PetscInt   i,j,k,row;
2160:   PetscBool  flg;

2163:   for (i=0,j=0; i<n; j++) {
2164:     row = idx[i];
2165:     if (row%bs!=0) { /* Not the begining of a block */
2166:       sizes[j] = 1;
2167:       i++;
2168:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2169:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2170:       i++;
2171:     } else { /* Begining of the block, so check if the complete block exists */
2172:       flg = PETSC_TRUE;
2173:       for (k=1; k<bs; k++) {
2174:         if (row+k != idx[i+k]) { /* break in the block */
2175:           flg = PETSC_FALSE;
2176:           break;
2177:         }
2178:       }
2179:       if (flg) { /* No break in the bs */
2180:         sizes[j] = bs;
2181:         i+= bs;
2182:       } else {
2183:         sizes[j] = 1;
2184:         i++;
2185:       }
2186:     }
2187:   }
2188:   *bs_max = j;
2189:   return(0);
2190: }
2191: 
2194: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2195: {
2196:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2197:   PetscErrorCode    ierr;
2198:   PetscInt          i,j,k,count,*rows;
2199:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2200:   PetscScalar       zero = 0.0;
2201:   MatScalar         *aa;
2202:   const PetscScalar *xx;
2203:   PetscScalar       *bb;

2206:   /* fix right hand side if needed */
2207:   if (x && b) {
2208:     VecGetArrayRead(x,&xx);
2209:     VecGetArray(b,&bb);
2210:     for (i=0; i<is_n; i++) {
2211:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2212:     }
2213:     VecRestoreArrayRead(x,&xx);
2214:     VecRestoreArray(b,&bb);
2215:   }

2217:   /* Make a copy of the IS and  sort it */
2218:   /* allocate memory for rows,sizes */
2219:   PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);

2221:   /* copy IS values to rows, and sort them */
2222:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2223:   PetscSortInt(is_n,rows);

2225:   if (baij->keepnonzeropattern) {
2226:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2227:     bs_max = is_n;
2228:     A->same_nonzero = PETSC_TRUE;
2229:   } else {
2230:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2231:     A->same_nonzero = PETSC_FALSE;
2232:   }

2234:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2235:     row   = rows[j];
2236:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2237:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2238:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2239:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2240:       if (diag != (PetscScalar)0.0) {
2241:         if (baij->ilen[row/bs] > 0) {
2242:           baij->ilen[row/bs]       = 1;
2243:           baij->j[baij->i[row/bs]] = row/bs;
2244:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2245:         }
2246:         /* Now insert all the diagonal values for this bs */
2247:         for (k=0; k<bs; k++) {
2248:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2249:         }
2250:       } else { /* (diag == 0.0) */
2251:         baij->ilen[row/bs] = 0;
2252:       } /* end (diag == 0.0) */
2253:     } else { /* (sizes[i] != bs) */
2254: #if defined (PETSC_USE_DEBUG)
2255:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2256: #endif
2257:       for (k=0; k<count; k++) {
2258:         aa[0] =  zero;
2259:         aa    += bs;
2260:       }
2261:       if (diag != (PetscScalar)0.0) {
2262:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2263:       }
2264:     }
2265:   }

2267:   PetscFree2(rows,sizes);
2268:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2269:   return(0);
2270: }

2274: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2275: {
2276:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2277:   PetscErrorCode    ierr;
2278:   PetscInt          i,j,k,count;
2279:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2280:   PetscScalar       zero = 0.0;
2281:   MatScalar         *aa;
2282:   const PetscScalar *xx;
2283:   PetscScalar       *bb;
2284:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2287:   /* fix right hand side if needed */
2288:   if (x && b) {
2289:     VecGetArrayRead(x,&xx);
2290:     VecGetArray(b,&bb);
2291:     vecs = PETSC_TRUE;
2292:   }
2293:   A->same_nonzero = PETSC_TRUE;

2295:   /* zero the columns */
2296:   PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);
2297:   PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));
2298:   for (i=0; i<is_n; i++) {
2299:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2300:     zeroed[is_idx[i]] = PETSC_TRUE;
2301:   }
2302:   for (i=0; i<A->rmap->N; i++) {
2303:     if (!zeroed[i]) {
2304:       row = i/bs;
2305:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2306:         for (k=0; k<bs; k++) {
2307:           col = bs*baij->j[j] + k;
2308:           if (zeroed[col]) {
2309:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2310:             if (vecs) bb[i] -= aa[0]*xx[col];
2311:             aa[0] = 0.0;
2312:           }
2313:         }
2314:       }
2315:     } else if (vecs) bb[i] = diag*xx[i];
2316:   }
2317:   PetscFree(zeroed);
2318:   if (vecs) {
2319:     VecRestoreArrayRead(x,&xx);
2320:     VecRestoreArray(b,&bb);
2321:   }

2323:   /* zero the rows */
2324:   for (i=0; i<is_n; i++) {
2325:     row   = is_idx[i];
2326:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2327:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2328:     for (k=0; k<count; k++) {
2329:       aa[0] =  zero;
2330:       aa    += bs;
2331:     }
2332:     if (diag != (PetscScalar)0.0) {
2333:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2334:     }
2335:   }
2336:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2337:   return(0);
2338: }

/*
   MatSetValues_SeqBAIJ - Inserts or adds an m-by-n set of scalar entries, addressed by
   point (not block) row and column indices, into the block storage of a SeqBAIJ matrix.

   Input Parameters:
+  A  - the matrix
.  m  - number of rows in im
.  im - point row indices; negative entries are skipped
.  n  - number of columns in in
.  in - point column indices; negative entries are skipped
.  v  - the values, read as v[l + k*n] when a->roworiented is set, else as v[k + l*m]
-  is - ADD_VALUES accumulates into an existing entry, anything else overwrites it

   Notes:
   The upper range checks on rows and columns are compiled only under PETSC_USE_DEBUG.
*/
2342: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2343: {
2344:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2345:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2346:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2347:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2349:   PetscInt       ridx,cidx,bs2=a->bs2;
2350:   PetscBool      roworiented=a->roworiented;
2351:   MatScalar      *ap,value,*aa=a->a,*bap;

2355:   for (k=0; k<m; k++) { /* loop over added rows */
2356:     row  = im[k];
2357:     brow = row/bs;
2358:     if (row < 0) continue;
2359: #if defined(PETSC_USE_DEBUG)  
2360:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2361: #endif
      /* rp/ap: block-column indices and values of block row brow; nrow of its rmax slots are in use */
2362:     rp   = aj + ai[brow];
2363:     ap   = aa + bs2*ai[brow];
2364:     rmax = imax[brow];
2365:     nrow = ailen[brow];
2366:     low  = 0;
2367:     high = nrow;
2368:     for (l=0; l<n; l++) { /* loop over added columns */
2369:       if (in[l] < 0) continue;
2370: #if defined(PETSC_USE_DEBUG)  
2371:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2372: #endif
2373:       col = in[l]; bcol = col/bs;
2374:       ridx = row % bs; cidx = col % bs;
2375:       if (roworiented) {
2376:         value = v[l + k*n];
2377:       } else {
2378:         value = v[k + l*m];
2379:       }
        /* reuse the previous search window while columns arrive in increasing order, otherwise restart from 0 */
2380:       if (col <= lastcol) low = 0; else high = nrow;
2381:       lastcol = col;
        /* bisect until the window is small, then scan it linearly */
2382:       while (high-low > 7) {
2383:         t = (low+high)/2;
2384:         if (rp[t] > bcol) high = t;
2385:         else              low  = t;
2386:       }
2387:       for (i=low; i<high; i++) {
2388:         if (rp[i] > bcol) break;
2389:         if (rp[i] == bcol) {
            /* block already present: entry (ridx,cidx) sits at offset bs*cidx + ridx inside the block */
2390:           bap  = ap +  bs2*i + bs*cidx + ridx;
2391:           if (is == ADD_VALUES) *bap += value;
2392:           else                  *bap  = value;
2393:           goto noinsert1;
2394:         }
2395:       }
        /* block not present: nonew == 1 drops the value silently, nonew == -1 raises an error */
2396:       if (nonew == 1) goto noinsert1;
2397:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
        /* NOTE(review): presumably enlarges the storage when this block row is full and refreshes
           the local pointers passed in (aa, ai, aj, rp, ap, imax) - confirm against the macro definition */
2398:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2399:       N = nrow++ - 1; high++;
2400:       /* shift up all the later entries in this row */
2401:       for (ii=N; ii>=i; ii--) {
2402:         rp[ii+1] = rp[ii];
2403:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2404:       }
        /* slot i was vacated by the shift above; clear it before writing the single new entry */
2405:       if (N>=i) {
2406:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2407:       }
2408:       rp[i]                      = bcol;
2409:       ap[bs2*i + bs*cidx + ridx] = value;
2410:       a->nz++;
2411:       noinsert1:;
2412:       low = i;
2413:     }
2414:     ailen[brow] = nrow;
2415:   }
2416:   A->same_nonzero = PETSC_FALSE;
2417:   return(0);
2418: }

2422: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2423: {
2424:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2425:   Mat            outA;
2427:   PetscBool      row_identity,col_identity;

2430:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2431:   ISIdentity(row,&row_identity);
2432:   ISIdentity(col,&col_identity);
2433:   if (!row_identity || !col_identity) {
2434:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2435:   }

2437:   outA            = inA;
2438:   inA->factortype = MAT_FACTOR_LU;

2440:   MatMarkDiagonal_SeqBAIJ(inA);

2442:   PetscObjectReference((PetscObject)row);
2443:   ISDestroy(&a->row);
2444:   a->row = row;
2445:   PetscObjectReference((PetscObject)col);
2446:   ISDestroy(&a->col);
2447:   a->col = col;
2448: 
2449:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2450:   ISDestroy(&a->icol);
2451:    ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2452:   PetscLogObjectParent(inA,a->icol);
2453: 
2454:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2455:   if (!a->solve_work) {
2456:     PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2457:     PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2458:   }
2459:   MatLUFactorNumeric(outA,inA,info);

2461:   return(0);
2462: }

2467: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2468: {
2469:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2470:   PetscInt    i,nz,mbs;

2473:   nz  = baij->maxnz;
2474:   mbs = baij->mbs;
2475:   for (i=0; i<nz; i++) {
2476:     baij->j[i] = indices[i];
2477:   }
2478:   baij->nz = nz;
2479:   for (i=0; i<mbs; i++) {
2480:     baij->ilen[i] = baij->imax[i];
2481:   }
2482:   return(0);
2483: }

2488: /*@
2489:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2490:        in the matrix.

2492:   Input Parameters:
2493: +  mat - the SeqBAIJ matrix
2494: -  indices - the column indices

2496:   Level: advanced

2498:   Notes:
2499:     This can be called if you have precomputed the nonzero structure of the 
2500:   matrix and want to provide it to the matrix object to improve the performance
2501:   of the MatSetValues() operation.

2503:     You MUST have set the correct numbers of nonzeros per row in the call to 
2504:   MatCreateSeqBAIJ(), and the column indices MUST be sorted.

2506:     MUST be called before any calls to MatSetValues();

2508: @*/
2509: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2510: {

  /* NOTE(review): presumably dispatches to the function attached to mat under the name
     "MatSeqBAIJSetColumnIndices_C" - confirm against the PetscUseMethod definition */
2516:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));
2517:   return(0);
2518: }

2522: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2523: {
2524:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2526:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2527:   PetscReal      atmp;
2528:   PetscScalar    *x,zero = 0.0;
2529:   MatScalar      *aa;
2530:   PetscInt       ncols,brow,krow,kcol;

2533:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2534:   bs   = A->rmap->bs;
2535:   aa   = a->a;
2536:   ai   = a->i;
2537:   aj   = a->j;
2538:   mbs  = a->mbs;

2540:   VecSet(v,zero);
2541:   VecGetArray(v,&x);
2542:   VecGetLocalSize(v,&n);
2543:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2544:   for (i=0; i<mbs; i++) {
2545:     ncols = ai[1] - ai[0]; ai++;
2546:     brow  = bs*i;
2547:     for (j=0; j<ncols; j++){
2548:       for (kcol=0; kcol<bs; kcol++){
2549:         for (krow=0; krow<bs; krow++){
2550:           atmp = PetscAbsScalar(*aa);aa++;
2551:           row = brow + krow;    /* row index */
2552:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2553:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2554:         }
2555:       }
2556:       aj++;
2557:     }
2558:   }
2559:   VecRestoreArray(v,&x);
2560:   return(0);
2561: }

2565: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2566: {

2570:   /* If the two matrices have the same copy implementation, use fast copy. */
2571:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2572:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2573:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2574:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2576:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2577:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2578:     PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2579:   } else {
2580:     MatCopy_Basic(A,B,str);
2581:   }
2582:   return(0);
2583: }

2587: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2588: {

2592:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);
2593:   return(0);
2594: }

2598: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2599: {
2600:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2602:   *array = a->a;
2603:   return(0);
2604: }

2608: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2609: {
2611:   return(0);
2612: }

2616: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2617: {
2618:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2620:   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2621:   PetscBLASInt   one=1;

2624:   if (str == SAME_NONZERO_PATTERN) {
2625:     PetscScalar alpha = a;
2626:     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2627:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2628:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2629:     if (y->xtoy && y->XtoY != X) {
2630:       PetscFree(y->xtoy);
2631:       MatDestroy(&y->XtoY);
2632:     }
2633:     if (!y->xtoy) { /* get xtoy */
2634:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2635:       y->XtoY = X;
2636:       PetscObjectReference((PetscObject)X);
2637:     }
2638:     for (i=0; i<x->nz; i++) {
2639:       j = 0;
2640:       while (j < bs2){
2641:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2642:         j++;
2643:       }
2644:     }
2645:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2646:   } else {
2647:     MatAXPY_Basic(Y,a,X,str);
2648:   }
2649:   return(0);
2650: }

2654: PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2655: {
2656:   PetscInt rbs,cbs;

2660:   PetscLayoutGetBlockSize(A->rmap,&rbs);
2661:   PetscLayoutGetBlockSize(A->cmap,&cbs);
2662:   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2663:   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2664:   return(0);
2665: }

2669: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2670: {
2671:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2672:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2673:   MatScalar      *aa = a->a;

2676:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2677:   return(0);
2678: }

2682: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2683: {
2684:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2685:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2686:   MatScalar      *aa = a->a;

2689:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2690:   return(0);
2691: }


2697: /*
2698:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2699: */
2700: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2701: {
2702:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2704:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2705:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2708:   *nn = n;
2709:   if (!ia) return(0);
2710:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2711:   else {
2712:     PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
2713:     PetscMemzero(collengths,n*sizeof(PetscInt));
2714:     PetscMalloc((n+1)*sizeof(PetscInt),&cia);
2715:     PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
2716:     jj = a->j;
2717:     for (i=0; i<nz; i++) {
2718:       collengths[jj[i]]++;
2719:     }
2720:     cia[0] = oshift;
2721:     for (i=0; i<n; i++) {
2722:       cia[i+1] = cia[i] + collengths[i];
2723:     }
2724:     PetscMemzero(collengths,n*sizeof(PetscInt));
2725:     jj   = a->j;
2726:     for (row=0; row<m; row++) {
2727:       mr = a->i[row+1] - a->i[row];
2728:       for (i=0; i<mr; i++) {
2729:         col = *jj++;
2730:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2731:       }
2732:     }
2733:     PetscFree(collengths);
2734:     *ia = cia; *ja = cja;
2735:   }
2736:   return(0);
2737: }

2741: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2742: {

2746:   if (!ia) return(0);
2747:   PetscFree(*ia);
2748:   PetscFree(*ja);
2749:   return(0);
2750: }

/*
   MatFDColoringApply_BAIJ - Fills J with finite-difference values obtained by evaluating
   the function stored in coloring->f at perturbed copies of x1.

   Input Parameters:
+  J        - the matrix to fill; it is zeroed first when already assembled, unless the
              option -mat_fd_coloring_dont_rezero is set
.  coloring - the coloring context (function, work vectors, column/row lists per color)
.  x1       - the point at which the differences are taken
.  flag     - not referenced in this routine
-  sctx     - first argument handed to the function

   Notes:
   One function evaluation is made for the base point (skipped when coloring->F already
   holds it) and one for every pair (color, offset within the block), i.e. ncolors*bs
   evaluations. coloring->vscaleforrow must be set, otherwise an error is raised.
*/
2754: PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2755: {
2756:   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2758:   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2759:   PetscScalar    dx,*y,*xx,*w3_array;
2760:   PetscScalar    *vscale_array;
2761:   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2762:   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2763:   void           *fctx = coloring->fctx;
2764:   PetscBool      flg = PETSC_FALSE;
2765:   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2766:   Vec            x1_tmp;

2772:   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");

2774:   PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);
2775:   MatSetUnfactored(J);
2776:   PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);
2777:   if (flg) {
2778:     PetscInfo(coloring,"Not calling MatZeroEntries()\n");
2779:   } else {
2780:     PetscBool  assembled;
2781:     MatAssembled(J,&assembled);
2782:     if (assembled) {
2783:       MatZeroEntries(J);
2784:     }
2785:   }

2787:   x1_tmp = x1;
2788:   if (!coloring->vscale){
2789:     VecDuplicate(x1_tmp,&coloring->vscale);
2790:   }
2791: 
2792:   /*
2793:     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it improperly sets
2794:     coloring->F for the coarser grids from the finest
2795:   */
2796:   if (coloring->F) {
2797:     VecGetLocalSize(coloring->F,&m1);
2798:     VecGetLocalSize(w1,&m2);
2799:     if (m1 != m2) {
2800:       coloring->F = 0;
2801:       }
2802:     }

    /* unorm is computed, and later read, only for the 'w' step type */
2804:   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2805:     VecNorm(x1_tmp,NORM_2,&unorm);
2806:   }
2807:   VecGetOwnershipRange(w1,&start,&end); /* OwnershipRange is used by ghosted x! */
2808: 
2809:   /* Set w1 = F(x1) */
2810:   if (coloring->F) {
2811:     w1          = coloring->F; /* use already computed value of function */
2812:     coloring->F = 0;
2813:   } else {
2814:     PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2815:     (*f)(sctx,x1_tmp,w1,fctx);
2816:     PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2817:   }
2818: 
2819:   if (!coloring->w3) {
2820:     VecDuplicate(x1_tmp,&coloring->w3);
2821:     PetscLogObjectParent(coloring,coloring->w3);
2822:   }
2823:   w3 = coloring->w3;

2825:     CHKMEMQ;
2826:     /* Compute all the local scale factors, including ghost points */
2827:   VecGetLocalSize(x1_tmp,&N);
2828:   VecGetArray(x1_tmp,&xx);
2829:   VecGetArray(coloring->vscale,&vscale_array);
2830:   if (ctype == IS_COLORING_GHOSTED){
2831:     col_start = 0; col_end = N;
2832:   } else if (ctype == IS_COLORING_GLOBAL){
      /* rebase both arrays so they can be indexed with start-based column numbers;
         the shift is undone before each array is restored */
2833:     xx = xx - start;
2834:     vscale_array = vscale_array - start;
2835:     col_start = start; col_end = N + start;
2836:   }    CHKMEMQ;
2837:   for (col=col_start; col<col_end; col++){
2838:     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2839:     if (coloring->htype[0] == 'w') {
2840:       dx = 1.0 + unorm;
2841:     } else {
2842:       dx  = xx[col];
2843:     }
      /* keep |dx| away from zero: exact zero becomes 1, anything smaller than umin is clamped to +/-umin */
2844:     if (dx == (PetscScalar)0.0) dx = 1.0;
2845: #if !defined(PETSC_USE_COMPLEX)
2846:     if (dx < umin && dx >= 0.0)      dx = umin;
2847:     else if (dx < 0.0 && dx > -umin) dx = -umin;
2848: #else
2849:     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2850:     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2851: #endif
2852:     dx               *= epsilon;
2853:     vscale_array[col] = (PetscScalar)1.0/dx;
2854:   }     CHKMEMQ;
2855:   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2856:   VecRestoreArray(coloring->vscale,&vscale_array);
2857:   if (ctype == IS_COLORING_GLOBAL){
2858:     VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2859:     VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2860:   }
2861:   CHKMEMQ;
2862:   if (coloring->vscaleforrow) {
2863:     vscaleforrow = coloring->vscaleforrow;
2864:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");

    /* srows holds the bs row indices of one block row passed to MatSetValues() below */
2866:   PetscMalloc(bs*sizeof(PetscInt),&srows);
2867:   /*
2868:     Loop over each color
2869:   */
2870:   VecGetArray(coloring->vscale,&vscale_array);
2871:   for (k=0; k<coloring->ncolors; k++) {
2872:     coloring->currentcolor = k;
      /* i selects which column inside each block is perturbed in this pass */
2873:     for (i=0; i<bs; i++) {
2874:       VecCopy(x1_tmp,w3);
2875:       VecGetArray(w3,&w3_array);
2876:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2877:       /*
2878:         Loop over each column associated with color 
2879:         adding the perturbation to the vector w3.
2880:       */
2881:       for (l=0; l<coloring->ncolumns[k]; l++) {
2882:         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2883:         if (coloring->htype[0] == 'w') {
2884:           dx = 1.0 + unorm;
2885:         } else {
2886:           dx  = xx[col];
2887:         }
2888:         if (dx == (PetscScalar)0.0) dx = 1.0;
2889: #if !defined(PETSC_USE_COMPLEX)
2890:         if (dx < umin && dx >= 0.0)      dx = umin;
2891:         else if (dx < 0.0 && dx > -umin) dx = -umin;
2892: #else
2893:         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2894:         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2895: #endif
2896:         dx            *= epsilon;
2897:         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2898:         w3_array[col] += dx;
2899:       }
2900:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2901:       VecRestoreArray(w3,&w3_array);

2903:       /*
2904:         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2905:         w2 = F(x1 + dx) - F(x1)
2906:       */
2907:       PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2908:       (*f)(sctx,w3,w2,fctx);
2909:       PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2910:       VecAXPY(w2,-1.0,w1);
2911: 
2912:       /*
2913:         Loop over rows of vector, putting results into Jacobian matrix
2914:       */
2915:       VecGetArray(w2,&y);
2916:       for (l=0; l<coloring->nrows[k]; l++) {
2917:         row    = bs*coloring->rows[k][l];             /* local row index */
2918:         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
          /* scale the bs differences of this block row in place and insert them as one column */
2919:         for (j=0; j<bs; j++) {
2920:             y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2921:           srows[j]  = row + start + j;
2922:         }
2923:         MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);
2924:       }
2925:       VecRestoreArray(w2,&y);
2926:     }
2927:   } /* endof for each color */
2928:   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2929:   VecRestoreArray(coloring->vscale,&vscale_array);
2930:   VecRestoreArray(x1_tmp,&xx);
2931:   PetscFree(srows);
2932: 
2933:   coloring->currentcolor = -1;
2934:   MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2935:   MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2936:   PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);
2937:   return(0);
2938: }

2940: /* -------------------------------------------------------------------*/
/*
   MatOps_Values - Function table of the SeqBAIJ matrix type.

   The initializer is positional: the marker comments give the slot number of the entry
   on the same line, and each function must stay in its slot. A 0 leaves the slot without
   a function in this table. Slot 54 uses MatFDColoringCreate_SeqAIJ, the SeqAIJ routine.
*/
2941: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2942:        MatGetRow_SeqBAIJ,
2943:        MatRestoreRow_SeqBAIJ,
2944:        MatMult_SeqBAIJ_N,
2945: /* 4*/ MatMultAdd_SeqBAIJ_N,
2946:        MatMultTranspose_SeqBAIJ,
2947:        MatMultTransposeAdd_SeqBAIJ,
2948:        0,
2949:        0,
2950:        0,
2951: /*10*/ 0,
2952:        MatLUFactor_SeqBAIJ,
2953:        0,
2954:        0,
2955:        MatTranspose_SeqBAIJ,
2956: /*15*/ MatGetInfo_SeqBAIJ,
2957:        MatEqual_SeqBAIJ,
2958:        MatGetDiagonal_SeqBAIJ,
2959:        MatDiagonalScale_SeqBAIJ,
2960:        MatNorm_SeqBAIJ,
2961: /*20*/ 0,
2962:        MatAssemblyEnd_SeqBAIJ,
2963:        MatSetOption_SeqBAIJ,
2964:        MatZeroEntries_SeqBAIJ,
2965: /*24*/ MatZeroRows_SeqBAIJ,
2966:        0,
2967:        0,
2968:        0,
2969:        0,
2970: /*29*/ MatSetUpPreallocation_SeqBAIJ,
2971:        0,
2972:        0,
2973:        MatGetArray_SeqBAIJ,
2974:        MatRestoreArray_SeqBAIJ,
2975: /*34*/ MatDuplicate_SeqBAIJ,
2976:        0,
2977:        0,
2978:        MatILUFactor_SeqBAIJ,
2979:        0,
2980: /*39*/ MatAXPY_SeqBAIJ,
2981:        MatGetSubMatrices_SeqBAIJ,
2982:        MatIncreaseOverlap_SeqBAIJ,
2983:        MatGetValues_SeqBAIJ,
2984:        MatCopy_SeqBAIJ,
2985: /*44*/ 0,
2986:        MatScale_SeqBAIJ,
2987:        0,
2988:        0,
2989:        MatZeroRowsColumns_SeqBAIJ,
2990: /*49*/ MatSetBlockSize_SeqBAIJ,
2991:        MatGetRowIJ_SeqBAIJ,
2992:        MatRestoreRowIJ_SeqBAIJ,
2993:        MatGetColumnIJ_SeqBAIJ,
2994:        MatRestoreColumnIJ_SeqBAIJ,
2995: /*54*/ MatFDColoringCreate_SeqAIJ,
2996:        0,
2997:        0,
2998:        0,
2999:        MatSetValuesBlocked_SeqBAIJ,
3000: /*59*/ MatGetSubMatrix_SeqBAIJ,
3001:        MatDestroy_SeqBAIJ,
3002:        MatView_SeqBAIJ,
3003:        0,
3004:        0,
3005: /*64*/ 0,
3006:        0,
3007:        0,
3008:        0,
3009:        0,
3010: /*69*/ MatGetRowMaxAbs_SeqBAIJ,
3011:        0,
3012:        MatConvert_Basic,
3013:        0,
3014:        0,
3015: /*74*/ 0,
3016:        MatFDColoringApply_BAIJ,
3017:        0,
3018:        0,
3019:        0,
3020: /*79*/ 0,
3021:        0,
3022:        0,
3023:        0,
3024:        MatLoad_SeqBAIJ,
3025: /*84*/ 0,
3026:        0,
3027:        0,
3028:        0,
3029:        0,
3030: /*89*/ 0,
3031:        0,
3032:        0,
3033:        0,
3034:        0,
3035: /*94*/ 0,
3036:        0,
3037:        0,
3038:        0,
3039:        0,
3040: /*99*/0,
3041:        0,
3042:        0,
3043:        0,
3044:        0,
3045: /*104*/0,
3046:        MatRealPart_SeqBAIJ,
3047:        MatImaginaryPart_SeqBAIJ,
3048:        0,
3049:        0,
3050: /*109*/0,
3051:        0,
3052:        0,
3053:        0,
3054:        MatMissingDiagonal_SeqBAIJ,
3055: /*114*/0,
3056:        0,
3057:        0,
3058:        0,
3059:        0,
3060: /*119*/0,
3061:        0,
3062:        MatMultHermitianTranspose_SeqBAIJ,
3063:        MatMultHermitianTransposeAdd_SeqBAIJ,
3064:        0,
3065: /*124*/0,
3066:        0,
3067:        MatInvertBlockDiagonal_SeqBAIJ
3068: };

3073: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3074: {
3075:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3076:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

3080:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

3082:   /* allocate space for values if not already there */
3083:   if (!aij->saved_values) {
3084:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
3085:     PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));
3086:   }

3088:   /* copy values over */
3089:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
3090:   return(0);
3091: }

3097: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3098: {
3099:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3101:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

3104:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3105:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

3107:   /* copy values over */
3108:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
3109:   return(0);
3110: }


/*
   MatSeqBAIJSetPreallocation_SeqBAIJ - Fixes the block size of B, selects the block-size
   specific kernels and allocates the storage for the nonzero blocks.

   Input Parameters:
+  B   - the matrix
.  bs  - block size; when negative, |bs| is the default and -mat_block_size may override it
.  nz  - number of blocks per block row (used when nnz is null); PETSC_DEFAULT and
         PETSC_DECIDE become 5; MAT_SKIP_ALLOCATION sets everything up without allocating
-  nnz - optional array with the number of blocks for each of the block rows

   Notes:
   The local row and column sizes must be divisible by the block size.
   -mat_no_unroll keeps the kernels already installed in B->ops instead of selecting
   those written for the block size.
*/
3121: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3122: {
3123:   Mat_SeqBAIJ    *b;
3125:   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3126:   PetscBool      flg,skipallocation = PETSC_FALSE;


3130:   if (nz == MAT_SKIP_ALLOCATION) {
3131:     skipallocation = PETSC_TRUE;
3132:     nz             = 0;
3133:   }

    /* a negative bs means: |bs| is only a default that the options database may replace */
3135:   if (bs < 0) {
3136:     PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
3137:       PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
3138:     PetscOptionsEnd();
3139:     bs   = PetscAbs(bs);
3140:   }
    /* nnz was sized by the caller for block size bs, so it cannot be reused with another one */
3141:   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3142:   bs   = newbs;

3144:   PetscLayoutSetBlockSize(B->rmap,bs);
3145:   PetscLayoutSetBlockSize(B->cmap,bs);
3146:   PetscLayoutSetUp(B->rmap);
3147:   PetscLayoutSetUp(B->cmap);

3149:   B->preallocated = PETSC_TRUE;

    /* mbs/nbs: number of block rows/columns; bs2: entries per block */
3151:   mbs  = B->rmap->n/bs;
3152:   nbs  = B->cmap->n/bs;
3153:   bs2  = bs*bs;

3155:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

3157:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3158:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3159:   if (nnz) {
3160:     for (i=0; i<mbs; i++) {
3161:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3162:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3163:     }
3164:   }

3166:   b       = (Mat_SeqBAIJ*)B->data;
3167:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
3168:     PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
3169:   PetscOptionsEnd();

    /* install the mult/multadd/sor kernels written for this block size; sizes without a
       dedicated kernel use the _N versions (15 has a dedicated mult only) */
3171:   if (!flg) {
3172:     switch (bs) {
3173:     case 1:
3174:       B->ops->mult            = MatMult_SeqBAIJ_1;
3175:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3176:       B->ops->sor             = MatSOR_SeqBAIJ_1;
3177:       break;
3178:     case 2:
3179:       B->ops->mult            = MatMult_SeqBAIJ_2;
3180:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3181:       B->ops->sor             = MatSOR_SeqBAIJ_2;
3182:       break;
3183:     case 3:
3184:       B->ops->mult            = MatMult_SeqBAIJ_3;
3185:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3186:       B->ops->sor             = MatSOR_SeqBAIJ_3;
3187:       break;
3188:     case 4:
3189:       B->ops->mult            = MatMult_SeqBAIJ_4;
3190:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3191:       B->ops->sor             = MatSOR_SeqBAIJ_4;
3192:       break;
3193:     case 5:
3194:       B->ops->mult            = MatMult_SeqBAIJ_5;
3195:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3196:       B->ops->sor             = MatSOR_SeqBAIJ_5;
3197:       break;
3198:     case 6:
3199:       B->ops->mult            = MatMult_SeqBAIJ_6;
3200:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3201:       B->ops->sor             = MatSOR_SeqBAIJ_6;
3202:       break;
3203:     case 7:
3204:       B->ops->mult            = MatMult_SeqBAIJ_7;
3205:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3206:       B->ops->sor             = MatSOR_SeqBAIJ_7;
3207:       break;
3208:     case 15:
3209:       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3210:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3211:       B->ops->sor             = MatSOR_SeqBAIJ_N;
3212:       break;
3213:     default:
3214:       B->ops->mult            = MatMult_SeqBAIJ_N;
3215:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3216:       B->ops->sor             = MatSOR_SeqBAIJ_N;
3217:       break;
3218:     }
3219:   }
3220:   B->rmap->bs  = bs;
3221:   b->mbs       = mbs;
3222:   b->nbs       = nbs;
3223:   if (!skipallocation) {
3224:     if (!b->imax) {
3225:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
3226:       PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3227:       b->free_imax_ilen = PETSC_TRUE;
3228:     }
3229:     /* b->ilen will count nonzeros in each block row so far. */
3230:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3231:     if (!nnz) {
        /* NOTE(review): nz was already defaulted and checked for negativity above, so these
           two tests look unreachable - confirm before removing */
3232:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3233:       else if (nz < 0) nz = 1;
3234:       for (i=0; i<mbs; i++) b->imax[i] = nz;
3235:       nz = nz*mbs;
3236:     } else {
3237:       nz = 0;
3238:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3239:     }

3241:     /* allocate the matrix space */
3242:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
3243:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
3244:     PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
3245:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
3246:     PetscMemzero(b->j,nz*sizeof(PetscInt));
3247:     b->singlemalloc = PETSC_TRUE;
      /* row pointers: only entries 0..mbs are filled, one per block row plus the end marker */
3248:     b->i[0] = 0;
3249:     for (i=1; i<mbs+1; i++) {
3250:       b->i[i] = b->i[i-1] + b->imax[i-1];
3251:     }
3252:     b->free_a     = PETSC_TRUE;
3253:     b->free_ij    = PETSC_TRUE;
3254:   } else {
3255:     b->free_a     = PETSC_FALSE;
3256:     b->free_ij    = PETSC_FALSE;
3257:   }

3259:   B->rmap->bs          = bs;
3260:   b->bs2              = bs2;
3261:   b->mbs              = mbs;
3262:   b->nz               = 0;
3263:   b->maxnz            = nz;
3264:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3265:   return(0);
3266: }

3272: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3273: {
3274:   PetscInt       i,m,nz,nz_max=0,*nnz;
3275:   PetscScalar    *values=0;

3279:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3280:   PetscLayoutSetBlockSize(B->rmap,bs);
3281:   PetscLayoutSetBlockSize(B->cmap,bs);
3282:   PetscLayoutSetUp(B->rmap);
3283:   PetscLayoutSetUp(B->cmap);
3284:   m = B->rmap->n/bs;

3286:   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3287:   PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
3288:   for(i=0; i<m; i++) {
3289:     nz = ii[i+1]- ii[i];
3290:     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3291:     nz_max = PetscMax(nz_max, nz);
3292:     nnz[i] = nz;
3293:   }
3294:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3295:   PetscFree(nnz);

3297:   values = (PetscScalar*)V;
3298:   if (!values) {
3299:     PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
3300:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
3301:   }
3302:   for (i=0; i<m; i++) {
3303:     PetscInt          ncols  = ii[i+1] - ii[i];
3304:     const PetscInt    *icols = jj + ii[i];
3305:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3306:     MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3307:   }
3308:   if (!V) { PetscFree(values); }
3309:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3310:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3312:   return(0);
3313: }


3320: #if defined(PETSC_HAVE_MUMPS)
3322: #endif

3326: /*MC
3327:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
3328:    block sparse compressed row format.

3330:    Options Database Keys:
3331: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3333:   Level: beginner

3335: .seealso: MatCreateSeqBAIJ()
3336: M*/


3345: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3346: {
3348:   PetscMPIInt    size;
3349:   Mat_SeqBAIJ    *b;

3352:   MPI_Comm_size(((PetscObject)B)->comm,&size);
3353:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3355:   PetscNewLog(B,Mat_SeqBAIJ,&b);
3356:   B->data = (void*)b;
3357:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3358:   b->row                   = 0;
3359:   b->col                   = 0;
3360:   b->icol                  = 0;
3361:   b->reallocs              = 0;
3362:   b->saved_values          = 0;

3364:   b->roworiented           = PETSC_TRUE;
3365:   b->nonew                 = 0;
3366:   b->diag                  = 0;
3367:   b->solve_work            = 0;
3368:   b->mult_work             = 0;
3369:   B->spptr                 = 0;
3370:   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3371:   b->keepnonzeropattern    = PETSC_FALSE;
3372:   b->xtoy                  = 0;
3373:   b->XtoY                  = 0;
3374:   B->same_nonzero          = PETSC_FALSE;

3376:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3377:                                      "MatGetFactorAvailable_seqbaij_petsc",
3378:                                      MatGetFactorAvailable_seqbaij_petsc);
3379:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3380:                                      "MatGetFactor_seqbaij_petsc",
3381:                                      MatGetFactor_seqbaij_petsc);
3382:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3383:                                      "MatGetFactor_seqbaij_bstrm",
3384:                                      MatGetFactor_seqbaij_bstrm);
3385: #if defined(PETSC_HAVE_MUMPS)
3386:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);
3387: #endif
3388:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3389:                                      "MatInvertBlockDiagonal_SeqBAIJ",
3390:                                       MatInvertBlockDiagonal_SeqBAIJ);
3391:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3392:                                      "MatStoreValues_SeqBAIJ",
3393:                                       MatStoreValues_SeqBAIJ);
3394:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3395:                                      "MatRetrieveValues_SeqBAIJ",
3396:                                       MatRetrieveValues_SeqBAIJ);
3397:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3398:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3399:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
3400:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3401:                                      "MatConvert_SeqBAIJ_SeqAIJ",
3402:                                       MatConvert_SeqBAIJ_SeqAIJ);
3403:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3404:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3405:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
3406:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3407:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3408:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
3409:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3410:                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3411:                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3412:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3413:                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3414:                                       MatConvert_SeqBAIJ_SeqBSTRM);
3415:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3416:                                      "MatIsTranspose_SeqBAIJ",
3417:                                       MatIsTranspose_SeqBAIJ);
3418:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3419:   return(0);
3420: }

3425: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3426: {
3427:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3429:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3432:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3434:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3435:     c->imax = a->imax;
3436:     c->ilen = a->ilen;
3437:     c->free_imax_ilen = PETSC_FALSE;
3438:   } else {
3439:     PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
3440:     PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));
3441:     for (i=0; i<mbs; i++) {
3442:       c->imax[i] = a->imax[i];
3443:       c->ilen[i] = a->ilen[i];
3444:     }
3445:     c->free_imax_ilen = PETSC_TRUE;
3446:   }

3448:   /* allocate the matrix space */
3449:   if (mallocmatspace){
3450:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3451:       PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);
3452:       PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));
3453:       c->singlemalloc = PETSC_FALSE;
3454:       c->free_ij      = PETSC_FALSE;
3455:       c->i            = a->i;
3456:       c->j            = a->j;
3457:       c->parent       = A;
3458:       PetscObjectReference((PetscObject)A);
3459:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3460:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3461:     } else {
3462:       PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
3463:       PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3464:       c->singlemalloc = PETSC_TRUE;
3465:       c->free_ij      = PETSC_TRUE;
3466:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3467:       if (mbs > 0) {
3468:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3469:         if (cpvalues == MAT_COPY_VALUES) {
3470:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3471:         } else {
3472:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3473:         }
3474:       }
3475:     }
3476:   }

3478:   c->roworiented = a->roworiented;
3479:   c->nonew       = a->nonew;
3480:   PetscLayoutReference(A->rmap,&C->rmap);
3481:   PetscLayoutReference(A->cmap,&C->cmap);
3482:   c->bs2         = a->bs2;
3483:   c->mbs         = a->mbs;
3484:   c->nbs         = a->nbs;

3486:   if (a->diag) {
3487:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3488:       c->diag      = a->diag;
3489:       c->free_diag = PETSC_FALSE;
3490:     } else {
3491:       PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
3492:       PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
3493:       for (i=0; i<mbs; i++) {
3494:         c->diag[i] = a->diag[i];
3495:       }
3496:       c->free_diag = PETSC_TRUE;
3497:     }
3498:   } else c->diag        = 0;
3499:   c->nz                 = a->nz;
3500:   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3501:   c->solve_work         = 0;
3502:   c->mult_work          = 0;
3503:   c->free_a             = PETSC_TRUE;
3504:   c->free_ij            = PETSC_TRUE;
3505:   C->preallocated       = PETSC_TRUE;
3506:   C->assembled          = PETSC_TRUE;

3508:   c->compressedrow.use     = a->compressedrow.use;
3509:   c->compressedrow.nrows   = a->compressedrow.nrows;
3510:   c->compressedrow.check   = a->compressedrow.check;
3511:   if (a->compressedrow.use){
3512:     i = a->compressedrow.nrows;
3513:     PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);
3514:     PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));
3515:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3516:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3517:   } else {
3518:     c->compressedrow.use    = PETSC_FALSE;
3519:     c->compressedrow.i      = PETSC_NULL;
3520:     c->compressedrow.rindex = PETSC_NULL;
3521:   }
3522:   C->same_nonzero = A->same_nonzero;
3523:   PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3524:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3525:   return(0);
3526: }

3530: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3531: {

3535:   MatCreate(((PetscObject)A)->comm,B);
3536:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3537:   MatSetType(*B,MATSEQBAIJ);
3538:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3539:   return(0);
3540: }

3544: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3545: {
3546:   Mat_SeqBAIJ    *a;
3548:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3549:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3550:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3551:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3552:   PetscMPIInt    size;
3553:   int            fd;
3554:   PetscScalar    *aa;
3555:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

3558:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
3559:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
3560:   PetscOptionsEnd();
3561:   bs2  = bs*bs;

3563:   MPI_Comm_size(comm,&size);
3564:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3565:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3566:   PetscBinaryRead(fd,header,4,PETSC_INT);
3567:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3568:   M = header[1]; N = header[2]; nz = header[3];

3570:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3571:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3573:   /* 
3574:      This code adds extra rows to make sure the number of rows is 
3575:     divisible by the blocksize
3576:   */
3577:   mbs        = M/bs;
3578:   extra_rows = bs - M + bs*(mbs);
3579:   if (extra_rows == bs) extra_rows = 0;
3580:   else                  mbs++;
3581:   if (extra_rows) {
3582:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3583:   }

3585:   /* Set global sizes if not already set */
3586:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3587:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3588:   } else { /* Check if the matrix global sizes are correct */
3589:     MatGetSize(newmat,&rows,&cols);
3590:     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3591:       MatGetLocalSize(newmat,&rows,&cols);
3592:     }
3593:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3594:   }
3595: 
3596:   /* read in row lengths */
3597:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
3598:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3599:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3601:   /* read in column indices */
3602:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
3603:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3604:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3606:   /* loop over row lengths determining block row lengths */
3607:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
3608:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
3609:   PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);
3610:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3611:   rowcount = 0;
3612:   nzcount = 0;
3613:   for (i=0; i<mbs; i++) {
3614:     nmask = 0;
3615:     for (j=0; j<bs; j++) {
3616:       kmax = rowlengths[rowcount];
3617:       for (k=0; k<kmax; k++) {
3618:         tmp = jj[nzcount++]/bs;
3619:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3620:       }
3621:       rowcount++;
3622:     }
3623:     browlengths[i] += nmask;
3624:     /* zero out the mask elements we set */
3625:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3626:   }

3628:   /* Do preallocation  */
3629:   MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3630:   a = (Mat_SeqBAIJ*)newmat->data;

3632:   /* set matrix "i" values */
3633:   a->i[0] = 0;
3634:   for (i=1; i<= mbs; i++) {
3635:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3636:     a->ilen[i-1] = browlengths[i-1];
3637:   }
3638:   a->nz         = 0;
3639:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3641:   /* read in nonzero values */
3642:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3643:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3644:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3646:   /* set "a" and "j" values into matrix */
3647:   nzcount = 0; jcount = 0;
3648:   for (i=0; i<mbs; i++) {
3649:     nzcountb = nzcount;
3650:     nmask    = 0;
3651:     for (j=0; j<bs; j++) {
3652:       kmax = rowlengths[i*bs+j];
3653:       for (k=0; k<kmax; k++) {
3654:         tmp = jj[nzcount++]/bs;
3655:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3656:       }
3657:     }
3658:     /* sort the masked values */
3659:     PetscSortInt(nmask,masked);

3661:     /* set "j" values into matrix */
3662:     maskcount = 1;
3663:     for (j=0; j<nmask; j++) {
3664:       a->j[jcount++]  = masked[j];
3665:       mask[masked[j]] = maskcount++;
3666:     }
3667:     /* set "a" values into matrix */
3668:     ishift = bs2*a->i[i];
3669:     for (j=0; j<bs; j++) {
3670:       kmax = rowlengths[i*bs+j];
3671:       for (k=0; k<kmax; k++) {
3672:         tmp       = jj[nzcountb]/bs ;
3673:         block     = mask[tmp] - 1;
3674:         point     = jj[nzcountb] - bs*tmp;
3675:         idx       = ishift + bs2*block + j + bs*point;
3676:         a->a[idx] = (MatScalar)aa[nzcountb++];
3677:       }
3678:     }
3679:     /* zero out the mask elements we set */
3680:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3681:   }
3682:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3684:   PetscFree(rowlengths);
3685:   PetscFree(browlengths);
3686:   PetscFree(aa);
3687:   PetscFree(jj);
3688:   PetscFree2(mask,masked);

3690:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3691:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3692:   MatView_Private(newmat);
3693:   return(0);
3694: }

3698: /*@C
3699:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3700:    compressed row) format.  For good matrix assembly performance the
3701:    user should preallocate the matrix storage by setting the parameter nz
3702:    (or the array nnz).  By setting these parameters accurately, performance
3703:    during matrix assembly can be increased by more than a factor of 50.

3705:    Collective on MPI_Comm

3707:    Input Parameters:
3708: +  comm - MPI communicator, set to PETSC_COMM_SELF
3709: .  bs - size of block
3710: .  m - number of rows
3711: .  n - number of columns
3712: .  nz - number of nonzero blocks  per block row (same for all rows)
3713: -  nnz - array containing the number of nonzero blocks in the various block rows 
3714:          (possibly different for each block row) or PETSC_NULL

3716:    Output Parameter:
3717: .  A - the matrix 

3719:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3720:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3721:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3723:    Options Database Keys:
3724: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3725:                      block calculations (much slower)
3726: .    -mat_block_size - size of the blocks to use

3728:    Level: intermediate

3730:    Notes:
3731:    The number of rows and columns must be divisible by blocksize.

3733:    If the nnz parameter is given then the nz parameter is ignored

3735:    A nonzero block is any block that as 1 or more nonzeros in it

3737:    The block AIJ format is fully compatible with standard Fortran 77
3738:    storage.  That is, the stored row and column indices can begin at
3739:    either one (as in Fortran) or zero.  See the users' manual for details.

3741:    Specify the preallocated storage with either nz or nnz (not both).
3742:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3743:    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3744:    matrices.

3746: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3747: @*/
3748: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3749: {
3751: 
3753:   MatCreate(comm,A);
3754:   MatSetSizes(*A,m,n,m,n);
3755:   MatSetType(*A,MATSEQBAIJ);
3756:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3757:   return(0);
3758: }

3762: /*@C
3763:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3764:    per row in the matrix. For good matrix assembly performance the
3765:    user should preallocate the matrix storage by setting the parameter nz
3766:    (or the array nnz).  By setting these parameters accurately, performance
3767:    during matrix assembly can be increased by more than a factor of 50.

3769:    Collective on MPI_Comm

3771:    Input Parameters:
3772: +  A - the matrix
3773: .  bs - size of block
3774: .  nz - number of block nonzeros per block row (same for all rows)
3775: -  nnz - array containing the number of block nonzeros in the various block rows 
3776:          (possibly different for each block row) or PETSC_NULL

3778:    Options Database Keys:
3779: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3780:                      block calculations (much slower)
3781: .    -mat_block_size - size of the blocks to use

3783:    Level: intermediate

3785:    Notes:
3786:    If the nnz parameter is given then the nz parameter is ignored

3788:    You can call MatGetInfo() to get information on how effective the preallocation was;
3789:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3790:    You can also run with the option -info and look for messages with the string 
3791:    malloc in them to see if additional memory allocation was needed.

3793:    The block AIJ format is fully compatible with standard Fortran 77
3794:    storage.  That is, the stored row and column indices can begin at
3795:    either one (as in Fortran) or zero.  See the users' manual for details.

3797:    Specify the preallocated storage with either nz or nnz (not both).
3798:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3799:    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

3801: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3802: @*/
3803: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3804: {

3808:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3809:   return(0);
3810: }

3814: /*@C
3815:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3816:    (the default sequential PETSc format).  

3818:    Collective on MPI_Comm

3820:    Input Parameters:
3821: +  A - the matrix 
3822: .  i - the indices into j for the start of each local row (starts with zero)
3823: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3824: -  v - optional values in the matrix

3826:    Level: developer

3828: .keywords: matrix, aij, compressed row, sparse

3830: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3831: @*/
3832: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3833: {

3837:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3838:   return(0);
3839: }


3844: /*@
3845:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3847:      Collective on MPI_Comm

3849:    Input Parameters:
3850: +  comm - must be an MPI communicator of size 1
3851: .  bs - size of block
3852: .  m - number of rows
3853: .  n - number of columns
3854: .  i - row indices
3855: .  j - column indices
3856: -  a - matrix values

3858:    Output Parameter:
3859: .  mat - the matrix

3861:    Level: advanced

3863:    Notes:
3864:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3865:     once the matrix is destroyed

3867:        You cannot set new nonzero locations into this matrix, that will generate an error.

3869:        The i and j indices are 0 based

3871:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).


3874: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()

3876: @*/
3877: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3878: {
3880:   PetscInt       ii;
3881:   Mat_SeqBAIJ    *baij;

3884:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3885:   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3886: 
3887:   MatCreate(comm,mat);
3888:   MatSetSizes(*mat,m,n,m,n);
3889:   MatSetType(*mat,MATSEQBAIJ);
3890:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3891:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3892:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3893:   PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));

3895:   baij->i = i;
3896:   baij->j = j;
3897:   baij->a = a;
3898:   baij->singlemalloc = PETSC_FALSE;
3899:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3900:   baij->free_a       = PETSC_FALSE;
3901:   baij->free_ij      = PETSC_FALSE;

3903:   for (ii=0; ii<m; ii++) {
3904:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3905: #if defined(PETSC_USE_DEBUG)
3906:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3907: #endif    
3908:   }
3909: #if defined(PETSC_USE_DEBUG)
3910:   for (ii=0; ii<baij->i[m]; ii++) {
3911:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3912:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3913:   }
3914: #endif    

3916:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3917:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3918:   return(0);
3919: }