Actual source code: maij.c

  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
 20: #include <../src/mat/utils/freespace.h>
 21: #include <private/vecimpl.h>

 25: /*@C
 26:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 28:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 30:    Input Parameter:
 31: .  A - the MAIJ matrix

 33:    Output Parameter:
 34: .  B - the AIJ matrix

 36:    Level: advanced

 38:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 40: .seealso: MatCreateMAIJ()
 41: @*/
 42: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 43: {
 45:   PetscBool      ismpimaij,isseqmaij;

 48:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 49:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 50:   if (ismpimaij) {
 51:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 53:     *B = b->A;
 54:   } else if (isseqmaij) {
 55:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 57:     *B = b->AIJ;
 58:   } else {
 59:     *B = A;
 60:   }
 61:   return(0);
 62: }

 66: /*@C
 67:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 69:    Logically Collective

 71:    Input Parameter:
 72: +  A - the MAIJ matrix
 73: -  dof - the block size for the new matrix

 75:    Output Parameter:
 76: .  B - the new MAIJ matrix

 78:    Level: advanced

 80: .seealso: MatCreateMAIJ()
 81: @*/
 82: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 83: {
 85:   Mat            Aij = PETSC_NULL;

 89:   MatMAIJGetAIJ(A,&Aij);
 90:   MatCreateMAIJ(Aij,dof,B);
 91:   return(0);
 92: }

 96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 97: {
 99:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

102:   MatDestroy(&b->AIJ);
103:   PetscFree(A->data);
104:   return(0);
105: }

109: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110: {
112:   Mat            B;

115:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
116:   MatView(B,viewer);
117:   MatDestroy(&B);
118:   return(0);
119: }

123: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
124: {
126:   Mat            B;

129:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
130:   MatView(B,viewer);
131:   MatDestroy(&B);
132:   return(0);
133: }

137: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
138: {
140:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

143:   MatDestroy(&b->AIJ);
144:   MatDestroy(&b->OAIJ);
145:   MatDestroy(&b->A);
146:   VecScatterDestroy(&b->ctx);
147:   VecDestroy(&b->w);
148:   PetscFree(A->data);
149:   PetscObjectChangeTypeName((PetscObject)A,0);
150:   return(0);
151: }

153: /*MC
154:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
155:   multicomponent problems, interpolating or restricting each component the same way independently.
156:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

158:   Operations provided:
159: . MatMult
160: . MatMultTranspose
161: . MatMultAdd
162: . MatMultTransposeAdd

164:   Level: advanced

166: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
167: M*/

172: PetscErrorCode  MatCreate_MAIJ(Mat A)
173: {
175:   Mat_MPIMAIJ    *b;
176:   PetscMPIInt    size;

179:   PetscNewLog(A,Mat_MPIMAIJ,&b);
180:   A->data  = (void*)b;
181:   PetscMemzero(A->ops,sizeof(struct _MatOps));

183:   b->AIJ  = 0;
184:   b->dof  = 0;
185:   b->OAIJ = 0;
186:   b->ctx  = 0;
187:   b->w    = 0;
188:   MPI_Comm_size(((PetscObject)A)->comm,&size);
189:   if (size == 1){
190:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
191:   } else {
192:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
193:   }
194:   return(0);
195: }

198: /* --------------------------------------------------------------------------------------*/
201: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
202: {
203:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
204:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
205:   const PetscScalar *x,*v;
206:   PetscScalar       *y, sum1, sum2;
207:   PetscErrorCode    ierr;
208:   PetscInt          nonzerorow=0,n,i,jrow,j;
209:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

212:   VecGetArrayRead(xx,&x);
213:   VecGetArray(yy,&y);
214:   idx  = a->j;
215:   v    = a->a;
216:   ii   = a->i;

218:   for (i=0; i<m; i++) {
219:     jrow = ii[i];
220:     n    = ii[i+1] - jrow;
221:     sum1  = 0.0;
222:     sum2  = 0.0;
223:     nonzerorow += (n>0);
224:     for (j=0; j<n; j++) {
225:       sum1 += v[jrow]*x[2*idx[jrow]];
226:       sum2 += v[jrow]*x[2*idx[jrow]+1];
227:       jrow++;
228:      }
229:     y[2*i]   = sum1;
230:     y[2*i+1] = sum2;
231:   }

233:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
234:   VecRestoreArrayRead(xx,&x);
235:   VecRestoreArray(yy,&y);
236:   return(0);
237: }

241: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
242: {
243:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
244:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
245:   const PetscScalar *x,*v;
246:   PetscScalar       *y,alpha1,alpha2;
247:   PetscErrorCode    ierr;
248:   PetscInt          n,i;
249:   const PetscInt    m = b->AIJ->rmap->n,*idx;

252:   VecSet(yy,0.0);
253:   VecGetArrayRead(xx,&x);
254:   VecGetArray(yy,&y);
255: 
256:   for (i=0; i<m; i++) {
257:     idx    = a->j + a->i[i] ;
258:     v      = a->a + a->i[i] ;
259:     n      = a->i[i+1] - a->i[i];
260:     alpha1 = x[2*i];
261:     alpha2 = x[2*i+1];
262:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
263:   }
264:   PetscLogFlops(4.0*a->nz);
265:   VecRestoreArrayRead(xx,&x);
266:   VecRestoreArray(yy,&y);
267:   return(0);
268: }

272: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
273: {
274:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
275:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
276:   const PetscScalar *x,*v;
277:   PetscScalar       *y,sum1, sum2;
278:   PetscErrorCode    ierr;
279:   PetscInt          n,i,jrow,j;
280:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

283:   if (yy != zz) {VecCopy(yy,zz);}
284:   VecGetArrayRead(xx,&x);
285:   VecGetArray(zz,&y);
286:   idx  = a->j;
287:   v    = a->a;
288:   ii   = a->i;

290:   for (i=0; i<m; i++) {
291:     jrow = ii[i];
292:     n    = ii[i+1] - jrow;
293:     sum1  = 0.0;
294:     sum2  = 0.0;
295:     for (j=0; j<n; j++) {
296:       sum1 += v[jrow]*x[2*idx[jrow]];
297:       sum2 += v[jrow]*x[2*idx[jrow]+1];
298:       jrow++;
299:      }
300:     y[2*i]   += sum1;
301:     y[2*i+1] += sum2;
302:   }

304:   PetscLogFlops(4.0*a->nz);
305:   VecRestoreArrayRead(xx,&x);
306:   VecRestoreArray(zz,&y);
307:   return(0);
308: }
311: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
312: {
313:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
314:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
315:   const PetscScalar *x,*v;
316:   PetscScalar       *y,alpha1,alpha2;
317:   PetscErrorCode    ierr;
318:   PetscInt          n,i;
319:   const PetscInt    m = b->AIJ->rmap->n,*idx;

322:   if (yy != zz) {VecCopy(yy,zz);}
323:   VecGetArrayRead(xx,&x);
324:   VecGetArray(zz,&y);
325: 
326:   for (i=0; i<m; i++) {
327:     idx   = a->j + a->i[i] ;
328:     v     = a->a + a->i[i] ;
329:     n     = a->i[i+1] - a->i[i];
330:     alpha1 = x[2*i];
331:     alpha2 = x[2*i+1];
332:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
333:   }
334:   PetscLogFlops(4.0*a->nz);
335:   VecRestoreArrayRead(xx,&x);
336:   VecRestoreArray(zz,&y);
337:   return(0);
338: }
339: /* --------------------------------------------------------------------------------------*/
342: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343: {
344:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
345:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
346:   const PetscScalar *x,*v;
347:   PetscScalar       *y,sum1, sum2, sum3;
348:   PetscErrorCode    ierr;
349:   PetscInt          nonzerorow=0,n,i,jrow,j;
350:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

353:   VecGetArrayRead(xx,&x);
354:   VecGetArray(yy,&y);
355:   idx  = a->j;
356:   v    = a->a;
357:   ii   = a->i;

359:   for (i=0; i<m; i++) {
360:     jrow = ii[i];
361:     n    = ii[i+1] - jrow;
362:     sum1  = 0.0;
363:     sum2  = 0.0;
364:     sum3  = 0.0;
365:     nonzerorow += (n>0);
366:     for (j=0; j<n; j++) {
367:       sum1 += v[jrow]*x[3*idx[jrow]];
368:       sum2 += v[jrow]*x[3*idx[jrow]+1];
369:       sum3 += v[jrow]*x[3*idx[jrow]+2];
370:       jrow++;
371:      }
372:     y[3*i]   = sum1;
373:     y[3*i+1] = sum2;
374:     y[3*i+2] = sum3;
375:   }

377:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
378:   VecRestoreArrayRead(xx,&x);
379:   VecRestoreArray(yy,&y);
380:   return(0);
381: }

385: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
386: {
387:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
388:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
389:   const PetscScalar *x,*v;
390:   PetscScalar       *y,alpha1,alpha2,alpha3;
391:   PetscErrorCode    ierr;
392:   PetscInt          n,i;
393:   const PetscInt    m = b->AIJ->rmap->n,*idx;

396:   VecSet(yy,0.0);
397:   VecGetArrayRead(xx,&x);
398:   VecGetArray(yy,&y);
399: 
400:   for (i=0; i<m; i++) {
401:     idx    = a->j + a->i[i];
402:     v      = a->a + a->i[i];
403:     n      = a->i[i+1] - a->i[i];
404:     alpha1 = x[3*i];
405:     alpha2 = x[3*i+1];
406:     alpha3 = x[3*i+2];
407:     while (n-->0) {
408:       y[3*(*idx)]   += alpha1*(*v);
409:       y[3*(*idx)+1] += alpha2*(*v);
410:       y[3*(*idx)+2] += alpha3*(*v);
411:       idx++; v++;
412:     }
413:   }
414:   PetscLogFlops(6.0*a->nz);
415:   VecRestoreArrayRead(xx,&x);
416:   VecRestoreArray(yy,&y);
417:   return(0);
418: }

422: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
423: {
424:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
425:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
426:   const PetscScalar *x,*v;
427:   PetscScalar       *y,sum1, sum2, sum3;
428:   PetscErrorCode    ierr;
429:   PetscInt          n,i,jrow,j;
430:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

433:   if (yy != zz) {VecCopy(yy,zz);}
434:   VecGetArrayRead(xx,&x);
435:   VecGetArray(zz,&y);
436:   idx  = a->j;
437:   v    = a->a;
438:   ii   = a->i;

440:   for (i=0; i<m; i++) {
441:     jrow = ii[i];
442:     n    = ii[i+1] - jrow;
443:     sum1  = 0.0;
444:     sum2  = 0.0;
445:     sum3  = 0.0;
446:     for (j=0; j<n; j++) {
447:       sum1 += v[jrow]*x[3*idx[jrow]];
448:       sum2 += v[jrow]*x[3*idx[jrow]+1];
449:       sum3 += v[jrow]*x[3*idx[jrow]+2];
450:       jrow++;
451:      }
452:     y[3*i]   += sum1;
453:     y[3*i+1] += sum2;
454:     y[3*i+2] += sum3;
455:   }

457:   PetscLogFlops(6.0*a->nz);
458:   VecRestoreArrayRead(xx,&x);
459:   VecRestoreArray(zz,&y);
460:   return(0);
461: }
464: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
465: {
466:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
467:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
468:   const PetscScalar *x,*v;
469:   PetscScalar       *y,alpha1,alpha2,alpha3;
470:   PetscErrorCode    ierr;
471:   PetscInt          n,i;
472:   const PetscInt    m = b->AIJ->rmap->n,*idx;

475:   if (yy != zz) {VecCopy(yy,zz);}
476:   VecGetArrayRead(xx,&x);
477:   VecGetArray(zz,&y);
478:   for (i=0; i<m; i++) {
479:     idx    = a->j + a->i[i] ;
480:     v      = a->a + a->i[i] ;
481:     n      = a->i[i+1] - a->i[i];
482:     alpha1 = x[3*i];
483:     alpha2 = x[3*i+1];
484:     alpha3 = x[3*i+2];
485:     while (n-->0) {
486:       y[3*(*idx)]   += alpha1*(*v);
487:       y[3*(*idx)+1] += alpha2*(*v);
488:       y[3*(*idx)+2] += alpha3*(*v);
489:       idx++; v++;
490:     }
491:   }
492:   PetscLogFlops(6.0*a->nz);
493:   VecRestoreArrayRead(xx,&x);
494:   VecRestoreArray(zz,&y);
495:   return(0);
496: }

498: /* ------------------------------------------------------------------------------*/
501: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
502: {
503:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
504:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
505:   const PetscScalar *x,*v;
506:   PetscScalar       *y,sum1, sum2, sum3, sum4;
507:   PetscErrorCode    ierr;
508:   PetscInt          nonzerorow=0,n,i,jrow,j;
509:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

512:   VecGetArrayRead(xx,&x);
513:   VecGetArray(yy,&y);
514:   idx  = a->j;
515:   v    = a->a;
516:   ii   = a->i;

518:   for (i=0; i<m; i++) {
519:     jrow = ii[i];
520:     n    = ii[i+1] - jrow;
521:     sum1  = 0.0;
522:     sum2  = 0.0;
523:     sum3  = 0.0;
524:     sum4  = 0.0;
525:     nonzerorow += (n>0);
526:     for (j=0; j<n; j++) {
527:       sum1 += v[jrow]*x[4*idx[jrow]];
528:       sum2 += v[jrow]*x[4*idx[jrow]+1];
529:       sum3 += v[jrow]*x[4*idx[jrow]+2];
530:       sum4 += v[jrow]*x[4*idx[jrow]+3];
531:       jrow++;
532:      }
533:     y[4*i]   = sum1;
534:     y[4*i+1] = sum2;
535:     y[4*i+2] = sum3;
536:     y[4*i+3] = sum4;
537:   }

539:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
540:   VecRestoreArrayRead(xx,&x);
541:   VecRestoreArray(yy,&y);
542:   return(0);
543: }

547: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
548: {
549:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
550:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
551:   const PetscScalar *x,*v;
552:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
553:   PetscErrorCode    ierr;
554:   PetscInt          n,i;
555:   const PetscInt    m = b->AIJ->rmap->n,*idx;

558:   VecSet(yy,0.0);
559:   VecGetArrayRead(xx,&x);
560:   VecGetArray(yy,&y);
561:   for (i=0; i<m; i++) {
562:     idx    = a->j + a->i[i] ;
563:     v      = a->a + a->i[i] ;
564:     n      = a->i[i+1] - a->i[i];
565:     alpha1 = x[4*i];
566:     alpha2 = x[4*i+1];
567:     alpha3 = x[4*i+2];
568:     alpha4 = x[4*i+3];
569:     while (n-->0) {
570:       y[4*(*idx)]   += alpha1*(*v);
571:       y[4*(*idx)+1] += alpha2*(*v);
572:       y[4*(*idx)+2] += alpha3*(*v);
573:       y[4*(*idx)+3] += alpha4*(*v);
574:       idx++; v++;
575:     }
576:   }
577:   PetscLogFlops(8.0*a->nz);
578:   VecRestoreArrayRead(xx,&x);
579:   VecRestoreArray(yy,&y);
580:   return(0);
581: }

585: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
586: {
587:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
588:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
589:   const PetscScalar *x,*v;
590:   PetscScalar       *y,sum1, sum2, sum3, sum4;
591:   PetscErrorCode    ierr;
592:   PetscInt          n,i,jrow,j;
593:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

596:   if (yy != zz) {VecCopy(yy,zz);}
597:   VecGetArrayRead(xx,&x);
598:   VecGetArray(zz,&y);
599:   idx  = a->j;
600:   v    = a->a;
601:   ii   = a->i;

603:   for (i=0; i<m; i++) {
604:     jrow = ii[i];
605:     n    = ii[i+1] - jrow;
606:     sum1  = 0.0;
607:     sum2  = 0.0;
608:     sum3  = 0.0;
609:     sum4  = 0.0;
610:     for (j=0; j<n; j++) {
611:       sum1 += v[jrow]*x[4*idx[jrow]];
612:       sum2 += v[jrow]*x[4*idx[jrow]+1];
613:       sum3 += v[jrow]*x[4*idx[jrow]+2];
614:       sum4 += v[jrow]*x[4*idx[jrow]+3];
615:       jrow++;
616:      }
617:     y[4*i]   += sum1;
618:     y[4*i+1] += sum2;
619:     y[4*i+2] += sum3;
620:     y[4*i+3] += sum4;
621:   }

623:   PetscLogFlops(8.0*a->nz);
624:   VecRestoreArrayRead(xx,&x);
625:   VecRestoreArray(zz,&y);
626:   return(0);
627: }
630: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
631: {
632:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
633:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
634:   const PetscScalar *x,*v;
635:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
636:   PetscErrorCode    ierr;
637:   PetscInt          n,i;
638:   const PetscInt    m = b->AIJ->rmap->n,*idx;

641:   if (yy != zz) {VecCopy(yy,zz);}
642:   VecGetArrayRead(xx,&x);
643:   VecGetArray(zz,&y);
644: 
645:   for (i=0; i<m; i++) {
646:     idx    = a->j + a->i[i] ;
647:     v      = a->a + a->i[i] ;
648:     n      = a->i[i+1] - a->i[i];
649:     alpha1 = x[4*i];
650:     alpha2 = x[4*i+1];
651:     alpha3 = x[4*i+2];
652:     alpha4 = x[4*i+3];
653:     while (n-->0) {
654:       y[4*(*idx)]   += alpha1*(*v);
655:       y[4*(*idx)+1] += alpha2*(*v);
656:       y[4*(*idx)+2] += alpha3*(*v);
657:       y[4*(*idx)+3] += alpha4*(*v);
658:       idx++; v++;
659:     }
660:   }
661:   PetscLogFlops(8.0*a->nz);
662:   VecRestoreArrayRead(xx,&x);
663:   VecRestoreArray(zz,&y);
664:   return(0);
665: }
666: /* ------------------------------------------------------------------------------*/

670: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
671: {
672:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
673:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
674:   const PetscScalar *x,*v;
675:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
676:   PetscErrorCode    ierr;
677:   PetscInt          nonzerorow=0,n,i,jrow,j;
678:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

681:   VecGetArrayRead(xx,&x);
682:   VecGetArray(yy,&y);
683:   idx  = a->j;
684:   v    = a->a;
685:   ii   = a->i;

687:   for (i=0; i<m; i++) {
688:     jrow = ii[i];
689:     n    = ii[i+1] - jrow;
690:     sum1  = 0.0;
691:     sum2  = 0.0;
692:     sum3  = 0.0;
693:     sum4  = 0.0;
694:     sum5  = 0.0;
695:     nonzerorow += (n>0);
696:     for (j=0; j<n; j++) {
697:       sum1 += v[jrow]*x[5*idx[jrow]];
698:       sum2 += v[jrow]*x[5*idx[jrow]+1];
699:       sum3 += v[jrow]*x[5*idx[jrow]+2];
700:       sum4 += v[jrow]*x[5*idx[jrow]+3];
701:       sum5 += v[jrow]*x[5*idx[jrow]+4];
702:       jrow++;
703:      }
704:     y[5*i]   = sum1;
705:     y[5*i+1] = sum2;
706:     y[5*i+2] = sum3;
707:     y[5*i+3] = sum4;
708:     y[5*i+4] = sum5;
709:   }

711:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
712:   VecRestoreArrayRead(xx,&x);
713:   VecRestoreArray(yy,&y);
714:   return(0);
715: }

719: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
720: {
721:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
722:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
723:   const PetscScalar *x,*v;
724:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
725:   PetscErrorCode    ierr;
726:   PetscInt          n,i;
727:   const PetscInt    m = b->AIJ->rmap->n,*idx;

730:   VecSet(yy,0.0);
731:   VecGetArrayRead(xx,&x);
732:   VecGetArray(yy,&y);
733: 
734:   for (i=0; i<m; i++) {
735:     idx    = a->j + a->i[i] ;
736:     v      = a->a + a->i[i] ;
737:     n      = a->i[i+1] - a->i[i];
738:     alpha1 = x[5*i];
739:     alpha2 = x[5*i+1];
740:     alpha3 = x[5*i+2];
741:     alpha4 = x[5*i+3];
742:     alpha5 = x[5*i+4];
743:     while (n-->0) {
744:       y[5*(*idx)]   += alpha1*(*v);
745:       y[5*(*idx)+1] += alpha2*(*v);
746:       y[5*(*idx)+2] += alpha3*(*v);
747:       y[5*(*idx)+3] += alpha4*(*v);
748:       y[5*(*idx)+4] += alpha5*(*v);
749:       idx++; v++;
750:     }
751:   }
752:   PetscLogFlops(10.0*a->nz);
753:   VecRestoreArrayRead(xx,&x);
754:   VecRestoreArray(yy,&y);
755:   return(0);
756: }

760: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
761: {
762:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
763:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
764:   const PetscScalar *x,*v;
765:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
766:   PetscErrorCode    ierr;
767:   PetscInt          n,i,jrow,j;
768:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

771:   if (yy != zz) {VecCopy(yy,zz);}
772:   VecGetArrayRead(xx,&x);
773:   VecGetArray(zz,&y);
774:   idx  = a->j;
775:   v    = a->a;
776:   ii   = a->i;

778:   for (i=0; i<m; i++) {
779:     jrow = ii[i];
780:     n    = ii[i+1] - jrow;
781:     sum1  = 0.0;
782:     sum2  = 0.0;
783:     sum3  = 0.0;
784:     sum4  = 0.0;
785:     sum5  = 0.0;
786:     for (j=0; j<n; j++) {
787:       sum1 += v[jrow]*x[5*idx[jrow]];
788:       sum2 += v[jrow]*x[5*idx[jrow]+1];
789:       sum3 += v[jrow]*x[5*idx[jrow]+2];
790:       sum4 += v[jrow]*x[5*idx[jrow]+3];
791:       sum5 += v[jrow]*x[5*idx[jrow]+4];
792:       jrow++;
793:      }
794:     y[5*i]   += sum1;
795:     y[5*i+1] += sum2;
796:     y[5*i+2] += sum3;
797:     y[5*i+3] += sum4;
798:     y[5*i+4] += sum5;
799:   }

801:   PetscLogFlops(10.0*a->nz);
802:   VecRestoreArrayRead(xx,&x);
803:   VecRestoreArray(zz,&y);
804:   return(0);
805: }

809: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
810: {
811:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
812:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
813:   const PetscScalar *x,*v;
814:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
815:   PetscErrorCode    ierr;
816:   PetscInt          n,i;
817:   const PetscInt    m = b->AIJ->rmap->n,*idx;

820:   if (yy != zz) {VecCopy(yy,zz);}
821:   VecGetArrayRead(xx,&x);
822:   VecGetArray(zz,&y);
823: 
824:   for (i=0; i<m; i++) {
825:     idx    = a->j + a->i[i] ;
826:     v      = a->a + a->i[i] ;
827:     n      = a->i[i+1] - a->i[i];
828:     alpha1 = x[5*i];
829:     alpha2 = x[5*i+1];
830:     alpha3 = x[5*i+2];
831:     alpha4 = x[5*i+3];
832:     alpha5 = x[5*i+4];
833:     while (n-->0) {
834:       y[5*(*idx)]   += alpha1*(*v);
835:       y[5*(*idx)+1] += alpha2*(*v);
836:       y[5*(*idx)+2] += alpha3*(*v);
837:       y[5*(*idx)+3] += alpha4*(*v);
838:       y[5*(*idx)+4] += alpha5*(*v);
839:       idx++; v++;
840:     }
841:   }
842:   PetscLogFlops(10.0*a->nz);
843:   VecRestoreArrayRead(xx,&x);
844:   VecRestoreArray(zz,&y);
845:   return(0);
846: }

848: /* ------------------------------------------------------------------------------*/
851: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
852: {
853:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
854:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
855:   const PetscScalar *x,*v;
856:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
857:   PetscErrorCode    ierr;
858:   PetscInt          nonzerorow=0,n,i,jrow,j;
859:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

862:   VecGetArrayRead(xx,&x);
863:   VecGetArray(yy,&y);
864:   idx  = a->j;
865:   v    = a->a;
866:   ii   = a->i;

868:   for (i=0; i<m; i++) {
869:     jrow = ii[i];
870:     n    = ii[i+1] - jrow;
871:     sum1  = 0.0;
872:     sum2  = 0.0;
873:     sum3  = 0.0;
874:     sum4  = 0.0;
875:     sum5  = 0.0;
876:     sum6  = 0.0;
877:     nonzerorow += (n>0);
878:     for (j=0; j<n; j++) {
879:       sum1 += v[jrow]*x[6*idx[jrow]];
880:       sum2 += v[jrow]*x[6*idx[jrow]+1];
881:       sum3 += v[jrow]*x[6*idx[jrow]+2];
882:       sum4 += v[jrow]*x[6*idx[jrow]+3];
883:       sum5 += v[jrow]*x[6*idx[jrow]+4];
884:       sum6 += v[jrow]*x[6*idx[jrow]+5];
885:       jrow++;
886:      }
887:     y[6*i]   = sum1;
888:     y[6*i+1] = sum2;
889:     y[6*i+2] = sum3;
890:     y[6*i+3] = sum4;
891:     y[6*i+4] = sum5;
892:     y[6*i+5] = sum6;
893:   }

895:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
896:   VecRestoreArrayRead(xx,&x);
897:   VecRestoreArray(yy,&y);
898:   return(0);
899: }

903: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
904: {
905:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
906:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
907:   const PetscScalar *x,*v;
908:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
909:   PetscErrorCode    ierr;
910:   PetscInt          n,i;
911:   const PetscInt    m = b->AIJ->rmap->n,*idx;

914:   VecSet(yy,0.0);
915:   VecGetArrayRead(xx,&x);
916:   VecGetArray(yy,&y);

918:   for (i=0; i<m; i++) {
919:     idx    = a->j + a->i[i] ;
920:     v      = a->a + a->i[i] ;
921:     n      = a->i[i+1] - a->i[i];
922:     alpha1 = x[6*i];
923:     alpha2 = x[6*i+1];
924:     alpha3 = x[6*i+2];
925:     alpha4 = x[6*i+3];
926:     alpha5 = x[6*i+4];
927:     alpha6 = x[6*i+5];
928:     while (n-->0) {
929:       y[6*(*idx)]   += alpha1*(*v);
930:       y[6*(*idx)+1] += alpha2*(*v);
931:       y[6*(*idx)+2] += alpha3*(*v);
932:       y[6*(*idx)+3] += alpha4*(*v);
933:       y[6*(*idx)+4] += alpha5*(*v);
934:       y[6*(*idx)+5] += alpha6*(*v);
935:       idx++; v++;
936:     }
937:   }
938:   PetscLogFlops(12.0*a->nz);
939:   VecRestoreArrayRead(xx,&x);
940:   VecRestoreArray(yy,&y);
941:   return(0);
942: }

946: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947: {
948:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
949:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
950:   const PetscScalar *x,*v;
951:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
952:   PetscErrorCode    ierr;
953:   PetscInt          n,i,jrow,j;
954:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

957:   if (yy != zz) {VecCopy(yy,zz);}
958:   VecGetArrayRead(xx,&x);
959:   VecGetArray(zz,&y);
960:   idx  = a->j;
961:   v    = a->a;
962:   ii   = a->i;

964:   for (i=0; i<m; i++) {
965:     jrow = ii[i];
966:     n    = ii[i+1] - jrow;
967:     sum1  = 0.0;
968:     sum2  = 0.0;
969:     sum3  = 0.0;
970:     sum4  = 0.0;
971:     sum5  = 0.0;
972:     sum6  = 0.0;
973:     for (j=0; j<n; j++) {
974:       sum1 += v[jrow]*x[6*idx[jrow]];
975:       sum2 += v[jrow]*x[6*idx[jrow]+1];
976:       sum3 += v[jrow]*x[6*idx[jrow]+2];
977:       sum4 += v[jrow]*x[6*idx[jrow]+3];
978:       sum5 += v[jrow]*x[6*idx[jrow]+4];
979:       sum6 += v[jrow]*x[6*idx[jrow]+5];
980:       jrow++;
981:      }
982:     y[6*i]   += sum1;
983:     y[6*i+1] += sum2;
984:     y[6*i+2] += sum3;
985:     y[6*i+3] += sum4;
986:     y[6*i+4] += sum5;
987:     y[6*i+5] += sum6;
988:   }

990:   PetscLogFlops(12.0*a->nz);
991:   VecRestoreArrayRead(xx,&x);
992:   VecRestoreArray(zz,&y);
993:   return(0);
994: }

998: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
999: {
1000:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1001:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1002:   const PetscScalar *x,*v;
1003:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1004:   PetscErrorCode    ierr;
1005:   PetscInt          n,i;
1006:   const PetscInt    m = b->AIJ->rmap->n,*idx;

1009:   if (yy != zz) {VecCopy(yy,zz);}
1010:   VecGetArrayRead(xx,&x);
1011:   VecGetArray(zz,&y);
1012: 
1013:   for (i=0; i<m; i++) {
1014:     idx    = a->j + a->i[i] ;
1015:     v      = a->a + a->i[i] ;
1016:     n      = a->i[i+1] - a->i[i];
1017:     alpha1 = x[6*i];
1018:     alpha2 = x[6*i+1];
1019:     alpha3 = x[6*i+2];
1020:     alpha4 = x[6*i+3];
1021:     alpha5 = x[6*i+4];
1022:     alpha6 = x[6*i+5];
1023:     while (n-->0) {
1024:       y[6*(*idx)]   += alpha1*(*v);
1025:       y[6*(*idx)+1] += alpha2*(*v);
1026:       y[6*(*idx)+2] += alpha3*(*v);
1027:       y[6*(*idx)+3] += alpha4*(*v);
1028:       y[6*(*idx)+4] += alpha5*(*v);
1029:       y[6*(*idx)+5] += alpha6*(*v);
1030:       idx++; v++;
1031:     }
1032:   }
1033:   PetscLogFlops(12.0*a->nz);
1034:   VecRestoreArrayRead(xx,&x);
1035:   VecRestoreArray(zz,&y);
1036:   return(0);
1037: }

1039: /* ------------------------------------------------------------------------------*/
1042: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1043: {
1044:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1045:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1046:   const PetscScalar *x,*v;
1047:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1048:   PetscErrorCode    ierr;
1049:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1050:   PetscInt          nonzerorow=0,n,i,jrow,j;

1053:   VecGetArrayRead(xx,&x);
1054:   VecGetArray(yy,&y);
1055:   idx  = a->j;
1056:   v    = a->a;
1057:   ii   = a->i;

1059:   for (i=0; i<m; i++) {
1060:     jrow = ii[i];
1061:     n    = ii[i+1] - jrow;
1062:     sum1  = 0.0;
1063:     sum2  = 0.0;
1064:     sum3  = 0.0;
1065:     sum4  = 0.0;
1066:     sum5  = 0.0;
1067:     sum6  = 0.0;
1068:     sum7  = 0.0;
1069:     nonzerorow += (n>0);
1070:     for (j=0; j<n; j++) {
1071:       sum1 += v[jrow]*x[7*idx[jrow]];
1072:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1073:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1074:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1075:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1076:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1077:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1078:       jrow++;
1079:      }
1080:     y[7*i]   = sum1;
1081:     y[7*i+1] = sum2;
1082:     y[7*i+2] = sum3;
1083:     y[7*i+3] = sum4;
1084:     y[7*i+4] = sum5;
1085:     y[7*i+5] = sum6;
1086:     y[7*i+6] = sum7;
1087:   }

1089:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1090:   VecRestoreArrayRead(xx,&x);
1091:   VecRestoreArray(yy,&y);
1092:   return(0);
1093: }

1097: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1098: {
1099:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1100:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1101:   const PetscScalar *x,*v;
1102:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1103:   PetscErrorCode    ierr;
1104:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1105:   PetscInt          n,i;

1108:   VecSet(yy,0.0);
1109:   VecGetArrayRead(xx,&x);
1110:   VecGetArray(yy,&y);

1112:   for (i=0; i<m; i++) {
1113:     idx    = a->j + a->i[i] ;
1114:     v      = a->a + a->i[i] ;
1115:     n      = a->i[i+1] - a->i[i];
1116:     alpha1 = x[7*i];
1117:     alpha2 = x[7*i+1];
1118:     alpha3 = x[7*i+2];
1119:     alpha4 = x[7*i+3];
1120:     alpha5 = x[7*i+4];
1121:     alpha6 = x[7*i+5];
1122:     alpha7 = x[7*i+6];
1123:     while (n-->0) {
1124:       y[7*(*idx)]   += alpha1*(*v);
1125:       y[7*(*idx)+1] += alpha2*(*v);
1126:       y[7*(*idx)+2] += alpha3*(*v);
1127:       y[7*(*idx)+3] += alpha4*(*v);
1128:       y[7*(*idx)+4] += alpha5*(*v);
1129:       y[7*(*idx)+5] += alpha6*(*v);
1130:       y[7*(*idx)+6] += alpha7*(*v);
1131:       idx++; v++;
1132:     }
1133:   }
1134:   PetscLogFlops(14.0*a->nz);
1135:   VecRestoreArrayRead(xx,&x);
1136:   VecRestoreArray(yy,&y);
1137:   return(0);
1138: }

1142: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1143: {
1144:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1145:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1146:   const PetscScalar *x,*v;
1147:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1148:   PetscErrorCode    ierr;
1149:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1150:   PetscInt          n,i,jrow,j;

1153:   if (yy != zz) {VecCopy(yy,zz);}
1154:   VecGetArrayRead(xx,&x);
1155:   VecGetArray(zz,&y);
1156:   idx  = a->j;
1157:   v    = a->a;
1158:   ii   = a->i;

1160:   for (i=0; i<m; i++) {
1161:     jrow = ii[i];
1162:     n    = ii[i+1] - jrow;
1163:     sum1  = 0.0;
1164:     sum2  = 0.0;
1165:     sum3  = 0.0;
1166:     sum4  = 0.0;
1167:     sum5  = 0.0;
1168:     sum6  = 0.0;
1169:     sum7  = 0.0;
1170:     for (j=0; j<n; j++) {
1171:       sum1 += v[jrow]*x[7*idx[jrow]];
1172:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1173:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1174:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1175:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1176:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1177:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1178:       jrow++;
1179:      }
1180:     y[7*i]   += sum1;
1181:     y[7*i+1] += sum2;
1182:     y[7*i+2] += sum3;
1183:     y[7*i+3] += sum4;
1184:     y[7*i+4] += sum5;
1185:     y[7*i+5] += sum6;
1186:     y[7*i+6] += sum7;
1187:   }

1189:   PetscLogFlops(14.0*a->nz);
1190:   VecRestoreArrayRead(xx,&x);
1191:   VecRestoreArray(zz,&y);
1192:   return(0);
1193: }

1197: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1198: {
1199:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1200:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1201:   const PetscScalar *x,*v;
1202:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1203:   PetscErrorCode    ierr;
1204:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1205:   PetscInt          n,i;

1208:   if (yy != zz) {VecCopy(yy,zz);}
1209:   VecGetArrayRead(xx,&x);
1210:   VecGetArray(zz,&y);
1211:   for (i=0; i<m; i++) {
1212:     idx    = a->j + a->i[i] ;
1213:     v      = a->a + a->i[i] ;
1214:     n      = a->i[i+1] - a->i[i];
1215:     alpha1 = x[7*i];
1216:     alpha2 = x[7*i+1];
1217:     alpha3 = x[7*i+2];
1218:     alpha4 = x[7*i+3];
1219:     alpha5 = x[7*i+4];
1220:     alpha6 = x[7*i+5];
1221:     alpha7 = x[7*i+6];
1222:     while (n-->0) {
1223:       y[7*(*idx)]   += alpha1*(*v);
1224:       y[7*(*idx)+1] += alpha2*(*v);
1225:       y[7*(*idx)+2] += alpha3*(*v);
1226:       y[7*(*idx)+3] += alpha4*(*v);
1227:       y[7*(*idx)+4] += alpha5*(*v);
1228:       y[7*(*idx)+5] += alpha6*(*v);
1229:       y[7*(*idx)+6] += alpha7*(*v);
1230:       idx++; v++;
1231:     }
1232:   }
1233:   PetscLogFlops(14.0*a->nz);
1234:   VecRestoreArrayRead(xx,&x);
1235:   VecRestoreArray(zz,&y);
1236:   return(0);
1237: }

1241: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1242: {
1243:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1244:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1245:   const PetscScalar *x,*v;
1246:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1247:   PetscErrorCode    ierr;
1248:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1249:   PetscInt          nonzerorow=0,n,i,jrow,j;

1252:   VecGetArrayRead(xx,&x);
1253:   VecGetArray(yy,&y);
1254:   idx  = a->j;
1255:   v    = a->a;
1256:   ii   = a->i;

1258:   for (i=0; i<m; i++) {
1259:     jrow = ii[i];
1260:     n    = ii[i+1] - jrow;
1261:     sum1  = 0.0;
1262:     sum2  = 0.0;
1263:     sum3  = 0.0;
1264:     sum4  = 0.0;
1265:     sum5  = 0.0;
1266:     sum6  = 0.0;
1267:     sum7  = 0.0;
1268:     sum8  = 0.0;
1269:     nonzerorow += (n>0);
1270:     for (j=0; j<n; j++) {
1271:       sum1 += v[jrow]*x[8*idx[jrow]];
1272:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1273:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1274:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1275:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1276:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1277:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1278:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1279:       jrow++;
1280:      }
1281:     y[8*i]   = sum1;
1282:     y[8*i+1] = sum2;
1283:     y[8*i+2] = sum3;
1284:     y[8*i+3] = sum4;
1285:     y[8*i+4] = sum5;
1286:     y[8*i+5] = sum6;
1287:     y[8*i+6] = sum7;
1288:     y[8*i+7] = sum8;
1289:   }

1291:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1292:   VecRestoreArrayRead(xx,&x);
1293:   VecRestoreArray(yy,&y);
1294:   return(0);
1295: }

1299: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1300: {
1301:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1302:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1303:   const PetscScalar *x,*v;
1304:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1305:   PetscErrorCode    ierr;
1306:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1307:   PetscInt          n,i;

1310:   VecSet(yy,0.0);
1311:   VecGetArrayRead(xx,&x);
1312:   VecGetArray(yy,&y);

1314:   for (i=0; i<m; i++) {
1315:     idx    = a->j + a->i[i] ;
1316:     v      = a->a + a->i[i] ;
1317:     n      = a->i[i+1] - a->i[i];
1318:     alpha1 = x[8*i];
1319:     alpha2 = x[8*i+1];
1320:     alpha3 = x[8*i+2];
1321:     alpha4 = x[8*i+3];
1322:     alpha5 = x[8*i+4];
1323:     alpha6 = x[8*i+5];
1324:     alpha7 = x[8*i+6];
1325:     alpha8 = x[8*i+7];
1326:     while (n-->0) {
1327:       y[8*(*idx)]   += alpha1*(*v);
1328:       y[8*(*idx)+1] += alpha2*(*v);
1329:       y[8*(*idx)+2] += alpha3*(*v);
1330:       y[8*(*idx)+3] += alpha4*(*v);
1331:       y[8*(*idx)+4] += alpha5*(*v);
1332:       y[8*(*idx)+5] += alpha6*(*v);
1333:       y[8*(*idx)+6] += alpha7*(*v);
1334:       y[8*(*idx)+7] += alpha8*(*v);
1335:       idx++; v++;
1336:     }
1337:   }
1338:   PetscLogFlops(16.0*a->nz);
1339:   VecRestoreArrayRead(xx,&x);
1340:   VecRestoreArray(yy,&y);
1341:   return(0);
1342: }

1346: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1347: {
1348:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1349:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1350:   const PetscScalar *x,*v;
1351:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1352:   PetscErrorCode    ierr;
1353:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1354:   PetscInt          n,i,jrow,j;

1357:   if (yy != zz) {VecCopy(yy,zz);}
1358:   VecGetArrayRead(xx,&x);
1359:   VecGetArray(zz,&y);
1360:   idx  = a->j;
1361:   v    = a->a;
1362:   ii   = a->i;

1364:   for (i=0; i<m; i++) {
1365:     jrow = ii[i];
1366:     n    = ii[i+1] - jrow;
1367:     sum1  = 0.0;
1368:     sum2  = 0.0;
1369:     sum3  = 0.0;
1370:     sum4  = 0.0;
1371:     sum5  = 0.0;
1372:     sum6  = 0.0;
1373:     sum7  = 0.0;
1374:     sum8  = 0.0;
1375:     for (j=0; j<n; j++) {
1376:       sum1 += v[jrow]*x[8*idx[jrow]];
1377:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1378:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1379:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1380:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1381:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1382:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1383:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1384:       jrow++;
1385:      }
1386:     y[8*i]   += sum1;
1387:     y[8*i+1] += sum2;
1388:     y[8*i+2] += sum3;
1389:     y[8*i+3] += sum4;
1390:     y[8*i+4] += sum5;
1391:     y[8*i+5] += sum6;
1392:     y[8*i+6] += sum7;
1393:     y[8*i+7] += sum8;
1394:   }

1396:   PetscLogFlops(16.0*a->nz);
1397:   VecRestoreArrayRead(xx,&x);
1398:   VecRestoreArray(zz,&y);
1399:   return(0);
1400: }

1404: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1405: {
1406:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1407:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1408:   const PetscScalar *x,*v;
1409:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1410:   PetscErrorCode    ierr;
1411:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1412:   PetscInt          n,i;

1415:   if (yy != zz) {VecCopy(yy,zz);}
1416:   VecGetArrayRead(xx,&x);
1417:   VecGetArray(zz,&y);
1418:   for (i=0; i<m; i++) {
1419:     idx    = a->j + a->i[i] ;
1420:     v      = a->a + a->i[i] ;
1421:     n      = a->i[i+1] - a->i[i];
1422:     alpha1 = x[8*i];
1423:     alpha2 = x[8*i+1];
1424:     alpha3 = x[8*i+2];
1425:     alpha4 = x[8*i+3];
1426:     alpha5 = x[8*i+4];
1427:     alpha6 = x[8*i+5];
1428:     alpha7 = x[8*i+6];
1429:     alpha8 = x[8*i+7];
1430:     while (n-->0) {
1431:       y[8*(*idx)]   += alpha1*(*v);
1432:       y[8*(*idx)+1] += alpha2*(*v);
1433:       y[8*(*idx)+2] += alpha3*(*v);
1434:       y[8*(*idx)+3] += alpha4*(*v);
1435:       y[8*(*idx)+4] += alpha5*(*v);
1436:       y[8*(*idx)+5] += alpha6*(*v);
1437:       y[8*(*idx)+6] += alpha7*(*v);
1438:       y[8*(*idx)+7] += alpha8*(*v);
1439:       idx++; v++;
1440:     }
1441:   }
1442:   PetscLogFlops(16.0*a->nz);
1443:   VecRestoreArrayRead(xx,&x);
1444:   VecRestoreArray(zz,&y);
1445:   return(0);
1446: }

1448: /* ------------------------------------------------------------------------------*/
1451: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1452: {
1453:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1454:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1455:   const PetscScalar *x,*v;
1456:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1457:   PetscErrorCode    ierr;
1458:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1459:   PetscInt          nonzerorow=0,n,i,jrow,j;

1462:   VecGetArrayRead(xx,&x);
1463:   VecGetArray(yy,&y);
1464:   idx  = a->j;
1465:   v    = a->a;
1466:   ii   = a->i;

1468:   for (i=0; i<m; i++) {
1469:     jrow = ii[i];
1470:     n    = ii[i+1] - jrow;
1471:     sum1  = 0.0;
1472:     sum2  = 0.0;
1473:     sum3  = 0.0;
1474:     sum4  = 0.0;
1475:     sum5  = 0.0;
1476:     sum6  = 0.0;
1477:     sum7  = 0.0;
1478:     sum8  = 0.0;
1479:     sum9  = 0.0;
1480:     nonzerorow += (n>0);
1481:     for (j=0; j<n; j++) {
1482:       sum1 += v[jrow]*x[9*idx[jrow]];
1483:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1484:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1485:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1486:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1487:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1488:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1489:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1490:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1491:       jrow++;
1492:      }
1493:     y[9*i]   = sum1;
1494:     y[9*i+1] = sum2;
1495:     y[9*i+2] = sum3;
1496:     y[9*i+3] = sum4;
1497:     y[9*i+4] = sum5;
1498:     y[9*i+5] = sum6;
1499:     y[9*i+6] = sum7;
1500:     y[9*i+7] = sum8;
1501:     y[9*i+8] = sum9;
1502:   }

1504:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1505:   VecRestoreArrayRead(xx,&x);
1506:   VecRestoreArray(yy,&y);
1507:   return(0);
1508: }

1510: /* ------------------------------------------------------------------------------*/

1514: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1515: {
1516:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1517:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1518:   const PetscScalar *x,*v;
1519:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1520:   PetscErrorCode    ierr;
1521:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1522:   PetscInt          n,i;

1525:   VecSet(yy,0.0);
1526:   VecGetArrayRead(xx,&x);
1527:   VecGetArray(yy,&y);

1529:   for (i=0; i<m; i++) {
1530:     idx    = a->j + a->i[i] ;
1531:     v      = a->a + a->i[i] ;
1532:     n      = a->i[i+1] - a->i[i];
1533:     alpha1 = x[9*i];
1534:     alpha2 = x[9*i+1];
1535:     alpha3 = x[9*i+2];
1536:     alpha4 = x[9*i+3];
1537:     alpha5 = x[9*i+4];
1538:     alpha6 = x[9*i+5];
1539:     alpha7 = x[9*i+6];
1540:     alpha8 = x[9*i+7];
1541:     alpha9 = x[9*i+8];
1542:     while (n-->0) {
1543:       y[9*(*idx)]   += alpha1*(*v);
1544:       y[9*(*idx)+1] += alpha2*(*v);
1545:       y[9*(*idx)+2] += alpha3*(*v);
1546:       y[9*(*idx)+3] += alpha4*(*v);
1547:       y[9*(*idx)+4] += alpha5*(*v);
1548:       y[9*(*idx)+5] += alpha6*(*v);
1549:       y[9*(*idx)+6] += alpha7*(*v);
1550:       y[9*(*idx)+7] += alpha8*(*v);
1551:       y[9*(*idx)+8] += alpha9*(*v);
1552:       idx++; v++;
1553:     }
1554:   }
1555:   PetscLogFlops(18.0*a->nz);
1556:   VecRestoreArrayRead(xx,&x);
1557:   VecRestoreArray(yy,&y);
1558:   return(0);
1559: }

1563: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1564: {
1565:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1566:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1567:   const PetscScalar *x,*v;
1568:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1569:   PetscErrorCode    ierr;
1570:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1571:   PetscInt          n,i,jrow,j;

1574:   if (yy != zz) {VecCopy(yy,zz);}
1575:   VecGetArrayRead(xx,&x);
1576:   VecGetArray(zz,&y);
1577:   idx  = a->j;
1578:   v    = a->a;
1579:   ii   = a->i;

1581:   for (i=0; i<m; i++) {
1582:     jrow = ii[i];
1583:     n    = ii[i+1] - jrow;
1584:     sum1  = 0.0;
1585:     sum2  = 0.0;
1586:     sum3  = 0.0;
1587:     sum4  = 0.0;
1588:     sum5  = 0.0;
1589:     sum6  = 0.0;
1590:     sum7  = 0.0;
1591:     sum8  = 0.0;
1592:     sum9  = 0.0;
1593:     for (j=0; j<n; j++) {
1594:       sum1 += v[jrow]*x[9*idx[jrow]];
1595:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1596:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1597:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1598:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1599:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1600:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1601:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1602:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1603:       jrow++;
1604:      }
1605:     y[9*i]   += sum1;
1606:     y[9*i+1] += sum2;
1607:     y[9*i+2] += sum3;
1608:     y[9*i+3] += sum4;
1609:     y[9*i+4] += sum5;
1610:     y[9*i+5] += sum6;
1611:     y[9*i+6] += sum7;
1612:     y[9*i+7] += sum8;
1613:     y[9*i+8] += sum9;
1614:   }

1616:   PetscLogFlops(18*a->nz);
1617:   VecRestoreArrayRead(xx,&x);
1618:   VecRestoreArray(zz,&y);
1619:   return(0);
1620: }

1624: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1625: {
1626:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1627:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1628:   const PetscScalar *x,*v;
1629:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1630:   PetscErrorCode    ierr;
1631:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1632:   PetscInt          n,i;

1635:   if (yy != zz) {VecCopy(yy,zz);}
1636:   VecGetArrayRead(xx,&x);
1637:   VecGetArray(zz,&y);
1638:   for (i=0; i<m; i++) {
1639:     idx    = a->j + a->i[i] ;
1640:     v      = a->a + a->i[i] ;
1641:     n      = a->i[i+1] - a->i[i];
1642:     alpha1 = x[9*i];
1643:     alpha2 = x[9*i+1];
1644:     alpha3 = x[9*i+2];
1645:     alpha4 = x[9*i+3];
1646:     alpha5 = x[9*i+4];
1647:     alpha6 = x[9*i+5];
1648:     alpha7 = x[9*i+6];
1649:     alpha8 = x[9*i+7];
1650:     alpha9 = x[9*i+8];
1651:     while (n-->0) {
1652:       y[9*(*idx)]   += alpha1*(*v);
1653:       y[9*(*idx)+1] += alpha2*(*v);
1654:       y[9*(*idx)+2] += alpha3*(*v);
1655:       y[9*(*idx)+3] += alpha4*(*v);
1656:       y[9*(*idx)+4] += alpha5*(*v);
1657:       y[9*(*idx)+5] += alpha6*(*v);
1658:       y[9*(*idx)+6] += alpha7*(*v);
1659:       y[9*(*idx)+7] += alpha8*(*v);
1660:       y[9*(*idx)+8] += alpha9*(*v);
1661:       idx++; v++;
1662:     }
1663:   }
1664:   PetscLogFlops(18.0*a->nz);
1665:   VecRestoreArrayRead(xx,&x);
1666:   VecRestoreArray(zz,&y);
1667:   return(0);
1668: }
1671: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1672: {
1673:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1674:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1675:   const PetscScalar *x,*v;
1676:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1677:   PetscErrorCode    ierr;
1678:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1679:   PetscInt          nonzerorow=0,n,i,jrow,j;

1682:   VecGetArrayRead(xx,&x);
1683:   VecGetArray(yy,&y);
1684:   idx  = a->j;
1685:   v    = a->a;
1686:   ii   = a->i;

1688:   for (i=0; i<m; i++) {
1689:     jrow = ii[i];
1690:     n    = ii[i+1] - jrow;
1691:     sum1  = 0.0;
1692:     sum2  = 0.0;
1693:     sum3  = 0.0;
1694:     sum4  = 0.0;
1695:     sum5  = 0.0;
1696:     sum6  = 0.0;
1697:     sum7  = 0.0;
1698:     sum8  = 0.0;
1699:     sum9  = 0.0;
1700:     sum10 = 0.0;
1701:     nonzerorow += (n>0);
1702:     for (j=0; j<n; j++) {
1703:       sum1  += v[jrow]*x[10*idx[jrow]];
1704:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1705:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1706:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1707:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1708:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1709:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1710:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1711:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1712:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1713:       jrow++;
1714:      }
1715:     y[10*i]   = sum1;
1716:     y[10*i+1] = sum2;
1717:     y[10*i+2] = sum3;
1718:     y[10*i+3] = sum4;
1719:     y[10*i+4] = sum5;
1720:     y[10*i+5] = sum6;
1721:     y[10*i+6] = sum7;
1722:     y[10*i+7] = sum8;
1723:     y[10*i+8] = sum9;
1724:     y[10*i+9] = sum10;
1725:   }

1727:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1728:   VecRestoreArrayRead(xx,&x);
1729:   VecRestoreArray(yy,&y);
1730:   return(0);
1731: }

1735: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1736: {
1737:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1738:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1739:   const PetscScalar *x,*v;
1740:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1741:   PetscErrorCode    ierr;
1742:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1743:   PetscInt          n,i,jrow,j;

1746:   if (yy != zz) {VecCopy(yy,zz);}
1747:   VecGetArrayRead(xx,&x);
1748:   VecGetArray(zz,&y);
1749:   idx  = a->j;
1750:   v    = a->a;
1751:   ii   = a->i;

1753:   for (i=0; i<m; i++) {
1754:     jrow = ii[i];
1755:     n    = ii[i+1] - jrow;
1756:     sum1  = 0.0;
1757:     sum2  = 0.0;
1758:     sum3  = 0.0;
1759:     sum4  = 0.0;
1760:     sum5  = 0.0;
1761:     sum6  = 0.0;
1762:     sum7  = 0.0;
1763:     sum8  = 0.0;
1764:     sum9  = 0.0;
1765:     sum10 = 0.0;
1766:     for (j=0; j<n; j++) {
1767:       sum1  += v[jrow]*x[10*idx[jrow]];
1768:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1769:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1770:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1771:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1772:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1773:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1774:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1775:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1776:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1777:       jrow++;
1778:      }
1779:     y[10*i]   += sum1;
1780:     y[10*i+1] += sum2;
1781:     y[10*i+2] += sum3;
1782:     y[10*i+3] += sum4;
1783:     y[10*i+4] += sum5;
1784:     y[10*i+5] += sum6;
1785:     y[10*i+6] += sum7;
1786:     y[10*i+7] += sum8;
1787:     y[10*i+8] += sum9;
1788:     y[10*i+9] += sum10;
1789:   }

1791:   PetscLogFlops(20.0*a->nz);
1792:   VecRestoreArrayRead(xx,&x);
1793:   VecRestoreArray(yy,&y);
1794:   return(0);
1795: }

1799: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1800: {
1801:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1802:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1803:   const PetscScalar *x,*v;
1804:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1805:   PetscErrorCode    ierr;
1806:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1807:   PetscInt          n,i;

1810:   VecSet(yy,0.0);
1811:   VecGetArrayRead(xx,&x);
1812:   VecGetArray(yy,&y);

1814:   for (i=0; i<m; i++) {
1815:     idx    = a->j + a->i[i] ;
1816:     v      = a->a + a->i[i] ;
1817:     n      = a->i[i+1] - a->i[i];
1818:     alpha1 = x[10*i];
1819:     alpha2 = x[10*i+1];
1820:     alpha3 = x[10*i+2];
1821:     alpha4 = x[10*i+3];
1822:     alpha5 = x[10*i+4];
1823:     alpha6 = x[10*i+5];
1824:     alpha7 = x[10*i+6];
1825:     alpha8 = x[10*i+7];
1826:     alpha9 = x[10*i+8];
1827:     alpha10 = x[10*i+9];
1828:     while (n-->0) {
1829:       y[10*(*idx)]   += alpha1*(*v);
1830:       y[10*(*idx)+1] += alpha2*(*v);
1831:       y[10*(*idx)+2] += alpha3*(*v);
1832:       y[10*(*idx)+3] += alpha4*(*v);
1833:       y[10*(*idx)+4] += alpha5*(*v);
1834:       y[10*(*idx)+5] += alpha6*(*v);
1835:       y[10*(*idx)+6] += alpha7*(*v);
1836:       y[10*(*idx)+7] += alpha8*(*v);
1837:       y[10*(*idx)+8] += alpha9*(*v);
1838:       y[10*(*idx)+9] += alpha10*(*v);
1839:       idx++; v++;
1840:     }
1841:   }
1842:   PetscLogFlops(20.0*a->nz);
1843:   VecRestoreArrayRead(xx,&x);
1844:   VecRestoreArray(yy,&y);
1845:   return(0);
1846: }

1850: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1851: {
1852:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1853:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1854:   const PetscScalar *x,*v;
1855:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1856:   PetscErrorCode    ierr;
1857:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1858:   PetscInt          n,i;

1861:   if (yy != zz) {VecCopy(yy,zz);}
1862:   VecGetArrayRead(xx,&x);
1863:   VecGetArray(zz,&y);
1864:   for (i=0; i<m; i++) {
1865:     idx    = a->j + a->i[i] ;
1866:     v      = a->a + a->i[i] ;
1867:     n      = a->i[i+1] - a->i[i];
1868:     alpha1 = x[10*i];
1869:     alpha2 = x[10*i+1];
1870:     alpha3 = x[10*i+2];
1871:     alpha4 = x[10*i+3];
1872:     alpha5 = x[10*i+4];
1873:     alpha6 = x[10*i+5];
1874:     alpha7 = x[10*i+6];
1875:     alpha8 = x[10*i+7];
1876:     alpha9 = x[10*i+8];
1877:     alpha10 = x[10*i+9];
1878:     while (n-->0) {
1879:       y[10*(*idx)]   += alpha1*(*v);
1880:       y[10*(*idx)+1] += alpha2*(*v);
1881:       y[10*(*idx)+2] += alpha3*(*v);
1882:       y[10*(*idx)+3] += alpha4*(*v);
1883:       y[10*(*idx)+4] += alpha5*(*v);
1884:       y[10*(*idx)+5] += alpha6*(*v);
1885:       y[10*(*idx)+6] += alpha7*(*v);
1886:       y[10*(*idx)+7] += alpha8*(*v);
1887:       y[10*(*idx)+8] += alpha9*(*v);
1888:       y[10*(*idx)+9] += alpha10*(*v);
1889:       idx++; v++;
1890:     }
1891:   }
1892:   PetscLogFlops(20.0*a->nz);
1893:   VecRestoreArrayRead(xx,&x);
1894:   VecRestoreArray(zz,&y);
1895:   return(0);
1896: }


1899: /*--------------------------------------------------------------------------------------------*/
1902: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1903: {
1904:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1905:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1906:   const PetscScalar *x,*v;
1907:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1908:   PetscErrorCode    ierr;
1909:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1910:   PetscInt          nonzerorow=0,n,i,jrow,j;

1913:   VecGetArrayRead(xx,&x);
1914:   VecGetArray(yy,&y);
1915:   idx  = a->j;
1916:   v    = a->a;
1917:   ii   = a->i;

1919:   for (i=0; i<m; i++) {
1920:     jrow = ii[i];
1921:     n    = ii[i+1] - jrow;
1922:     sum1  = 0.0;
1923:     sum2  = 0.0;
1924:     sum3  = 0.0;
1925:     sum4  = 0.0;
1926:     sum5  = 0.0;
1927:     sum6  = 0.0;
1928:     sum7  = 0.0;
1929:     sum8  = 0.0;
1930:     sum9  = 0.0;
1931:     sum10 = 0.0;
1932:     sum11 = 0.0;
1933:     nonzerorow += (n>0);
1934:     for (j=0; j<n; j++) {
1935:       sum1  += v[jrow]*x[11*idx[jrow]];
1936:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1937:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1938:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1939:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1940:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1941:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1942:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1943:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1944:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1945:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1946:       jrow++;
1947:      }
1948:     y[11*i]   = sum1;
1949:     y[11*i+1] = sum2;
1950:     y[11*i+2] = sum3;
1951:     y[11*i+3] = sum4;
1952:     y[11*i+4] = sum5;
1953:     y[11*i+5] = sum6;
1954:     y[11*i+6] = sum7;
1955:     y[11*i+7] = sum8;
1956:     y[11*i+8] = sum9;
1957:     y[11*i+9] = sum10;
1958:     y[11*i+10] = sum11;
1959:   }

1961:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1962:   VecRestoreArrayRead(xx,&x);
1963:   VecRestoreArray(yy,&y);
1964:   return(0);
1965: }

1969: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1970: {
1971:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1972:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1973:   const PetscScalar *x,*v;
1974:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1975:   PetscErrorCode    ierr;
1976:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1977:   PetscInt          n,i,jrow,j;

1980:   if (yy != zz) {VecCopy(yy,zz);}
1981:   VecGetArrayRead(xx,&x);
1982:   VecGetArray(zz,&y);
1983:   idx  = a->j;
1984:   v    = a->a;
1985:   ii   = a->i;

1987:   for (i=0; i<m; i++) {
1988:     jrow = ii[i];
1989:     n    = ii[i+1] - jrow;
1990:     sum1  = 0.0;
1991:     sum2  = 0.0;
1992:     sum3  = 0.0;
1993:     sum4  = 0.0;
1994:     sum5  = 0.0;
1995:     sum6  = 0.0;
1996:     sum7  = 0.0;
1997:     sum8  = 0.0;
1998:     sum9  = 0.0;
1999:     sum10 = 0.0;
2000:     sum11 = 0.0;
2001:     for (j=0; j<n; j++) {
2002:       sum1  += v[jrow]*x[11*idx[jrow]];
2003:       sum2  += v[jrow]*x[11*idx[jrow]+1];
2004:       sum3  += v[jrow]*x[11*idx[jrow]+2];
2005:       sum4  += v[jrow]*x[11*idx[jrow]+3];
2006:       sum5  += v[jrow]*x[11*idx[jrow]+4];
2007:       sum6  += v[jrow]*x[11*idx[jrow]+5];
2008:       sum7  += v[jrow]*x[11*idx[jrow]+6];
2009:       sum8  += v[jrow]*x[11*idx[jrow]+7];
2010:       sum9  += v[jrow]*x[11*idx[jrow]+8];
2011:       sum10 += v[jrow]*x[11*idx[jrow]+9];
2012:       sum11 += v[jrow]*x[11*idx[jrow]+10];
2013:       jrow++;
2014:      }
2015:     y[11*i]   += sum1;
2016:     y[11*i+1] += sum2;
2017:     y[11*i+2] += sum3;
2018:     y[11*i+3] += sum4;
2019:     y[11*i+4] += sum5;
2020:     y[11*i+5] += sum6;
2021:     y[11*i+6] += sum7;
2022:     y[11*i+7] += sum8;
2023:     y[11*i+8] += sum9;
2024:     y[11*i+9] += sum10;
2025:     y[11*i+10] += sum11;
2026:   }

2028:   PetscLogFlops(22*a->nz);
2029:   VecRestoreArrayRead(xx,&x);
2030:   VecRestoreArray(yy,&y);
2031:   return(0);
2032: }

2036: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2037: {
2038:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2039:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2040:   const PetscScalar *x,*v;
2041:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2042:   PetscErrorCode    ierr;
2043:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2044:   PetscInt          n,i;

2047:   VecSet(yy,0.0);
2048:   VecGetArrayRead(xx,&x);
2049:   VecGetArray(yy,&y);

2051:   for (i=0; i<m; i++) {
2052:     idx    = a->j + a->i[i] ;
2053:     v      = a->a + a->i[i] ;
2054:     n      = a->i[i+1] - a->i[i];
2055:     alpha1 = x[11*i];
2056:     alpha2 = x[11*i+1];
2057:     alpha3 = x[11*i+2];
2058:     alpha4 = x[11*i+3];
2059:     alpha5 = x[11*i+4];
2060:     alpha6 = x[11*i+5];
2061:     alpha7 = x[11*i+6];
2062:     alpha8 = x[11*i+7];
2063:     alpha9 = x[11*i+8];
2064:     alpha10 = x[11*i+9];
2065:     alpha11 = x[11*i+10];
2066:     while (n-->0) {
2067:       y[11*(*idx)]   += alpha1*(*v);
2068:       y[11*(*idx)+1] += alpha2*(*v);
2069:       y[11*(*idx)+2] += alpha3*(*v);
2070:       y[11*(*idx)+3] += alpha4*(*v);
2071:       y[11*(*idx)+4] += alpha5*(*v);
2072:       y[11*(*idx)+5] += alpha6*(*v);
2073:       y[11*(*idx)+6] += alpha7*(*v);
2074:       y[11*(*idx)+7] += alpha8*(*v);
2075:       y[11*(*idx)+8] += alpha9*(*v);
2076:       y[11*(*idx)+9] += alpha10*(*v);
2077:       y[11*(*idx)+10] += alpha11*(*v);
2078:       idx++; v++;
2079:     }
2080:   }
2081:   PetscLogFlops(22*a->nz);
2082:   VecRestoreArrayRead(xx,&x);
2083:   VecRestoreArray(yy,&y);
2084:   return(0);
2085: }

2089: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2090: {
2091:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2092:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2093:   const PetscScalar *x,*v;
2094:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2095:   PetscErrorCode    ierr;
2096:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2097:   PetscInt          n,i;

2100:   if (yy != zz) {VecCopy(yy,zz);}
2101:   VecGetArrayRead(xx,&x);
2102:   VecGetArray(zz,&y);
2103:   for (i=0; i<m; i++) {
2104:     idx    = a->j + a->i[i] ;
2105:     v      = a->a + a->i[i] ;
2106:     n      = a->i[i+1] - a->i[i];
2107:     alpha1 = x[11*i];
2108:     alpha2 = x[11*i+1];
2109:     alpha3 = x[11*i+2];
2110:     alpha4 = x[11*i+3];
2111:     alpha5 = x[11*i+4];
2112:     alpha6 = x[11*i+5];
2113:     alpha7 = x[11*i+6];
2114:     alpha8 = x[11*i+7];
2115:     alpha9 = x[11*i+8];
2116:     alpha10 = x[11*i+9];
2117:     alpha11 = x[11*i+10];
2118:     while (n-->0) {
2119:       y[11*(*idx)]   += alpha1*(*v);
2120:       y[11*(*idx)+1] += alpha2*(*v);
2121:       y[11*(*idx)+2] += alpha3*(*v);
2122:       y[11*(*idx)+3] += alpha4*(*v);
2123:       y[11*(*idx)+4] += alpha5*(*v);
2124:       y[11*(*idx)+5] += alpha6*(*v);
2125:       y[11*(*idx)+6] += alpha7*(*v);
2126:       y[11*(*idx)+7] += alpha8*(*v);
2127:       y[11*(*idx)+8] += alpha9*(*v);
2128:       y[11*(*idx)+9] += alpha10*(*v);
2129:       y[11*(*idx)+10] += alpha11*(*v);
2130:       idx++; v++;
2131:     }
2132:   }
2133:   PetscLogFlops(22*a->nz);
2134:   VecRestoreArrayRead(xx,&x);
2135:   VecRestoreArray(zz,&y);
2136:   return(0);
2137: }


2140: /*--------------------------------------------------------------------------------------------*/
2143: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2144: {
2145:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2146:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2147:   const PetscScalar *x,*v;
2148:   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2149:   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2150:   PetscErrorCode     ierr;
2151:   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2152:   PetscInt           nonzerorow=0,n,i,jrow,j;

2155:   VecGetArrayRead(xx,&x);
2156:   VecGetArray(yy,&y);
2157:   idx  = a->j;
2158:   v    = a->a;
2159:   ii   = a->i;

2161:   for (i=0; i<m; i++) {
2162:     jrow = ii[i];
2163:     n    = ii[i+1] - jrow;
2164:     sum1  = 0.0;
2165:     sum2  = 0.0;
2166:     sum3  = 0.0;
2167:     sum4  = 0.0;
2168:     sum5  = 0.0;
2169:     sum6  = 0.0;
2170:     sum7  = 0.0;
2171:     sum8  = 0.0;
2172:     sum9  = 0.0;
2173:     sum10 = 0.0;
2174:     sum11 = 0.0;
2175:     sum12 = 0.0;
2176:     sum13 = 0.0;
2177:     sum14 = 0.0;
2178:     sum15 = 0.0;
2179:     sum16 = 0.0;
2180:     nonzerorow += (n>0);
2181:     for (j=0; j<n; j++) {
2182:       sum1  += v[jrow]*x[16*idx[jrow]];
2183:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2184:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2185:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2186:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2187:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2188:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2189:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2190:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2191:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2192:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2193:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2194:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2195:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2196:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2197:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2198:       jrow++;
2199:      }
2200:     y[16*i]    = sum1;
2201:     y[16*i+1]  = sum2;
2202:     y[16*i+2]  = sum3;
2203:     y[16*i+3]  = sum4;
2204:     y[16*i+4]  = sum5;
2205:     y[16*i+5]  = sum6;
2206:     y[16*i+6]  = sum7;
2207:     y[16*i+7]  = sum8;
2208:     y[16*i+8]  = sum9;
2209:     y[16*i+9]  = sum10;
2210:     y[16*i+10] = sum11;
2211:     y[16*i+11] = sum12;
2212:     y[16*i+12] = sum13;
2213:     y[16*i+13] = sum14;
2214:     y[16*i+14] = sum15;
2215:     y[16*i+15] = sum16;
2216:   }

2218:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2219:   VecRestoreArrayRead(xx,&x);
2220:   VecRestoreArray(yy,&y);
2221:   return(0);
2222: }

2226: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2227: {
2228:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2229:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2230:   const PetscScalar *x,*v;
2231:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2232:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2233:   PetscErrorCode    ierr;
2234:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2235:   PetscInt          n,i;

2238:   VecSet(yy,0.0);
2239:   VecGetArrayRead(xx,&x);
2240:   VecGetArray(yy,&y);

2242:   for (i=0; i<m; i++) {
2243:     idx    = a->j + a->i[i] ;
2244:     v      = a->a + a->i[i] ;
2245:     n      = a->i[i+1] - a->i[i];
2246:     alpha1  = x[16*i];
2247:     alpha2  = x[16*i+1];
2248:     alpha3  = x[16*i+2];
2249:     alpha4  = x[16*i+3];
2250:     alpha5  = x[16*i+4];
2251:     alpha6  = x[16*i+5];
2252:     alpha7  = x[16*i+6];
2253:     alpha8  = x[16*i+7];
2254:     alpha9  = x[16*i+8];
2255:     alpha10 = x[16*i+9];
2256:     alpha11 = x[16*i+10];
2257:     alpha12 = x[16*i+11];
2258:     alpha13 = x[16*i+12];
2259:     alpha14 = x[16*i+13];
2260:     alpha15 = x[16*i+14];
2261:     alpha16 = x[16*i+15];
2262:     while (n-->0) {
2263:       y[16*(*idx)]    += alpha1*(*v);
2264:       y[16*(*idx)+1]  += alpha2*(*v);
2265:       y[16*(*idx)+2]  += alpha3*(*v);
2266:       y[16*(*idx)+3]  += alpha4*(*v);
2267:       y[16*(*idx)+4]  += alpha5*(*v);
2268:       y[16*(*idx)+5]  += alpha6*(*v);
2269:       y[16*(*idx)+6]  += alpha7*(*v);
2270:       y[16*(*idx)+7]  += alpha8*(*v);
2271:       y[16*(*idx)+8]  += alpha9*(*v);
2272:       y[16*(*idx)+9]  += alpha10*(*v);
2273:       y[16*(*idx)+10] += alpha11*(*v);
2274:       y[16*(*idx)+11] += alpha12*(*v);
2275:       y[16*(*idx)+12] += alpha13*(*v);
2276:       y[16*(*idx)+13] += alpha14*(*v);
2277:       y[16*(*idx)+14] += alpha15*(*v);
2278:       y[16*(*idx)+15] += alpha16*(*v);
2279:       idx++; v++;
2280:     }
2281:   }
2282:   PetscLogFlops(32.0*a->nz);
2283:   VecRestoreArrayRead(xx,&x);
2284:   VecRestoreArray(yy,&y);
2285:   return(0);
2286: }

2290: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2291: {
2292:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2293:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2294:   const PetscScalar *x,*v;
2295:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2296:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2297:   PetscErrorCode    ierr;
2298:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2299:   PetscInt          n,i,jrow,j;

2302:   if (yy != zz) {VecCopy(yy,zz);}
2303:   VecGetArrayRead(xx,&x);
2304:   VecGetArray(zz,&y);
2305:   idx  = a->j;
2306:   v    = a->a;
2307:   ii   = a->i;

2309:   for (i=0; i<m; i++) {
2310:     jrow = ii[i];
2311:     n    = ii[i+1] - jrow;
2312:     sum1  = 0.0;
2313:     sum2  = 0.0;
2314:     sum3  = 0.0;
2315:     sum4  = 0.0;
2316:     sum5  = 0.0;
2317:     sum6  = 0.0;
2318:     sum7  = 0.0;
2319:     sum8  = 0.0;
2320:     sum9  = 0.0;
2321:     sum10 = 0.0;
2322:     sum11 = 0.0;
2323:     sum12 = 0.0;
2324:     sum13 = 0.0;
2325:     sum14 = 0.0;
2326:     sum15 = 0.0;
2327:     sum16 = 0.0;
2328:     for (j=0; j<n; j++) {
2329:       sum1  += v[jrow]*x[16*idx[jrow]];
2330:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2331:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2332:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2333:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2334:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2335:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2336:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2337:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2338:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2339:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2340:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2341:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2342:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2343:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2344:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2345:       jrow++;
2346:      }
2347:     y[16*i]    += sum1;
2348:     y[16*i+1]  += sum2;
2349:     y[16*i+2]  += sum3;
2350:     y[16*i+3]  += sum4;
2351:     y[16*i+4]  += sum5;
2352:     y[16*i+5]  += sum6;
2353:     y[16*i+6]  += sum7;
2354:     y[16*i+7]  += sum8;
2355:     y[16*i+8]  += sum9;
2356:     y[16*i+9]  += sum10;
2357:     y[16*i+10] += sum11;
2358:     y[16*i+11] += sum12;
2359:     y[16*i+12] += sum13;
2360:     y[16*i+13] += sum14;
2361:     y[16*i+14] += sum15;
2362:     y[16*i+15] += sum16;
2363:   }

2365:   PetscLogFlops(32.0*a->nz);
2366:   VecRestoreArrayRead(xx,&x);
2367:   VecRestoreArray(zz,&y);
2368:   return(0);
2369: }

2373: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2374: {
2375:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2376:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2377:   const PetscScalar *x,*v;
2378:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2379:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2380:   PetscErrorCode    ierr;
2381:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2382:   PetscInt          n,i;

2385:   if (yy != zz) {VecCopy(yy,zz);}
2386:   VecGetArrayRead(xx,&x);
2387:   VecGetArray(zz,&y);
2388:   for (i=0; i<m; i++) {
2389:     idx    = a->j + a->i[i] ;
2390:     v      = a->a + a->i[i] ;
2391:     n      = a->i[i+1] - a->i[i];
2392:     alpha1 = x[16*i];
2393:     alpha2 = x[16*i+1];
2394:     alpha3 = x[16*i+2];
2395:     alpha4 = x[16*i+3];
2396:     alpha5 = x[16*i+4];
2397:     alpha6 = x[16*i+5];
2398:     alpha7 = x[16*i+6];
2399:     alpha8 = x[16*i+7];
2400:     alpha9  = x[16*i+8];
2401:     alpha10 = x[16*i+9];
2402:     alpha11 = x[16*i+10];
2403:     alpha12 = x[16*i+11];
2404:     alpha13 = x[16*i+12];
2405:     alpha14 = x[16*i+13];
2406:     alpha15 = x[16*i+14];
2407:     alpha16 = x[16*i+15];
2408:     while (n-->0) {
2409:       y[16*(*idx)]   += alpha1*(*v);
2410:       y[16*(*idx)+1] += alpha2*(*v);
2411:       y[16*(*idx)+2] += alpha3*(*v);
2412:       y[16*(*idx)+3] += alpha4*(*v);
2413:       y[16*(*idx)+4] += alpha5*(*v);
2414:       y[16*(*idx)+5] += alpha6*(*v);
2415:       y[16*(*idx)+6] += alpha7*(*v);
2416:       y[16*(*idx)+7] += alpha8*(*v);
2417:       y[16*(*idx)+8]  += alpha9*(*v);
2418:       y[16*(*idx)+9]  += alpha10*(*v);
2419:       y[16*(*idx)+10] += alpha11*(*v);
2420:       y[16*(*idx)+11] += alpha12*(*v);
2421:       y[16*(*idx)+12] += alpha13*(*v);
2422:       y[16*(*idx)+13] += alpha14*(*v);
2423:       y[16*(*idx)+14] += alpha15*(*v);
2424:       y[16*(*idx)+15] += alpha16*(*v);
2425:       idx++; v++;
2426:     }
2427:   }
2428:   PetscLogFlops(32.0*a->nz);
2429:   VecRestoreArrayRead(xx,&x);
2430:   VecRestoreArray(zz,&y);
2431:   return(0);
2432: }

2434: /*--------------------------------------------------------------------------------------------*/
2437: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2438: {
2439:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2440:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2441:   const PetscScalar *x,*v;
2442:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2443:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2444:   PetscErrorCode    ierr;
2445:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2446:   PetscInt          nonzerorow=0,n,i,jrow,j;

2449:   VecGetArrayRead(xx,&x);
2450:   VecGetArray(yy,&y);
2451:   idx  = a->j;
2452:   v    = a->a;
2453:   ii   = a->i;

2455:   for (i=0; i<m; i++) {
2456:     jrow = ii[i];
2457:     n    = ii[i+1] - jrow;
2458:     sum1  = 0.0;
2459:     sum2  = 0.0;
2460:     sum3  = 0.0;
2461:     sum4  = 0.0;
2462:     sum5  = 0.0;
2463:     sum6  = 0.0;
2464:     sum7  = 0.0;
2465:     sum8  = 0.0;
2466:     sum9  = 0.0;
2467:     sum10 = 0.0;
2468:     sum11 = 0.0;
2469:     sum12 = 0.0;
2470:     sum13 = 0.0;
2471:     sum14 = 0.0;
2472:     sum15 = 0.0;
2473:     sum16 = 0.0;
2474:     sum17 = 0.0;
2475:     sum18 = 0.0;
2476:     nonzerorow += (n>0);
2477:     for (j=0; j<n; j++) {
2478:       sum1  += v[jrow]*x[18*idx[jrow]];
2479:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2480:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2481:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2482:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2483:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2484:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2485:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2486:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2487:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2488:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2489:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2490:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2491:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2492:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2493:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2494:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2495:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2496:       jrow++;
2497:      }
2498:     y[18*i]    = sum1;
2499:     y[18*i+1]  = sum2;
2500:     y[18*i+2]  = sum3;
2501:     y[18*i+3]  = sum4;
2502:     y[18*i+4]  = sum5;
2503:     y[18*i+5]  = sum6;
2504:     y[18*i+6]  = sum7;
2505:     y[18*i+7]  = sum8;
2506:     y[18*i+8]  = sum9;
2507:     y[18*i+9]  = sum10;
2508:     y[18*i+10] = sum11;
2509:     y[18*i+11] = sum12;
2510:     y[18*i+12] = sum13;
2511:     y[18*i+13] = sum14;
2512:     y[18*i+14] = sum15;
2513:     y[18*i+15] = sum16;
2514:     y[18*i+16] = sum17;
2515:     y[18*i+17] = sum18;
2516:   }

2518:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2519:   VecRestoreArrayRead(xx,&x);
2520:   VecRestoreArray(yy,&y);
2521:   return(0);
2522: }

2526: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2527: {
2528:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2529:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2530:   const PetscScalar *x,*v;
2531:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2532:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2533:   PetscErrorCode    ierr;
2534:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2535:   PetscInt          n,i;

2538:   VecSet(yy,0.0);
2539:   VecGetArrayRead(xx,&x);
2540:   VecGetArray(yy,&y);

2542:   for (i=0; i<m; i++) {
2543:     idx    = a->j + a->i[i] ;
2544:     v      = a->a + a->i[i] ;
2545:     n      = a->i[i+1] - a->i[i];
2546:     alpha1  = x[18*i];
2547:     alpha2  = x[18*i+1];
2548:     alpha3  = x[18*i+2];
2549:     alpha4  = x[18*i+3];
2550:     alpha5  = x[18*i+4];
2551:     alpha6  = x[18*i+5];
2552:     alpha7  = x[18*i+6];
2553:     alpha8  = x[18*i+7];
2554:     alpha9  = x[18*i+8];
2555:     alpha10 = x[18*i+9];
2556:     alpha11 = x[18*i+10];
2557:     alpha12 = x[18*i+11];
2558:     alpha13 = x[18*i+12];
2559:     alpha14 = x[18*i+13];
2560:     alpha15 = x[18*i+14];
2561:     alpha16 = x[18*i+15];
2562:     alpha17 = x[18*i+16];
2563:     alpha18 = x[18*i+17];
2564:     while (n-->0) {
2565:       y[18*(*idx)]    += alpha1*(*v);
2566:       y[18*(*idx)+1]  += alpha2*(*v);
2567:       y[18*(*idx)+2]  += alpha3*(*v);
2568:       y[18*(*idx)+3]  += alpha4*(*v);
2569:       y[18*(*idx)+4]  += alpha5*(*v);
2570:       y[18*(*idx)+5]  += alpha6*(*v);
2571:       y[18*(*idx)+6]  += alpha7*(*v);
2572:       y[18*(*idx)+7]  += alpha8*(*v);
2573:       y[18*(*idx)+8]  += alpha9*(*v);
2574:       y[18*(*idx)+9]  += alpha10*(*v);
2575:       y[18*(*idx)+10] += alpha11*(*v);
2576:       y[18*(*idx)+11] += alpha12*(*v);
2577:       y[18*(*idx)+12] += alpha13*(*v);
2578:       y[18*(*idx)+13] += alpha14*(*v);
2579:       y[18*(*idx)+14] += alpha15*(*v);
2580:       y[18*(*idx)+15] += alpha16*(*v);
2581:       y[18*(*idx)+16] += alpha17*(*v);
2582:       y[18*(*idx)+17] += alpha18*(*v);
2583:       idx++; v++;
2584:     }
2585:   }
2586:   PetscLogFlops(36.0*a->nz);
2587:   VecRestoreArrayRead(xx,&x);
2588:   VecRestoreArray(yy,&y);
2589:   return(0);
2590: }

2594: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2595: {
2596:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2597:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2598:   const PetscScalar *x,*v;
2599:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2600:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2601:   PetscErrorCode    ierr;
2602:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2603:   PetscInt          n,i,jrow,j;

2606:   if (yy != zz) {VecCopy(yy,zz);}
2607:   VecGetArrayRead(xx,&x);
2608:   VecGetArray(zz,&y);
2609:   idx  = a->j;
2610:   v    = a->a;
2611:   ii   = a->i;

2613:   for (i=0; i<m; i++) {
2614:     jrow = ii[i];
2615:     n    = ii[i+1] - jrow;
2616:     sum1  = 0.0;
2617:     sum2  = 0.0;
2618:     sum3  = 0.0;
2619:     sum4  = 0.0;
2620:     sum5  = 0.0;
2621:     sum6  = 0.0;
2622:     sum7  = 0.0;
2623:     sum8  = 0.0;
2624:     sum9  = 0.0;
2625:     sum10 = 0.0;
2626:     sum11 = 0.0;
2627:     sum12 = 0.0;
2628:     sum13 = 0.0;
2629:     sum14 = 0.0;
2630:     sum15 = 0.0;
2631:     sum16 = 0.0;
2632:     sum17 = 0.0;
2633:     sum18 = 0.0;
2634:     for (j=0; j<n; j++) {
2635:       sum1  += v[jrow]*x[18*idx[jrow]];
2636:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2637:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2638:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2639:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2640:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2641:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2642:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2643:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2644:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2645:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2646:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2647:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2648:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2649:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2650:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2651:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2652:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2653:       jrow++;
2654:      }
2655:     y[18*i]    += sum1;
2656:     y[18*i+1]  += sum2;
2657:     y[18*i+2]  += sum3;
2658:     y[18*i+3]  += sum4;
2659:     y[18*i+4]  += sum5;
2660:     y[18*i+5]  += sum6;
2661:     y[18*i+6]  += sum7;
2662:     y[18*i+7]  += sum8;
2663:     y[18*i+8]  += sum9;
2664:     y[18*i+9]  += sum10;
2665:     y[18*i+10] += sum11;
2666:     y[18*i+11] += sum12;
2667:     y[18*i+12] += sum13;
2668:     y[18*i+13] += sum14;
2669:     y[18*i+14] += sum15;
2670:     y[18*i+15] += sum16;
2671:     y[18*i+16] += sum17;
2672:     y[18*i+17] += sum18;
2673:   }

2675:   PetscLogFlops(36.0*a->nz);
2676:   VecRestoreArrayRead(xx,&x);
2677:   VecRestoreArray(zz,&y);
2678:   return(0);
2679: }

2683: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2684: {
2685:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2686:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2687:   const PetscScalar *x,*v;
2688:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2689:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2690:   PetscErrorCode    ierr;
2691:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2692:   PetscInt          n,i;

2695:   if (yy != zz) {VecCopy(yy,zz);}
2696:   VecGetArrayRead(xx,&x);
2697:   VecGetArray(zz,&y);
2698:   for (i=0; i<m; i++) {
2699:     idx    = a->j + a->i[i] ;
2700:     v      = a->a + a->i[i] ;
2701:     n      = a->i[i+1] - a->i[i];
2702:     alpha1 = x[18*i];
2703:     alpha2 = x[18*i+1];
2704:     alpha3 = x[18*i+2];
2705:     alpha4 = x[18*i+3];
2706:     alpha5 = x[18*i+4];
2707:     alpha6 = x[18*i+5];
2708:     alpha7 = x[18*i+6];
2709:     alpha8 = x[18*i+7];
2710:     alpha9  = x[18*i+8];
2711:     alpha10 = x[18*i+9];
2712:     alpha11 = x[18*i+10];
2713:     alpha12 = x[18*i+11];
2714:     alpha13 = x[18*i+12];
2715:     alpha14 = x[18*i+13];
2716:     alpha15 = x[18*i+14];
2717:     alpha16 = x[18*i+15];
2718:     alpha17 = x[18*i+16];
2719:     alpha18 = x[18*i+17];
2720:     while (n-->0) {
2721:       y[18*(*idx)]   += alpha1*(*v);
2722:       y[18*(*idx)+1] += alpha2*(*v);
2723:       y[18*(*idx)+2] += alpha3*(*v);
2724:       y[18*(*idx)+3] += alpha4*(*v);
2725:       y[18*(*idx)+4] += alpha5*(*v);
2726:       y[18*(*idx)+5] += alpha6*(*v);
2727:       y[18*(*idx)+6] += alpha7*(*v);
2728:       y[18*(*idx)+7] += alpha8*(*v);
2729:       y[18*(*idx)+8]  += alpha9*(*v);
2730:       y[18*(*idx)+9]  += alpha10*(*v);
2731:       y[18*(*idx)+10] += alpha11*(*v);
2732:       y[18*(*idx)+11] += alpha12*(*v);
2733:       y[18*(*idx)+12] += alpha13*(*v);
2734:       y[18*(*idx)+13] += alpha14*(*v);
2735:       y[18*(*idx)+14] += alpha15*(*v);
2736:       y[18*(*idx)+15] += alpha16*(*v);
2737:       y[18*(*idx)+16] += alpha17*(*v);
2738:       y[18*(*idx)+17] += alpha18*(*v);
2739:       idx++; v++;
2740:     }
2741:   }
2742:   PetscLogFlops(36.0*a->nz);
2743:   VecRestoreArrayRead(xx,&x);
2744:   VecRestoreArray(zz,&y);
2745:   return(0);
2746: }

2750: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2751: {
2752:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2753:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2754:   const PetscScalar *x,*v;
2755:   PetscScalar       *y,*sums;
2756:   PetscErrorCode    ierr;
2757:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2758:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2761:   VecGetArrayRead(xx,&x);
2762:   VecSet(yy,0.0);
2763:   VecGetArray(yy,&y);
2764:   idx  = a->j;
2765:   v    = a->a;
2766:   ii   = a->i;

2768:   for (i=0; i<m; i++) {
2769:     jrow = ii[i];
2770:     n    = ii[i+1] - jrow;
2771:     sums = y + dof*i;
2772:     for (j=0; j<n; j++) {
2773:       for (k=0; k<dof; k++) {
2774:         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2775:       }
2776:       jrow++;
2777:     }
2778:   }

2780:   PetscLogFlops(2.0*dof*a->nz);
2781:   VecRestoreArrayRead(xx,&x);
2782:   VecRestoreArray(yy,&y);
2783:   return(0);
2784: }

2788: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2789: {
2790:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2791:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2792:   const PetscScalar *x,*v;
2793:   PetscScalar       *y,*sums;
2794:   PetscErrorCode    ierr;
2795:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2796:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2799:   if (yy != zz) {VecCopy(yy,zz);}
2800:   VecGetArrayRead(xx,&x);
2801:   VecGetArray(zz,&y);
2802:   idx  = a->j;
2803:   v    = a->a;
2804:   ii   = a->i;

2806:   for (i=0; i<m; i++) {
2807:     jrow = ii[i];
2808:     n    = ii[i+1] - jrow;
2809:     sums = y + dof*i;
2810:     for (j=0; j<n; j++) {
2811:       for (k=0; k<dof; k++) {
2812:         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2813:       }
2814:       jrow++;
2815:     }
2816:   }

2818:   PetscLogFlops(2.0*dof*a->nz);
2819:   VecRestoreArrayRead(xx,&x);
2820:   VecRestoreArray(zz,&y);
2821:   return(0);
2822: }

2826: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2827: {
2828:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2829:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2830:   const PetscScalar *x,*v,*alpha;
2831:   PetscScalar       *y;
2832:   PetscErrorCode    ierr;
2833:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2834:   PetscInt          n,i,k;

2837:   VecGetArrayRead(xx,&x);
2838:   VecSet(yy,0.0);
2839:   VecGetArray(yy,&y);
2840:   for (i=0; i<m; i++) {
2841:     idx    = a->j + a->i[i] ;
2842:     v      = a->a + a->i[i] ;
2843:     n      = a->i[i+1] - a->i[i];
2844:     alpha  = x + dof*i;
2845:     while (n-->0) {
2846:       for (k=0; k<dof; k++) {
2847:         y[dof*(*idx)+k] += alpha[k]*(*v);
2848:       }
2849:       idx++; v++;
2850:     }
2851:   }
2852:   PetscLogFlops(2.0*dof*a->nz);
2853:   VecRestoreArrayRead(xx,&x);
2854:   VecRestoreArray(yy,&y);
2855:   return(0);
2856: }

2860: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2861: {
2862:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2863:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2864:   const PetscScalar *x,*v,*alpha;
2865:   PetscScalar       *y;
2866:   PetscErrorCode    ierr;
2867:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2868:   PetscInt          n,i,k;

2871:   if (yy != zz) {VecCopy(yy,zz);}
2872:   VecGetArrayRead(xx,&x);
2873:   VecGetArray(zz,&y);
2874:   for (i=0; i<m; i++) {
2875:     idx    = a->j + a->i[i] ;
2876:     v      = a->a + a->i[i] ;
2877:     n      = a->i[i+1] - a->i[i];
2878:     alpha  = x + dof*i;
2879:     while (n-->0) {
2880:       for (k=0; k<dof; k++) {
2881:         y[dof*(*idx)+k] += alpha[k]*(*v);
2882:       }
2883:       idx++; v++;
2884:     }
2885:   }
2886:   PetscLogFlops(2.0*dof*a->nz);
2887:   VecRestoreArrayRead(xx,&x);
2888:   VecRestoreArray(zz,&y);
2889:   return(0);
2890: }

2892: /*===================================================================================*/
2895: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2896: {
2897:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2901:   /* start the scatter */
2902:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2903:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2904:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2905:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2906:   return(0);
2907: }

2911: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2912: {
2913:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2917:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2918:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2919:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2920:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2921:   return(0);
2922: }

2926: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2927: {
2928:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2932:   /* start the scatter */
2933:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2934:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2935:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2936:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2937:   return(0);
2938: }

2942: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2943: {
2944:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2948:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2949:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2950:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2951:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2952:   return(0);
2953: }

2955: /* ----------------------------------------------------------------*/
2958: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2959: {
2960:   /* This routine requires testing -- but it's getting better. */
2961:   PetscErrorCode     ierr;
2962:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2963:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2964:   Mat                P=pp->AIJ;
2965:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2966:   PetscInt           *pti,*ptj,*ptJ;
2967:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2968:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2969:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2970:   MatScalar          *ca;
2971:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2974:   /* Start timer */
2975:   PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);

2977:   /* Get ij structure of P^T */
2978:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2980:   cn = pn*ppdof;
2981:   /* Allocate ci array, arrays for fill computation and */
2982:   /* free space for accumulating nonzero column info */
2983:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2984:   ci[0] = 0;

2986:   /* Work arrays for rows of P^T*A */
2987:   PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);
2988:   PetscMemzero(ptadenserow,an*sizeof(PetscInt));
2989:   PetscMemzero(denserow,cn*sizeof(PetscInt));

2991:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2992:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2993:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2994:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2995:   current_space = free_space;

2997:   /* Determine symbolic info for each row of C: */
2998:   for (i=0;i<pn;i++) {
2999:     ptnzi  = pti[i+1] - pti[i];
3000:     ptJ    = ptj + pti[i];
3001:     for (dof=0;dof<ppdof;dof++) {
3002:       ptanzi = 0;
3003:       /* Determine symbolic row of PtA: */
3004:       for (j=0;j<ptnzi;j++) {
3005:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3006:         arow = ptJ[j]*ppdof + dof;
3007:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3008:         anzj = ai[arow+1] - ai[arow];
3009:         ajj  = aj + ai[arow];
3010:         for (k=0;k<anzj;k++) {
3011:           if (!ptadenserow[ajj[k]]) {
3012:             ptadenserow[ajj[k]]    = -1;
3013:             ptasparserow[ptanzi++] = ajj[k];
3014:           }
3015:         }
3016:       }
3017:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3018:       ptaj = ptasparserow;
3019:       cnzi   = 0;
3020:       for (j=0;j<ptanzi;j++) {
3021:         /* Get offset within block of P */
3022:         pshift = *ptaj%ppdof;
3023:         /* Get block row of P */
3024:         prow = (*ptaj++)/ppdof; /* integer division */
3025:         /* P has same number of nonzeros per row as the compressed form */
3026:         pnzj = pi[prow+1] - pi[prow];
3027:         pjj  = pj + pi[prow];
3028:         for (k=0;k<pnzj;k++) {
3029:           /* Locations in C are shifted by the offset within the block */
3030:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3031:           if (!denserow[pjj[k]*ppdof+pshift]) {
3032:             denserow[pjj[k]*ppdof+pshift] = -1;
3033:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3034:           }
3035:         }
3036:       }

3038:       /* sort sparserow */
3039:       PetscSortInt(cnzi,sparserow);
3040: 
3041:       /* If free space is not available, make more free space */
3042:       /* Double the amount of total space in the list */
3043:       if (current_space->local_remaining<cnzi) {
3044:         PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
3045:       }

3047:       /* Copy data into free space, and zero out denserows */
3048:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3049:       current_space->array           += cnzi;
3050:       current_space->local_used      += cnzi;
3051:       current_space->local_remaining -= cnzi;

3053:       for (j=0;j<ptanzi;j++) {
3054:         ptadenserow[ptasparserow[j]] = 0;
3055:       }
3056:       for (j=0;j<cnzi;j++) {
3057:         denserow[sparserow[j]] = 0;
3058:       }
3059:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3060:       /*        For now, we will recompute what is needed. */
3061:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3062:     }
3063:   }
3064:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3065:   /* Allocate space for cj, initialize cj, and */
3066:   /* destroy list of free space and other temporary array(s) */
3067:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
3068:   PetscFreeSpaceContiguous(&free_space,cj);
3069:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3070: 
3071:   /* Allocate space for ca */
3072:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
3073:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
3074: 
3075:   /* put together the new matrix */
3076:   MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);

3078:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3079:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3080:   c          = (Mat_SeqAIJ *)((*C)->data);
3081:   c->free_a  = PETSC_TRUE;
3082:   c->free_ij = PETSC_TRUE;
3083:   c->nonew   = 0;

3085:   /* Clean up. */
3086:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

3088:   PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
3089:   return(0);
3090: }

3094: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3095: {
3096:   /* This routine requires testing -- first draft only */
3097:   PetscErrorCode  ierr;
3098:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3099:   Mat             P=pp->AIJ;
3100:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
3101:   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
3102:   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3103:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3104:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3105:   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3106:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3107:   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3108:   MatScalar       *ca=c->a,*caj,*apa;

3111:   /* Allocate temporary array for storage of one row of A*P */
3112:   PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);
3113:   PetscMemzero(apa,cn*sizeof(MatScalar));
3114:   PetscMemzero(apj,cn*sizeof(PetscInt));
3115:   PetscMemzero(apjdense,cn*sizeof(PetscInt));

3117:   /* Clear old values in C */
3118:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

3120:   for (i=0;i<am;i++) {
3121:     /* Form sparse row of A*P */
3122:     anzi  = ai[i+1] - ai[i];
3123:     apnzj = 0;
3124:     for (j=0;j<anzi;j++) {
3125:       /* Get offset within block of P */
3126:       pshift = *aj%ppdof;
3127:       /* Get block row of P */
3128:       prow   = *aj++/ppdof; /* integer division */
3129:       pnzj = pi[prow+1] - pi[prow];
3130:       pjj  = pj + pi[prow];
3131:       paj  = pa + pi[prow];
3132:       for (k=0;k<pnzj;k++) {
3133:         poffset = pjj[k]*ppdof+pshift;
3134:         if (!apjdense[poffset]) {
3135:           apjdense[poffset] = -1;
3136:           apj[apnzj++]      = poffset;
3137:         }
3138:         apa[poffset] += (*aa)*paj[k];
3139:       }
3140:       PetscLogFlops(2.0*pnzj);
3141:       aa++;
3142:     }

3144:     /* Sort the j index array for quick sparse axpy. */
3145:     /* Note: a array does not need sorting as it is in dense storage locations. */
3146:     PetscSortInt(apnzj,apj);

3148:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3149:     prow    = i/ppdof; /* integer division */
3150:     pshift  = i%ppdof;
3151:     poffset = pi[prow];
3152:     pnzi = pi[prow+1] - poffset;
3153:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3154:     pJ   = pj+poffset;
3155:     pA   = pa+poffset;
3156:     for (j=0;j<pnzi;j++) {
3157:       crow   = (*pJ)*ppdof+pshift;
3158:       cjj    = cj + ci[crow];
3159:       caj    = ca + ci[crow];
3160:       pJ++;
3161:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3162:       for (k=0,nextap=0;nextap<apnzj;k++) {
3163:         if (cjj[k]==apj[nextap]) {
3164:           caj[k] += (*pA)*apa[apj[nextap++]];
3165:         }
3166:       }
3167:       PetscLogFlops(2.0*apnzj);
3168:       pA++;
3169:     }

3171:     /* Zero the current row info for A*P */
3172:     for (j=0;j<apnzj;j++) {
3173:       apa[apj[j]]      = 0.;
3174:       apjdense[apj[j]] = 0;
3175:     }
3176:   }

3178:   /* Assemble the final matrix and clean up */
3179:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3180:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3181:   PetscFree3(apa,apj,apjdense);
3182:   return(0);
3183: }

3187: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3188: {
3189:   PetscErrorCode    ierr;

3192:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3193:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3194:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3195:   return(0);
3196: }

3200: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3201: {
3203:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3204:   return(0);
3205: }

3210: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3211: {
3212:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3213:   Mat               a = b->AIJ,B;
3214:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
3215:   PetscErrorCode    ierr;
3216:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3217:   PetscInt          *cols;
3218:   PetscScalar       *vals;

3221:   MatGetSize(a,&m,&n);
3222:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
3223:   for (i=0; i<m; i++) {
3224:     nmax = PetscMax(nmax,aij->ilen[i]);
3225:     for (j=0; j<dof; j++) {
3226:       ilen[dof*i+j] = aij->ilen[i];
3227:     }
3228:   }
3229:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3230:   PetscFree(ilen);
3231:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
3232:   ii   = 0;
3233:   for (i=0; i<m; i++) {
3234:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3235:     for (j=0; j<dof; j++) {
3236:       for (k=0; k<ncols; k++) {
3237:         icols[k] = dof*cols[k]+j;
3238:       }
3239:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3240:       ii++;
3241:     }
3242:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3243:   }
3244:   PetscFree(icols);
3245:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3246:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3248:   if (reuse == MAT_REUSE_MATRIX) {
3249:     MatHeaderReplace(A,B);
3250:   } else {
3251:     *newmat = B;
3252:   }
3253:   return(0);
3254: }

3257: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3262: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3263: {
3264:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3265:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3266:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3267:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3268:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3269:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3270:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3271:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3272:   PetscInt          rstart,cstart,*garray,ii,k;
3273:   PetscErrorCode    ierr;
3274:   PetscScalar       *vals,*ovals;

3277:   PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3278:   for (i=0; i<A->rmap->n/dof; i++) {
3279:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3280:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3281:     for (j=0; j<dof; j++) {
3282:       dnz[dof*i+j] = AIJ->ilen[i];
3283:       onz[dof*i+j] = OAIJ->ilen[i];
3284:     }
3285:   }
3286:   MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3287:   PetscFree2(dnz,onz);

3289:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3290:   rstart = dof*maij->A->rmap->rstart;
3291:   cstart = dof*maij->A->cmap->rstart;
3292:   garray = mpiaij->garray;

3294:   ii = rstart;
3295:   for (i=0; i<A->rmap->n/dof; i++) {
3296:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3297:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3298:     for (j=0; j<dof; j++) {
3299:       for (k=0; k<ncols; k++) {
3300:         icols[k] = cstart + dof*cols[k]+j;
3301:       }
3302:       for (k=0; k<oncols; k++) {
3303:         oicols[k] = dof*garray[ocols[k]]+j;
3304:       }
3305:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3306:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3307:       ii++;
3308:     }
3309:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3310:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3311:   }
3312:   PetscFree2(icols,oicols);

3314:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3315:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3317:   if (reuse == MAT_REUSE_MATRIX) {
3318:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3319:     ((PetscObject)A)->refct = 1;
3320:     MatHeaderReplace(A,B);
3321:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3322:   } else {
3323:     *newmat = B;
3324:   }
3325:   return(0);
3326: }

3331: PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3332: {
3334:   Mat            A;

3337:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3338:   MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3339:   MatDestroy(&A);
3340:   return(0);
3341: }

3343: /* ---------------------------------------------------------------------------------- */
3346: /*@C
3347:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3348:   operations for multicomponent problems.  It interpolates each component the same
3349:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3350:   and MATMPIAIJ for distributed matrices.

3352:   Collective

3354:   Input Parameters:
3355: + A - the AIJ matrix describing the action on blocks
3356: - dof - the block size (number of components per node)

3358:   Output Parameter:
3359: . maij - the new MAIJ matrix

3361:   Operations provided:
3362: + MatMult
3363: . MatMultTranspose
3364: . MatMultAdd
3365: . MatMultTransposeAdd
3366: - MatView

3368:   Level: advanced

3370: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3371: @*/
3372: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3373: {
3375:   PetscMPIInt    size;
3376:   PetscInt       n;
3377:   Mat            B;

3380:   PetscObjectReference((PetscObject)A);

3382:   if (dof == 1) {
3383:     *maij = A;
3384:   } else {
3385:     MatCreate(((PetscObject)A)->comm,&B);
3386:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3387:     PetscLayoutSetBlockSize(B->rmap,dof);
3388:     PetscLayoutSetBlockSize(B->cmap,dof);
3389:     PetscLayoutSetUp(B->rmap);
3390:     PetscLayoutSetUp(B->cmap);
3391:     B->assembled    = PETSC_TRUE;

3393:     MPI_Comm_size(((PetscObject)A)->comm,&size);
3394:     if (size == 1) {
3395:       Mat_SeqMAIJ    *b;

3397:       MatSetType(B,MATSEQMAIJ);
3398:       B->ops->destroy = MatDestroy_SeqMAIJ;
3399:       B->ops->view    = MatView_SeqMAIJ;
3400:       b      = (Mat_SeqMAIJ*)B->data;
3401:       b->dof = dof;
3402:       b->AIJ = A;
3403:       if (dof == 2) {
3404:         B->ops->mult             = MatMult_SeqMAIJ_2;
3405:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3406:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3407:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3408:       } else if (dof == 3) {
3409:         B->ops->mult             = MatMult_SeqMAIJ_3;
3410:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3411:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3412:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3413:       } else if (dof == 4) {
3414:         B->ops->mult             = MatMult_SeqMAIJ_4;
3415:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3416:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3417:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3418:       } else if (dof == 5) {
3419:         B->ops->mult             = MatMult_SeqMAIJ_5;
3420:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3421:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3422:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3423:       } else if (dof == 6) {
3424:         B->ops->mult             = MatMult_SeqMAIJ_6;
3425:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3426:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3427:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3428:       } else if (dof == 7) {
3429:         B->ops->mult             = MatMult_SeqMAIJ_7;
3430:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3431:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3432:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3433:       } else if (dof == 8) {
3434:         B->ops->mult             = MatMult_SeqMAIJ_8;
3435:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3436:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3437:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3438:       } else if (dof == 9) {
3439:         B->ops->mult             = MatMult_SeqMAIJ_9;
3440:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3441:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3442:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3443:       } else if (dof == 10) {
3444:         B->ops->mult             = MatMult_SeqMAIJ_10;
3445:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3446:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3447:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3448:       } else if (dof == 11) {
3449:         B->ops->mult             = MatMult_SeqMAIJ_11;
3450:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3451:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3452:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3453:       } else if (dof == 16) {
3454:         B->ops->mult             = MatMult_SeqMAIJ_16;
3455:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3456:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3457:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3458:       } else if (dof == 18) {
3459:         B->ops->mult             = MatMult_SeqMAIJ_18;
3460:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3461:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3462:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3463:       } else {
3464:         B->ops->mult             = MatMult_SeqMAIJ_N;
3465:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3466:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3467:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3468:       }
3469:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3470:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3471:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3472:     } else {
3473:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3474:       Mat_MPIMAIJ *b;
3475:       IS          from,to;
3476:       Vec         gvec;

3478:       MatSetType(B,MATMPIMAIJ);
3479:       B->ops->destroy = MatDestroy_MPIMAIJ;
3480:       B->ops->view    = MatView_MPIMAIJ;
3481:       b      = (Mat_MPIMAIJ*)B->data;
3482:       b->dof = dof;
3483:       b->A   = A;
3484:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3485:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

3487:       VecGetSize(mpiaij->lvec,&n);
3488:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
3489:       VecSetBlockSize(b->w,dof);

3491:       /* create two temporary Index sets for build scatter gather */
3492:       ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3493:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3495:       /* create temporary global vector to generate scatter context */
3496:       VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
3497:       VecSetBlockSize(gvec,dof);

3499:       /* generate the scatter context */
3500:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3502:       ISDestroy(&from);
3503:       ISDestroy(&to);
3504:       VecDestroy(&gvec);

3506:       B->ops->mult                = MatMult_MPIMAIJ_dof;
3507:       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
3508:       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
3509:       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
3510:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3511:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3512:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3513:     }
3514:     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
3515:     *maij = B;
3516:     MatView_Private(B);
3517:   }
3518:   return(0);
3519: }