Actual source code: maij.c
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20: #include <../src/mat/utils/freespace.h>
21: #include <private/vecimpl.h>
25: /*@C
26: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30: Input Parameter:
31: . A - the MAIJ matrix
33: Output Parameter:
34: . B - the AIJ matrix
36: Level: advanced
38: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40: .seealso: MatCreateMAIJ()
41: @*/
42: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
43: {
45: PetscBool ismpimaij,isseqmaij;
48: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
49: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
50: if (ismpimaij) {
51: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53: *B = b->A;
54: } else if (isseqmaij) {
55: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57: *B = b->AIJ;
58: } else {
59: *B = A;
60: }
61: return(0);
62: }
66: /*@C
67: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69: Logically Collective
71: Input Parameter:
72: + A - the MAIJ matrix
73: - dof - the block size for the new matrix
75: Output Parameter:
76: . B - the new MAIJ matrix
78: Level: advanced
80: .seealso: MatCreateMAIJ()
81: @*/
82: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83: {
85: Mat Aij = PETSC_NULL;
89: MatMAIJGetAIJ(A,&Aij);
90: MatCreateMAIJ(Aij,dof,B);
91: return(0);
92: }
96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97: {
99: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
102: MatDestroy(&b->AIJ);
103: PetscFree(A->data);
104: return(0);
105: }
109: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110: {
112: Mat B;
115: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
116: MatView(B,viewer);
117: MatDestroy(&B);
118: return(0);
119: }
123: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
124: {
126: Mat B;
129: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
130: MatView(B,viewer);
131: MatDestroy(&B);
132: return(0);
133: }
137: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
138: {
140: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
143: MatDestroy(&b->AIJ);
144: MatDestroy(&b->OAIJ);
145: MatDestroy(&b->A);
146: VecScatterDestroy(&b->ctx);
147: VecDestroy(&b->w);
148: PetscFree(A->data);
149: PetscObjectChangeTypeName((PetscObject)A,0);
150: return(0);
151: }
153: /*MC
154: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
155: multicomponent problems, interpolating or restricting each component the same way independently.
156: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
158: Operations provided:
159: . MatMult
160: . MatMultTranspose
161: . MatMultAdd
162: . MatMultTransposeAdd
164: Level: advanced
166: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
167: M*/
172: PetscErrorCode MatCreate_MAIJ(Mat A)
173: {
175: Mat_MPIMAIJ *b;
176: PetscMPIInt size;
179: PetscNewLog(A,Mat_MPIMAIJ,&b);
180: A->data = (void*)b;
181: PetscMemzero(A->ops,sizeof(struct _MatOps));
183: b->AIJ = 0;
184: b->dof = 0;
185: b->OAIJ = 0;
186: b->ctx = 0;
187: b->w = 0;
188: MPI_Comm_size(((PetscObject)A)->comm,&size);
189: if (size == 1){
190: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
191: } else {
192: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
193: }
194: return(0);
195: }
198: /* --------------------------------------------------------------------------------------*/
201: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
202: {
203: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
204: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
205: const PetscScalar *x,*v;
206: PetscScalar *y, sum1, sum2;
207: PetscErrorCode ierr;
208: PetscInt nonzerorow=0,n,i,jrow,j;
209: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
212: VecGetArrayRead(xx,&x);
213: VecGetArray(yy,&y);
214: idx = a->j;
215: v = a->a;
216: ii = a->i;
218: for (i=0; i<m; i++) {
219: jrow = ii[i];
220: n = ii[i+1] - jrow;
221: sum1 = 0.0;
222: sum2 = 0.0;
223: nonzerorow += (n>0);
224: for (j=0; j<n; j++) {
225: sum1 += v[jrow]*x[2*idx[jrow]];
226: sum2 += v[jrow]*x[2*idx[jrow]+1];
227: jrow++;
228: }
229: y[2*i] = sum1;
230: y[2*i+1] = sum2;
231: }
233: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
234: VecRestoreArrayRead(xx,&x);
235: VecRestoreArray(yy,&y);
236: return(0);
237: }
241: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
242: {
243: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
244: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
245: const PetscScalar *x,*v;
246: PetscScalar *y,alpha1,alpha2;
247: PetscErrorCode ierr;
248: PetscInt n,i;
249: const PetscInt m = b->AIJ->rmap->n,*idx;
252: VecSet(yy,0.0);
253: VecGetArrayRead(xx,&x);
254: VecGetArray(yy,&y);
255:
256: for (i=0; i<m; i++) {
257: idx = a->j + a->i[i] ;
258: v = a->a + a->i[i] ;
259: n = a->i[i+1] - a->i[i];
260: alpha1 = x[2*i];
261: alpha2 = x[2*i+1];
262: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
263: }
264: PetscLogFlops(4.0*a->nz);
265: VecRestoreArrayRead(xx,&x);
266: VecRestoreArray(yy,&y);
267: return(0);
268: }
272: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
273: {
274: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
275: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
276: const PetscScalar *x,*v;
277: PetscScalar *y,sum1, sum2;
278: PetscErrorCode ierr;
279: PetscInt n,i,jrow,j;
280: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
283: if (yy != zz) {VecCopy(yy,zz);}
284: VecGetArrayRead(xx,&x);
285: VecGetArray(zz,&y);
286: idx = a->j;
287: v = a->a;
288: ii = a->i;
290: for (i=0; i<m; i++) {
291: jrow = ii[i];
292: n = ii[i+1] - jrow;
293: sum1 = 0.0;
294: sum2 = 0.0;
295: for (j=0; j<n; j++) {
296: sum1 += v[jrow]*x[2*idx[jrow]];
297: sum2 += v[jrow]*x[2*idx[jrow]+1];
298: jrow++;
299: }
300: y[2*i] += sum1;
301: y[2*i+1] += sum2;
302: }
304: PetscLogFlops(4.0*a->nz);
305: VecRestoreArrayRead(xx,&x);
306: VecRestoreArray(zz,&y);
307: return(0);
308: }
311: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
312: {
313: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
314: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
315: const PetscScalar *x,*v;
316: PetscScalar *y,alpha1,alpha2;
317: PetscErrorCode ierr;
318: PetscInt n,i;
319: const PetscInt m = b->AIJ->rmap->n,*idx;
322: if (yy != zz) {VecCopy(yy,zz);}
323: VecGetArrayRead(xx,&x);
324: VecGetArray(zz,&y);
325:
326: for (i=0; i<m; i++) {
327: idx = a->j + a->i[i] ;
328: v = a->a + a->i[i] ;
329: n = a->i[i+1] - a->i[i];
330: alpha1 = x[2*i];
331: alpha2 = x[2*i+1];
332: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
333: }
334: PetscLogFlops(4.0*a->nz);
335: VecRestoreArrayRead(xx,&x);
336: VecRestoreArray(zz,&y);
337: return(0);
338: }
339: /* --------------------------------------------------------------------------------------*/
342: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343: {
344: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
345: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
346: const PetscScalar *x,*v;
347: PetscScalar *y,sum1, sum2, sum3;
348: PetscErrorCode ierr;
349: PetscInt nonzerorow=0,n,i,jrow,j;
350: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
353: VecGetArrayRead(xx,&x);
354: VecGetArray(yy,&y);
355: idx = a->j;
356: v = a->a;
357: ii = a->i;
359: for (i=0; i<m; i++) {
360: jrow = ii[i];
361: n = ii[i+1] - jrow;
362: sum1 = 0.0;
363: sum2 = 0.0;
364: sum3 = 0.0;
365: nonzerorow += (n>0);
366: for (j=0; j<n; j++) {
367: sum1 += v[jrow]*x[3*idx[jrow]];
368: sum2 += v[jrow]*x[3*idx[jrow]+1];
369: sum3 += v[jrow]*x[3*idx[jrow]+2];
370: jrow++;
371: }
372: y[3*i] = sum1;
373: y[3*i+1] = sum2;
374: y[3*i+2] = sum3;
375: }
377: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
378: VecRestoreArrayRead(xx,&x);
379: VecRestoreArray(yy,&y);
380: return(0);
381: }
385: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
386: {
387: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
388: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
389: const PetscScalar *x,*v;
390: PetscScalar *y,alpha1,alpha2,alpha3;
391: PetscErrorCode ierr;
392: PetscInt n,i;
393: const PetscInt m = b->AIJ->rmap->n,*idx;
396: VecSet(yy,0.0);
397: VecGetArrayRead(xx,&x);
398: VecGetArray(yy,&y);
399:
400: for (i=0; i<m; i++) {
401: idx = a->j + a->i[i];
402: v = a->a + a->i[i];
403: n = a->i[i+1] - a->i[i];
404: alpha1 = x[3*i];
405: alpha2 = x[3*i+1];
406: alpha3 = x[3*i+2];
407: while (n-->0) {
408: y[3*(*idx)] += alpha1*(*v);
409: y[3*(*idx)+1] += alpha2*(*v);
410: y[3*(*idx)+2] += alpha3*(*v);
411: idx++; v++;
412: }
413: }
414: PetscLogFlops(6.0*a->nz);
415: VecRestoreArrayRead(xx,&x);
416: VecRestoreArray(yy,&y);
417: return(0);
418: }
422: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
423: {
424: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
425: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
426: const PetscScalar *x,*v;
427: PetscScalar *y,sum1, sum2, sum3;
428: PetscErrorCode ierr;
429: PetscInt n,i,jrow,j;
430: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
433: if (yy != zz) {VecCopy(yy,zz);}
434: VecGetArrayRead(xx,&x);
435: VecGetArray(zz,&y);
436: idx = a->j;
437: v = a->a;
438: ii = a->i;
440: for (i=0; i<m; i++) {
441: jrow = ii[i];
442: n = ii[i+1] - jrow;
443: sum1 = 0.0;
444: sum2 = 0.0;
445: sum3 = 0.0;
446: for (j=0; j<n; j++) {
447: sum1 += v[jrow]*x[3*idx[jrow]];
448: sum2 += v[jrow]*x[3*idx[jrow]+1];
449: sum3 += v[jrow]*x[3*idx[jrow]+2];
450: jrow++;
451: }
452: y[3*i] += sum1;
453: y[3*i+1] += sum2;
454: y[3*i+2] += sum3;
455: }
457: PetscLogFlops(6.0*a->nz);
458: VecRestoreArrayRead(xx,&x);
459: VecRestoreArray(zz,&y);
460: return(0);
461: }
464: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
465: {
466: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
467: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
468: const PetscScalar *x,*v;
469: PetscScalar *y,alpha1,alpha2,alpha3;
470: PetscErrorCode ierr;
471: PetscInt n,i;
472: const PetscInt m = b->AIJ->rmap->n,*idx;
475: if (yy != zz) {VecCopy(yy,zz);}
476: VecGetArrayRead(xx,&x);
477: VecGetArray(zz,&y);
478: for (i=0; i<m; i++) {
479: idx = a->j + a->i[i] ;
480: v = a->a + a->i[i] ;
481: n = a->i[i+1] - a->i[i];
482: alpha1 = x[3*i];
483: alpha2 = x[3*i+1];
484: alpha3 = x[3*i+2];
485: while (n-->0) {
486: y[3*(*idx)] += alpha1*(*v);
487: y[3*(*idx)+1] += alpha2*(*v);
488: y[3*(*idx)+2] += alpha3*(*v);
489: idx++; v++;
490: }
491: }
492: PetscLogFlops(6.0*a->nz);
493: VecRestoreArrayRead(xx,&x);
494: VecRestoreArray(zz,&y);
495: return(0);
496: }
498: /* ------------------------------------------------------------------------------*/
501: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
502: {
503: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
504: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
505: const PetscScalar *x,*v;
506: PetscScalar *y,sum1, sum2, sum3, sum4;
507: PetscErrorCode ierr;
508: PetscInt nonzerorow=0,n,i,jrow,j;
509: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
512: VecGetArrayRead(xx,&x);
513: VecGetArray(yy,&y);
514: idx = a->j;
515: v = a->a;
516: ii = a->i;
518: for (i=0; i<m; i++) {
519: jrow = ii[i];
520: n = ii[i+1] - jrow;
521: sum1 = 0.0;
522: sum2 = 0.0;
523: sum3 = 0.0;
524: sum4 = 0.0;
525: nonzerorow += (n>0);
526: for (j=0; j<n; j++) {
527: sum1 += v[jrow]*x[4*idx[jrow]];
528: sum2 += v[jrow]*x[4*idx[jrow]+1];
529: sum3 += v[jrow]*x[4*idx[jrow]+2];
530: sum4 += v[jrow]*x[4*idx[jrow]+3];
531: jrow++;
532: }
533: y[4*i] = sum1;
534: y[4*i+1] = sum2;
535: y[4*i+2] = sum3;
536: y[4*i+3] = sum4;
537: }
539: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
540: VecRestoreArrayRead(xx,&x);
541: VecRestoreArray(yy,&y);
542: return(0);
543: }
547: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
548: {
549: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
550: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
551: const PetscScalar *x,*v;
552: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
553: PetscErrorCode ierr;
554: PetscInt n,i;
555: const PetscInt m = b->AIJ->rmap->n,*idx;
558: VecSet(yy,0.0);
559: VecGetArrayRead(xx,&x);
560: VecGetArray(yy,&y);
561: for (i=0; i<m; i++) {
562: idx = a->j + a->i[i] ;
563: v = a->a + a->i[i] ;
564: n = a->i[i+1] - a->i[i];
565: alpha1 = x[4*i];
566: alpha2 = x[4*i+1];
567: alpha3 = x[4*i+2];
568: alpha4 = x[4*i+3];
569: while (n-->0) {
570: y[4*(*idx)] += alpha1*(*v);
571: y[4*(*idx)+1] += alpha2*(*v);
572: y[4*(*idx)+2] += alpha3*(*v);
573: y[4*(*idx)+3] += alpha4*(*v);
574: idx++; v++;
575: }
576: }
577: PetscLogFlops(8.0*a->nz);
578: VecRestoreArrayRead(xx,&x);
579: VecRestoreArray(yy,&y);
580: return(0);
581: }
585: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
586: {
587: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
588: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
589: const PetscScalar *x,*v;
590: PetscScalar *y,sum1, sum2, sum3, sum4;
591: PetscErrorCode ierr;
592: PetscInt n,i,jrow,j;
593: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
596: if (yy != zz) {VecCopy(yy,zz);}
597: VecGetArrayRead(xx,&x);
598: VecGetArray(zz,&y);
599: idx = a->j;
600: v = a->a;
601: ii = a->i;
603: for (i=0; i<m; i++) {
604: jrow = ii[i];
605: n = ii[i+1] - jrow;
606: sum1 = 0.0;
607: sum2 = 0.0;
608: sum3 = 0.0;
609: sum4 = 0.0;
610: for (j=0; j<n; j++) {
611: sum1 += v[jrow]*x[4*idx[jrow]];
612: sum2 += v[jrow]*x[4*idx[jrow]+1];
613: sum3 += v[jrow]*x[4*idx[jrow]+2];
614: sum4 += v[jrow]*x[4*idx[jrow]+3];
615: jrow++;
616: }
617: y[4*i] += sum1;
618: y[4*i+1] += sum2;
619: y[4*i+2] += sum3;
620: y[4*i+3] += sum4;
621: }
623: PetscLogFlops(8.0*a->nz);
624: VecRestoreArrayRead(xx,&x);
625: VecRestoreArray(zz,&y);
626: return(0);
627: }
630: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
631: {
632: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
633: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
634: const PetscScalar *x,*v;
635: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
636: PetscErrorCode ierr;
637: PetscInt n,i;
638: const PetscInt m = b->AIJ->rmap->n,*idx;
641: if (yy != zz) {VecCopy(yy,zz);}
642: VecGetArrayRead(xx,&x);
643: VecGetArray(zz,&y);
644:
645: for (i=0; i<m; i++) {
646: idx = a->j + a->i[i] ;
647: v = a->a + a->i[i] ;
648: n = a->i[i+1] - a->i[i];
649: alpha1 = x[4*i];
650: alpha2 = x[4*i+1];
651: alpha3 = x[4*i+2];
652: alpha4 = x[4*i+3];
653: while (n-->0) {
654: y[4*(*idx)] += alpha1*(*v);
655: y[4*(*idx)+1] += alpha2*(*v);
656: y[4*(*idx)+2] += alpha3*(*v);
657: y[4*(*idx)+3] += alpha4*(*v);
658: idx++; v++;
659: }
660: }
661: PetscLogFlops(8.0*a->nz);
662: VecRestoreArrayRead(xx,&x);
663: VecRestoreArray(zz,&y);
664: return(0);
665: }
666: /* ------------------------------------------------------------------------------*/
670: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
671: {
672: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
673: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
674: const PetscScalar *x,*v;
675: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
676: PetscErrorCode ierr;
677: PetscInt nonzerorow=0,n,i,jrow,j;
678: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
681: VecGetArrayRead(xx,&x);
682: VecGetArray(yy,&y);
683: idx = a->j;
684: v = a->a;
685: ii = a->i;
687: for (i=0; i<m; i++) {
688: jrow = ii[i];
689: n = ii[i+1] - jrow;
690: sum1 = 0.0;
691: sum2 = 0.0;
692: sum3 = 0.0;
693: sum4 = 0.0;
694: sum5 = 0.0;
695: nonzerorow += (n>0);
696: for (j=0; j<n; j++) {
697: sum1 += v[jrow]*x[5*idx[jrow]];
698: sum2 += v[jrow]*x[5*idx[jrow]+1];
699: sum3 += v[jrow]*x[5*idx[jrow]+2];
700: sum4 += v[jrow]*x[5*idx[jrow]+3];
701: sum5 += v[jrow]*x[5*idx[jrow]+4];
702: jrow++;
703: }
704: y[5*i] = sum1;
705: y[5*i+1] = sum2;
706: y[5*i+2] = sum3;
707: y[5*i+3] = sum4;
708: y[5*i+4] = sum5;
709: }
711: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
712: VecRestoreArrayRead(xx,&x);
713: VecRestoreArray(yy,&y);
714: return(0);
715: }
719: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
720: {
721: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
722: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
723: const PetscScalar *x,*v;
724: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
725: PetscErrorCode ierr;
726: PetscInt n,i;
727: const PetscInt m = b->AIJ->rmap->n,*idx;
730: VecSet(yy,0.0);
731: VecGetArrayRead(xx,&x);
732: VecGetArray(yy,&y);
733:
734: for (i=0; i<m; i++) {
735: idx = a->j + a->i[i] ;
736: v = a->a + a->i[i] ;
737: n = a->i[i+1] - a->i[i];
738: alpha1 = x[5*i];
739: alpha2 = x[5*i+1];
740: alpha3 = x[5*i+2];
741: alpha4 = x[5*i+3];
742: alpha5 = x[5*i+4];
743: while (n-->0) {
744: y[5*(*idx)] += alpha1*(*v);
745: y[5*(*idx)+1] += alpha2*(*v);
746: y[5*(*idx)+2] += alpha3*(*v);
747: y[5*(*idx)+3] += alpha4*(*v);
748: y[5*(*idx)+4] += alpha5*(*v);
749: idx++; v++;
750: }
751: }
752: PetscLogFlops(10.0*a->nz);
753: VecRestoreArrayRead(xx,&x);
754: VecRestoreArray(yy,&y);
755: return(0);
756: }
760: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
761: {
762: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
763: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
764: const PetscScalar *x,*v;
765: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
766: PetscErrorCode ierr;
767: PetscInt n,i,jrow,j;
768: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
771: if (yy != zz) {VecCopy(yy,zz);}
772: VecGetArrayRead(xx,&x);
773: VecGetArray(zz,&y);
774: idx = a->j;
775: v = a->a;
776: ii = a->i;
778: for (i=0; i<m; i++) {
779: jrow = ii[i];
780: n = ii[i+1] - jrow;
781: sum1 = 0.0;
782: sum2 = 0.0;
783: sum3 = 0.0;
784: sum4 = 0.0;
785: sum5 = 0.0;
786: for (j=0; j<n; j++) {
787: sum1 += v[jrow]*x[5*idx[jrow]];
788: sum2 += v[jrow]*x[5*idx[jrow]+1];
789: sum3 += v[jrow]*x[5*idx[jrow]+2];
790: sum4 += v[jrow]*x[5*idx[jrow]+3];
791: sum5 += v[jrow]*x[5*idx[jrow]+4];
792: jrow++;
793: }
794: y[5*i] += sum1;
795: y[5*i+1] += sum2;
796: y[5*i+2] += sum3;
797: y[5*i+3] += sum4;
798: y[5*i+4] += sum5;
799: }
801: PetscLogFlops(10.0*a->nz);
802: VecRestoreArrayRead(xx,&x);
803: VecRestoreArray(zz,&y);
804: return(0);
805: }
809: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
810: {
811: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
812: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
813: const PetscScalar *x,*v;
814: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
815: PetscErrorCode ierr;
816: PetscInt n,i;
817: const PetscInt m = b->AIJ->rmap->n,*idx;
820: if (yy != zz) {VecCopy(yy,zz);}
821: VecGetArrayRead(xx,&x);
822: VecGetArray(zz,&y);
823:
824: for (i=0; i<m; i++) {
825: idx = a->j + a->i[i] ;
826: v = a->a + a->i[i] ;
827: n = a->i[i+1] - a->i[i];
828: alpha1 = x[5*i];
829: alpha2 = x[5*i+1];
830: alpha3 = x[5*i+2];
831: alpha4 = x[5*i+3];
832: alpha5 = x[5*i+4];
833: while (n-->0) {
834: y[5*(*idx)] += alpha1*(*v);
835: y[5*(*idx)+1] += alpha2*(*v);
836: y[5*(*idx)+2] += alpha3*(*v);
837: y[5*(*idx)+3] += alpha4*(*v);
838: y[5*(*idx)+4] += alpha5*(*v);
839: idx++; v++;
840: }
841: }
842: PetscLogFlops(10.0*a->nz);
843: VecRestoreArrayRead(xx,&x);
844: VecRestoreArray(zz,&y);
845: return(0);
846: }
848: /* ------------------------------------------------------------------------------*/
851: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
852: {
853: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
854: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
855: const PetscScalar *x,*v;
856: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
857: PetscErrorCode ierr;
858: PetscInt nonzerorow=0,n,i,jrow,j;
859: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
862: VecGetArrayRead(xx,&x);
863: VecGetArray(yy,&y);
864: idx = a->j;
865: v = a->a;
866: ii = a->i;
868: for (i=0; i<m; i++) {
869: jrow = ii[i];
870: n = ii[i+1] - jrow;
871: sum1 = 0.0;
872: sum2 = 0.0;
873: sum3 = 0.0;
874: sum4 = 0.0;
875: sum5 = 0.0;
876: sum6 = 0.0;
877: nonzerorow += (n>0);
878: for (j=0; j<n; j++) {
879: sum1 += v[jrow]*x[6*idx[jrow]];
880: sum2 += v[jrow]*x[6*idx[jrow]+1];
881: sum3 += v[jrow]*x[6*idx[jrow]+2];
882: sum4 += v[jrow]*x[6*idx[jrow]+3];
883: sum5 += v[jrow]*x[6*idx[jrow]+4];
884: sum6 += v[jrow]*x[6*idx[jrow]+5];
885: jrow++;
886: }
887: y[6*i] = sum1;
888: y[6*i+1] = sum2;
889: y[6*i+2] = sum3;
890: y[6*i+3] = sum4;
891: y[6*i+4] = sum5;
892: y[6*i+5] = sum6;
893: }
895: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
896: VecRestoreArrayRead(xx,&x);
897: VecRestoreArray(yy,&y);
898: return(0);
899: }
903: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
904: {
905: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
906: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
907: const PetscScalar *x,*v;
908: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
909: PetscErrorCode ierr;
910: PetscInt n,i;
911: const PetscInt m = b->AIJ->rmap->n,*idx;
914: VecSet(yy,0.0);
915: VecGetArrayRead(xx,&x);
916: VecGetArray(yy,&y);
918: for (i=0; i<m; i++) {
919: idx = a->j + a->i[i] ;
920: v = a->a + a->i[i] ;
921: n = a->i[i+1] - a->i[i];
922: alpha1 = x[6*i];
923: alpha2 = x[6*i+1];
924: alpha3 = x[6*i+2];
925: alpha4 = x[6*i+3];
926: alpha5 = x[6*i+4];
927: alpha6 = x[6*i+5];
928: while (n-->0) {
929: y[6*(*idx)] += alpha1*(*v);
930: y[6*(*idx)+1] += alpha2*(*v);
931: y[6*(*idx)+2] += alpha3*(*v);
932: y[6*(*idx)+3] += alpha4*(*v);
933: y[6*(*idx)+4] += alpha5*(*v);
934: y[6*(*idx)+5] += alpha6*(*v);
935: idx++; v++;
936: }
937: }
938: PetscLogFlops(12.0*a->nz);
939: VecRestoreArrayRead(xx,&x);
940: VecRestoreArray(yy,&y);
941: return(0);
942: }
946: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947: {
948: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
949: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
950: const PetscScalar *x,*v;
951: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
952: PetscErrorCode ierr;
953: PetscInt n,i,jrow,j;
954: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
957: if (yy != zz) {VecCopy(yy,zz);}
958: VecGetArrayRead(xx,&x);
959: VecGetArray(zz,&y);
960: idx = a->j;
961: v = a->a;
962: ii = a->i;
964: for (i=0; i<m; i++) {
965: jrow = ii[i];
966: n = ii[i+1] - jrow;
967: sum1 = 0.0;
968: sum2 = 0.0;
969: sum3 = 0.0;
970: sum4 = 0.0;
971: sum5 = 0.0;
972: sum6 = 0.0;
973: for (j=0; j<n; j++) {
974: sum1 += v[jrow]*x[6*idx[jrow]];
975: sum2 += v[jrow]*x[6*idx[jrow]+1];
976: sum3 += v[jrow]*x[6*idx[jrow]+2];
977: sum4 += v[jrow]*x[6*idx[jrow]+3];
978: sum5 += v[jrow]*x[6*idx[jrow]+4];
979: sum6 += v[jrow]*x[6*idx[jrow]+5];
980: jrow++;
981: }
982: y[6*i] += sum1;
983: y[6*i+1] += sum2;
984: y[6*i+2] += sum3;
985: y[6*i+3] += sum4;
986: y[6*i+4] += sum5;
987: y[6*i+5] += sum6;
988: }
990: PetscLogFlops(12.0*a->nz);
991: VecRestoreArrayRead(xx,&x);
992: VecRestoreArray(zz,&y);
993: return(0);
994: }
998: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
999: {
1000: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1001: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1002: const PetscScalar *x,*v;
1003: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1004: PetscErrorCode ierr;
1005: PetscInt n,i;
1006: const PetscInt m = b->AIJ->rmap->n,*idx;
1009: if (yy != zz) {VecCopy(yy,zz);}
1010: VecGetArrayRead(xx,&x);
1011: VecGetArray(zz,&y);
1012:
1013: for (i=0; i<m; i++) {
1014: idx = a->j + a->i[i] ;
1015: v = a->a + a->i[i] ;
1016: n = a->i[i+1] - a->i[i];
1017: alpha1 = x[6*i];
1018: alpha2 = x[6*i+1];
1019: alpha3 = x[6*i+2];
1020: alpha4 = x[6*i+3];
1021: alpha5 = x[6*i+4];
1022: alpha6 = x[6*i+5];
1023: while (n-->0) {
1024: y[6*(*idx)] += alpha1*(*v);
1025: y[6*(*idx)+1] += alpha2*(*v);
1026: y[6*(*idx)+2] += alpha3*(*v);
1027: y[6*(*idx)+3] += alpha4*(*v);
1028: y[6*(*idx)+4] += alpha5*(*v);
1029: y[6*(*idx)+5] += alpha6*(*v);
1030: idx++; v++;
1031: }
1032: }
1033: PetscLogFlops(12.0*a->nz);
1034: VecRestoreArrayRead(xx,&x);
1035: VecRestoreArray(zz,&y);
1036: return(0);
1037: }
1039: /* ------------------------------------------------------------------------------*/
1042: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1043: {
1044: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1045: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1046: const PetscScalar *x,*v;
1047: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1048: PetscErrorCode ierr;
1049: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1050: PetscInt nonzerorow=0,n,i,jrow,j;
1053: VecGetArrayRead(xx,&x);
1054: VecGetArray(yy,&y);
1055: idx = a->j;
1056: v = a->a;
1057: ii = a->i;
1059: for (i=0; i<m; i++) {
1060: jrow = ii[i];
1061: n = ii[i+1] - jrow;
1062: sum1 = 0.0;
1063: sum2 = 0.0;
1064: sum3 = 0.0;
1065: sum4 = 0.0;
1066: sum5 = 0.0;
1067: sum6 = 0.0;
1068: sum7 = 0.0;
1069: nonzerorow += (n>0);
1070: for (j=0; j<n; j++) {
1071: sum1 += v[jrow]*x[7*idx[jrow]];
1072: sum2 += v[jrow]*x[7*idx[jrow]+1];
1073: sum3 += v[jrow]*x[7*idx[jrow]+2];
1074: sum4 += v[jrow]*x[7*idx[jrow]+3];
1075: sum5 += v[jrow]*x[7*idx[jrow]+4];
1076: sum6 += v[jrow]*x[7*idx[jrow]+5];
1077: sum7 += v[jrow]*x[7*idx[jrow]+6];
1078: jrow++;
1079: }
1080: y[7*i] = sum1;
1081: y[7*i+1] = sum2;
1082: y[7*i+2] = sum3;
1083: y[7*i+3] = sum4;
1084: y[7*i+4] = sum5;
1085: y[7*i+5] = sum6;
1086: y[7*i+6] = sum7;
1087: }
1089: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1090: VecRestoreArrayRead(xx,&x);
1091: VecRestoreArray(yy,&y);
1092: return(0);
1093: }
1097: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1098: {
1099: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1100: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1101: const PetscScalar *x,*v;
1102: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1103: PetscErrorCode ierr;
1104: const PetscInt m = b->AIJ->rmap->n,*idx;
1105: PetscInt n,i;
1108: VecSet(yy,0.0);
1109: VecGetArrayRead(xx,&x);
1110: VecGetArray(yy,&y);
1112: for (i=0; i<m; i++) {
1113: idx = a->j + a->i[i] ;
1114: v = a->a + a->i[i] ;
1115: n = a->i[i+1] - a->i[i];
1116: alpha1 = x[7*i];
1117: alpha2 = x[7*i+1];
1118: alpha3 = x[7*i+2];
1119: alpha4 = x[7*i+3];
1120: alpha5 = x[7*i+4];
1121: alpha6 = x[7*i+5];
1122: alpha7 = x[7*i+6];
1123: while (n-->0) {
1124: y[7*(*idx)] += alpha1*(*v);
1125: y[7*(*idx)+1] += alpha2*(*v);
1126: y[7*(*idx)+2] += alpha3*(*v);
1127: y[7*(*idx)+3] += alpha4*(*v);
1128: y[7*(*idx)+4] += alpha5*(*v);
1129: y[7*(*idx)+5] += alpha6*(*v);
1130: y[7*(*idx)+6] += alpha7*(*v);
1131: idx++; v++;
1132: }
1133: }
1134: PetscLogFlops(14.0*a->nz);
1135: VecRestoreArrayRead(xx,&x);
1136: VecRestoreArray(yy,&y);
1137: return(0);
1138: }
1142: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1143: {
1144: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1145: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1146: const PetscScalar *x,*v;
1147: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1148: PetscErrorCode ierr;
1149: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1150: PetscInt n,i,jrow,j;
1153: if (yy != zz) {VecCopy(yy,zz);}
1154: VecGetArrayRead(xx,&x);
1155: VecGetArray(zz,&y);
1156: idx = a->j;
1157: v = a->a;
1158: ii = a->i;
1160: for (i=0; i<m; i++) {
1161: jrow = ii[i];
1162: n = ii[i+1] - jrow;
1163: sum1 = 0.0;
1164: sum2 = 0.0;
1165: sum3 = 0.0;
1166: sum4 = 0.0;
1167: sum5 = 0.0;
1168: sum6 = 0.0;
1169: sum7 = 0.0;
1170: for (j=0; j<n; j++) {
1171: sum1 += v[jrow]*x[7*idx[jrow]];
1172: sum2 += v[jrow]*x[7*idx[jrow]+1];
1173: sum3 += v[jrow]*x[7*idx[jrow]+2];
1174: sum4 += v[jrow]*x[7*idx[jrow]+3];
1175: sum5 += v[jrow]*x[7*idx[jrow]+4];
1176: sum6 += v[jrow]*x[7*idx[jrow]+5];
1177: sum7 += v[jrow]*x[7*idx[jrow]+6];
1178: jrow++;
1179: }
1180: y[7*i] += sum1;
1181: y[7*i+1] += sum2;
1182: y[7*i+2] += sum3;
1183: y[7*i+3] += sum4;
1184: y[7*i+4] += sum5;
1185: y[7*i+5] += sum6;
1186: y[7*i+6] += sum7;
1187: }
1189: PetscLogFlops(14.0*a->nz);
1190: VecRestoreArrayRead(xx,&x);
1191: VecRestoreArray(zz,&y);
1192: return(0);
1193: }
1197: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1198: {
1199: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1200: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1201: const PetscScalar *x,*v;
1202: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1203: PetscErrorCode ierr;
1204: const PetscInt m = b->AIJ->rmap->n,*idx;
1205: PetscInt n,i;
1208: if (yy != zz) {VecCopy(yy,zz);}
1209: VecGetArrayRead(xx,&x);
1210: VecGetArray(zz,&y);
1211: for (i=0; i<m; i++) {
1212: idx = a->j + a->i[i] ;
1213: v = a->a + a->i[i] ;
1214: n = a->i[i+1] - a->i[i];
1215: alpha1 = x[7*i];
1216: alpha2 = x[7*i+1];
1217: alpha3 = x[7*i+2];
1218: alpha4 = x[7*i+3];
1219: alpha5 = x[7*i+4];
1220: alpha6 = x[7*i+5];
1221: alpha7 = x[7*i+6];
1222: while (n-->0) {
1223: y[7*(*idx)] += alpha1*(*v);
1224: y[7*(*idx)+1] += alpha2*(*v);
1225: y[7*(*idx)+2] += alpha3*(*v);
1226: y[7*(*idx)+3] += alpha4*(*v);
1227: y[7*(*idx)+4] += alpha5*(*v);
1228: y[7*(*idx)+5] += alpha6*(*v);
1229: y[7*(*idx)+6] += alpha7*(*v);
1230: idx++; v++;
1231: }
1232: }
1233: PetscLogFlops(14.0*a->nz);
1234: VecRestoreArrayRead(xx,&x);
1235: VecRestoreArray(zz,&y);
1236: return(0);
1237: }
1241: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1242: {
1243: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1244: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1245: const PetscScalar *x,*v;
1246: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1247: PetscErrorCode ierr;
1248: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1249: PetscInt nonzerorow=0,n,i,jrow,j;
1252: VecGetArrayRead(xx,&x);
1253: VecGetArray(yy,&y);
1254: idx = a->j;
1255: v = a->a;
1256: ii = a->i;
1258: for (i=0; i<m; i++) {
1259: jrow = ii[i];
1260: n = ii[i+1] - jrow;
1261: sum1 = 0.0;
1262: sum2 = 0.0;
1263: sum3 = 0.0;
1264: sum4 = 0.0;
1265: sum5 = 0.0;
1266: sum6 = 0.0;
1267: sum7 = 0.0;
1268: sum8 = 0.0;
1269: nonzerorow += (n>0);
1270: for (j=0; j<n; j++) {
1271: sum1 += v[jrow]*x[8*idx[jrow]];
1272: sum2 += v[jrow]*x[8*idx[jrow]+1];
1273: sum3 += v[jrow]*x[8*idx[jrow]+2];
1274: sum4 += v[jrow]*x[8*idx[jrow]+3];
1275: sum5 += v[jrow]*x[8*idx[jrow]+4];
1276: sum6 += v[jrow]*x[8*idx[jrow]+5];
1277: sum7 += v[jrow]*x[8*idx[jrow]+6];
1278: sum8 += v[jrow]*x[8*idx[jrow]+7];
1279: jrow++;
1280: }
1281: y[8*i] = sum1;
1282: y[8*i+1] = sum2;
1283: y[8*i+2] = sum3;
1284: y[8*i+3] = sum4;
1285: y[8*i+4] = sum5;
1286: y[8*i+5] = sum6;
1287: y[8*i+6] = sum7;
1288: y[8*i+7] = sum8;
1289: }
1291: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1292: VecRestoreArrayRead(xx,&x);
1293: VecRestoreArray(yy,&y);
1294: return(0);
1295: }
1299: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1300: {
1301: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1302: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1303: const PetscScalar *x,*v;
1304: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1305: PetscErrorCode ierr;
1306: const PetscInt m = b->AIJ->rmap->n,*idx;
1307: PetscInt n,i;
1310: VecSet(yy,0.0);
1311: VecGetArrayRead(xx,&x);
1312: VecGetArray(yy,&y);
1314: for (i=0; i<m; i++) {
1315: idx = a->j + a->i[i] ;
1316: v = a->a + a->i[i] ;
1317: n = a->i[i+1] - a->i[i];
1318: alpha1 = x[8*i];
1319: alpha2 = x[8*i+1];
1320: alpha3 = x[8*i+2];
1321: alpha4 = x[8*i+3];
1322: alpha5 = x[8*i+4];
1323: alpha6 = x[8*i+5];
1324: alpha7 = x[8*i+6];
1325: alpha8 = x[8*i+7];
1326: while (n-->0) {
1327: y[8*(*idx)] += alpha1*(*v);
1328: y[8*(*idx)+1] += alpha2*(*v);
1329: y[8*(*idx)+2] += alpha3*(*v);
1330: y[8*(*idx)+3] += alpha4*(*v);
1331: y[8*(*idx)+4] += alpha5*(*v);
1332: y[8*(*idx)+5] += alpha6*(*v);
1333: y[8*(*idx)+6] += alpha7*(*v);
1334: y[8*(*idx)+7] += alpha8*(*v);
1335: idx++; v++;
1336: }
1337: }
1338: PetscLogFlops(16.0*a->nz);
1339: VecRestoreArrayRead(xx,&x);
1340: VecRestoreArray(yy,&y);
1341: return(0);
1342: }
1346: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1347: {
1348: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1349: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1350: const PetscScalar *x,*v;
1351: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1352: PetscErrorCode ierr;
1353: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1354: PetscInt n,i,jrow,j;
1357: if (yy != zz) {VecCopy(yy,zz);}
1358: VecGetArrayRead(xx,&x);
1359: VecGetArray(zz,&y);
1360: idx = a->j;
1361: v = a->a;
1362: ii = a->i;
1364: for (i=0; i<m; i++) {
1365: jrow = ii[i];
1366: n = ii[i+1] - jrow;
1367: sum1 = 0.0;
1368: sum2 = 0.0;
1369: sum3 = 0.0;
1370: sum4 = 0.0;
1371: sum5 = 0.0;
1372: sum6 = 0.0;
1373: sum7 = 0.0;
1374: sum8 = 0.0;
1375: for (j=0; j<n; j++) {
1376: sum1 += v[jrow]*x[8*idx[jrow]];
1377: sum2 += v[jrow]*x[8*idx[jrow]+1];
1378: sum3 += v[jrow]*x[8*idx[jrow]+2];
1379: sum4 += v[jrow]*x[8*idx[jrow]+3];
1380: sum5 += v[jrow]*x[8*idx[jrow]+4];
1381: sum6 += v[jrow]*x[8*idx[jrow]+5];
1382: sum7 += v[jrow]*x[8*idx[jrow]+6];
1383: sum8 += v[jrow]*x[8*idx[jrow]+7];
1384: jrow++;
1385: }
1386: y[8*i] += sum1;
1387: y[8*i+1] += sum2;
1388: y[8*i+2] += sum3;
1389: y[8*i+3] += sum4;
1390: y[8*i+4] += sum5;
1391: y[8*i+5] += sum6;
1392: y[8*i+6] += sum7;
1393: y[8*i+7] += sum8;
1394: }
1396: PetscLogFlops(16.0*a->nz);
1397: VecRestoreArrayRead(xx,&x);
1398: VecRestoreArray(zz,&y);
1399: return(0);
1400: }
1404: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1405: {
1406: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1407: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1408: const PetscScalar *x,*v;
1409: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1410: PetscErrorCode ierr;
1411: const PetscInt m = b->AIJ->rmap->n,*idx;
1412: PetscInt n,i;
1415: if (yy != zz) {VecCopy(yy,zz);}
1416: VecGetArrayRead(xx,&x);
1417: VecGetArray(zz,&y);
1418: for (i=0; i<m; i++) {
1419: idx = a->j + a->i[i] ;
1420: v = a->a + a->i[i] ;
1421: n = a->i[i+1] - a->i[i];
1422: alpha1 = x[8*i];
1423: alpha2 = x[8*i+1];
1424: alpha3 = x[8*i+2];
1425: alpha4 = x[8*i+3];
1426: alpha5 = x[8*i+4];
1427: alpha6 = x[8*i+5];
1428: alpha7 = x[8*i+6];
1429: alpha8 = x[8*i+7];
1430: while (n-->0) {
1431: y[8*(*idx)] += alpha1*(*v);
1432: y[8*(*idx)+1] += alpha2*(*v);
1433: y[8*(*idx)+2] += alpha3*(*v);
1434: y[8*(*idx)+3] += alpha4*(*v);
1435: y[8*(*idx)+4] += alpha5*(*v);
1436: y[8*(*idx)+5] += alpha6*(*v);
1437: y[8*(*idx)+6] += alpha7*(*v);
1438: y[8*(*idx)+7] += alpha8*(*v);
1439: idx++; v++;
1440: }
1441: }
1442: PetscLogFlops(16.0*a->nz);
1443: VecRestoreArrayRead(xx,&x);
1444: VecRestoreArray(zz,&y);
1445: return(0);
1446: }
1448: /* ------------------------------------------------------------------------------*/
1451: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1452: {
1453: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1454: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1455: const PetscScalar *x,*v;
1456: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1457: PetscErrorCode ierr;
1458: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1459: PetscInt nonzerorow=0,n,i,jrow,j;
1462: VecGetArrayRead(xx,&x);
1463: VecGetArray(yy,&y);
1464: idx = a->j;
1465: v = a->a;
1466: ii = a->i;
1468: for (i=0; i<m; i++) {
1469: jrow = ii[i];
1470: n = ii[i+1] - jrow;
1471: sum1 = 0.0;
1472: sum2 = 0.0;
1473: sum3 = 0.0;
1474: sum4 = 0.0;
1475: sum5 = 0.0;
1476: sum6 = 0.0;
1477: sum7 = 0.0;
1478: sum8 = 0.0;
1479: sum9 = 0.0;
1480: nonzerorow += (n>0);
1481: for (j=0; j<n; j++) {
1482: sum1 += v[jrow]*x[9*idx[jrow]];
1483: sum2 += v[jrow]*x[9*idx[jrow]+1];
1484: sum3 += v[jrow]*x[9*idx[jrow]+2];
1485: sum4 += v[jrow]*x[9*idx[jrow]+3];
1486: sum5 += v[jrow]*x[9*idx[jrow]+4];
1487: sum6 += v[jrow]*x[9*idx[jrow]+5];
1488: sum7 += v[jrow]*x[9*idx[jrow]+6];
1489: sum8 += v[jrow]*x[9*idx[jrow]+7];
1490: sum9 += v[jrow]*x[9*idx[jrow]+8];
1491: jrow++;
1492: }
1493: y[9*i] = sum1;
1494: y[9*i+1] = sum2;
1495: y[9*i+2] = sum3;
1496: y[9*i+3] = sum4;
1497: y[9*i+4] = sum5;
1498: y[9*i+5] = sum6;
1499: y[9*i+6] = sum7;
1500: y[9*i+7] = sum8;
1501: y[9*i+8] = sum9;
1502: }
1504: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1505: VecRestoreArrayRead(xx,&x);
1506: VecRestoreArray(yy,&y);
1507: return(0);
1508: }
1510: /* ------------------------------------------------------------------------------*/
1514: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1515: {
1516: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1517: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1518: const PetscScalar *x,*v;
1519: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1520: PetscErrorCode ierr;
1521: const PetscInt m = b->AIJ->rmap->n,*idx;
1522: PetscInt n,i;
1525: VecSet(yy,0.0);
1526: VecGetArrayRead(xx,&x);
1527: VecGetArray(yy,&y);
1529: for (i=0; i<m; i++) {
1530: idx = a->j + a->i[i] ;
1531: v = a->a + a->i[i] ;
1532: n = a->i[i+1] - a->i[i];
1533: alpha1 = x[9*i];
1534: alpha2 = x[9*i+1];
1535: alpha3 = x[9*i+2];
1536: alpha4 = x[9*i+3];
1537: alpha5 = x[9*i+4];
1538: alpha6 = x[9*i+5];
1539: alpha7 = x[9*i+6];
1540: alpha8 = x[9*i+7];
1541: alpha9 = x[9*i+8];
1542: while (n-->0) {
1543: y[9*(*idx)] += alpha1*(*v);
1544: y[9*(*idx)+1] += alpha2*(*v);
1545: y[9*(*idx)+2] += alpha3*(*v);
1546: y[9*(*idx)+3] += alpha4*(*v);
1547: y[9*(*idx)+4] += alpha5*(*v);
1548: y[9*(*idx)+5] += alpha6*(*v);
1549: y[9*(*idx)+6] += alpha7*(*v);
1550: y[9*(*idx)+7] += alpha8*(*v);
1551: y[9*(*idx)+8] += alpha9*(*v);
1552: idx++; v++;
1553: }
1554: }
1555: PetscLogFlops(18.0*a->nz);
1556: VecRestoreArrayRead(xx,&x);
1557: VecRestoreArray(yy,&y);
1558: return(0);
1559: }
1563: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1564: {
1565: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1566: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1567: const PetscScalar *x,*v;
1568: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1569: PetscErrorCode ierr;
1570: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1571: PetscInt n,i,jrow,j;
1574: if (yy != zz) {VecCopy(yy,zz);}
1575: VecGetArrayRead(xx,&x);
1576: VecGetArray(zz,&y);
1577: idx = a->j;
1578: v = a->a;
1579: ii = a->i;
1581: for (i=0; i<m; i++) {
1582: jrow = ii[i];
1583: n = ii[i+1] - jrow;
1584: sum1 = 0.0;
1585: sum2 = 0.0;
1586: sum3 = 0.0;
1587: sum4 = 0.0;
1588: sum5 = 0.0;
1589: sum6 = 0.0;
1590: sum7 = 0.0;
1591: sum8 = 0.0;
1592: sum9 = 0.0;
1593: for (j=0; j<n; j++) {
1594: sum1 += v[jrow]*x[9*idx[jrow]];
1595: sum2 += v[jrow]*x[9*idx[jrow]+1];
1596: sum3 += v[jrow]*x[9*idx[jrow]+2];
1597: sum4 += v[jrow]*x[9*idx[jrow]+3];
1598: sum5 += v[jrow]*x[9*idx[jrow]+4];
1599: sum6 += v[jrow]*x[9*idx[jrow]+5];
1600: sum7 += v[jrow]*x[9*idx[jrow]+6];
1601: sum8 += v[jrow]*x[9*idx[jrow]+7];
1602: sum9 += v[jrow]*x[9*idx[jrow]+8];
1603: jrow++;
1604: }
1605: y[9*i] += sum1;
1606: y[9*i+1] += sum2;
1607: y[9*i+2] += sum3;
1608: y[9*i+3] += sum4;
1609: y[9*i+4] += sum5;
1610: y[9*i+5] += sum6;
1611: y[9*i+6] += sum7;
1612: y[9*i+7] += sum8;
1613: y[9*i+8] += sum9;
1614: }
1616: PetscLogFlops(18*a->nz);
1617: VecRestoreArrayRead(xx,&x);
1618: VecRestoreArray(zz,&y);
1619: return(0);
1620: }
1624: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1625: {
1626: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1627: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1628: const PetscScalar *x,*v;
1629: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1630: PetscErrorCode ierr;
1631: const PetscInt m = b->AIJ->rmap->n,*idx;
1632: PetscInt n,i;
1635: if (yy != zz) {VecCopy(yy,zz);}
1636: VecGetArrayRead(xx,&x);
1637: VecGetArray(zz,&y);
1638: for (i=0; i<m; i++) {
1639: idx = a->j + a->i[i] ;
1640: v = a->a + a->i[i] ;
1641: n = a->i[i+1] - a->i[i];
1642: alpha1 = x[9*i];
1643: alpha2 = x[9*i+1];
1644: alpha3 = x[9*i+2];
1645: alpha4 = x[9*i+3];
1646: alpha5 = x[9*i+4];
1647: alpha6 = x[9*i+5];
1648: alpha7 = x[9*i+6];
1649: alpha8 = x[9*i+7];
1650: alpha9 = x[9*i+8];
1651: while (n-->0) {
1652: y[9*(*idx)] += alpha1*(*v);
1653: y[9*(*idx)+1] += alpha2*(*v);
1654: y[9*(*idx)+2] += alpha3*(*v);
1655: y[9*(*idx)+3] += alpha4*(*v);
1656: y[9*(*idx)+4] += alpha5*(*v);
1657: y[9*(*idx)+5] += alpha6*(*v);
1658: y[9*(*idx)+6] += alpha7*(*v);
1659: y[9*(*idx)+7] += alpha8*(*v);
1660: y[9*(*idx)+8] += alpha9*(*v);
1661: idx++; v++;
1662: }
1663: }
1664: PetscLogFlops(18.0*a->nz);
1665: VecRestoreArrayRead(xx,&x);
1666: VecRestoreArray(zz,&y);
1667: return(0);
1668: }
1671: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1672: {
1673: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1674: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1675: const PetscScalar *x,*v;
1676: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1677: PetscErrorCode ierr;
1678: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1679: PetscInt nonzerorow=0,n,i,jrow,j;
1682: VecGetArrayRead(xx,&x);
1683: VecGetArray(yy,&y);
1684: idx = a->j;
1685: v = a->a;
1686: ii = a->i;
1688: for (i=0; i<m; i++) {
1689: jrow = ii[i];
1690: n = ii[i+1] - jrow;
1691: sum1 = 0.0;
1692: sum2 = 0.0;
1693: sum3 = 0.0;
1694: sum4 = 0.0;
1695: sum5 = 0.0;
1696: sum6 = 0.0;
1697: sum7 = 0.0;
1698: sum8 = 0.0;
1699: sum9 = 0.0;
1700: sum10 = 0.0;
1701: nonzerorow += (n>0);
1702: for (j=0; j<n; j++) {
1703: sum1 += v[jrow]*x[10*idx[jrow]];
1704: sum2 += v[jrow]*x[10*idx[jrow]+1];
1705: sum3 += v[jrow]*x[10*idx[jrow]+2];
1706: sum4 += v[jrow]*x[10*idx[jrow]+3];
1707: sum5 += v[jrow]*x[10*idx[jrow]+4];
1708: sum6 += v[jrow]*x[10*idx[jrow]+5];
1709: sum7 += v[jrow]*x[10*idx[jrow]+6];
1710: sum8 += v[jrow]*x[10*idx[jrow]+7];
1711: sum9 += v[jrow]*x[10*idx[jrow]+8];
1712: sum10 += v[jrow]*x[10*idx[jrow]+9];
1713: jrow++;
1714: }
1715: y[10*i] = sum1;
1716: y[10*i+1] = sum2;
1717: y[10*i+2] = sum3;
1718: y[10*i+3] = sum4;
1719: y[10*i+4] = sum5;
1720: y[10*i+5] = sum6;
1721: y[10*i+6] = sum7;
1722: y[10*i+7] = sum8;
1723: y[10*i+8] = sum9;
1724: y[10*i+9] = sum10;
1725: }
1727: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1728: VecRestoreArrayRead(xx,&x);
1729: VecRestoreArray(yy,&y);
1730: return(0);
1731: }
1735: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1736: {
1737: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1738: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1739: const PetscScalar *x,*v;
1740: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1741: PetscErrorCode ierr;
1742: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1743: PetscInt n,i,jrow,j;
1746: if (yy != zz) {VecCopy(yy,zz);}
1747: VecGetArrayRead(xx,&x);
1748: VecGetArray(zz,&y);
1749: idx = a->j;
1750: v = a->a;
1751: ii = a->i;
1753: for (i=0; i<m; i++) {
1754: jrow = ii[i];
1755: n = ii[i+1] - jrow;
1756: sum1 = 0.0;
1757: sum2 = 0.0;
1758: sum3 = 0.0;
1759: sum4 = 0.0;
1760: sum5 = 0.0;
1761: sum6 = 0.0;
1762: sum7 = 0.0;
1763: sum8 = 0.0;
1764: sum9 = 0.0;
1765: sum10 = 0.0;
1766: for (j=0; j<n; j++) {
1767: sum1 += v[jrow]*x[10*idx[jrow]];
1768: sum2 += v[jrow]*x[10*idx[jrow]+1];
1769: sum3 += v[jrow]*x[10*idx[jrow]+2];
1770: sum4 += v[jrow]*x[10*idx[jrow]+3];
1771: sum5 += v[jrow]*x[10*idx[jrow]+4];
1772: sum6 += v[jrow]*x[10*idx[jrow]+5];
1773: sum7 += v[jrow]*x[10*idx[jrow]+6];
1774: sum8 += v[jrow]*x[10*idx[jrow]+7];
1775: sum9 += v[jrow]*x[10*idx[jrow]+8];
1776: sum10 += v[jrow]*x[10*idx[jrow]+9];
1777: jrow++;
1778: }
1779: y[10*i] += sum1;
1780: y[10*i+1] += sum2;
1781: y[10*i+2] += sum3;
1782: y[10*i+3] += sum4;
1783: y[10*i+4] += sum5;
1784: y[10*i+5] += sum6;
1785: y[10*i+6] += sum7;
1786: y[10*i+7] += sum8;
1787: y[10*i+8] += sum9;
1788: y[10*i+9] += sum10;
1789: }
1791: PetscLogFlops(20.0*a->nz);
1792: VecRestoreArrayRead(xx,&x);
1793: VecRestoreArray(yy,&y);
1794: return(0);
1795: }
1799: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1800: {
1801: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1802: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1803: const PetscScalar *x,*v;
1804: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1805: PetscErrorCode ierr;
1806: const PetscInt m = b->AIJ->rmap->n,*idx;
1807: PetscInt n,i;
1810: VecSet(yy,0.0);
1811: VecGetArrayRead(xx,&x);
1812: VecGetArray(yy,&y);
1814: for (i=0; i<m; i++) {
1815: idx = a->j + a->i[i] ;
1816: v = a->a + a->i[i] ;
1817: n = a->i[i+1] - a->i[i];
1818: alpha1 = x[10*i];
1819: alpha2 = x[10*i+1];
1820: alpha3 = x[10*i+2];
1821: alpha4 = x[10*i+3];
1822: alpha5 = x[10*i+4];
1823: alpha6 = x[10*i+5];
1824: alpha7 = x[10*i+6];
1825: alpha8 = x[10*i+7];
1826: alpha9 = x[10*i+8];
1827: alpha10 = x[10*i+9];
1828: while (n-->0) {
1829: y[10*(*idx)] += alpha1*(*v);
1830: y[10*(*idx)+1] += alpha2*(*v);
1831: y[10*(*idx)+2] += alpha3*(*v);
1832: y[10*(*idx)+3] += alpha4*(*v);
1833: y[10*(*idx)+4] += alpha5*(*v);
1834: y[10*(*idx)+5] += alpha6*(*v);
1835: y[10*(*idx)+6] += alpha7*(*v);
1836: y[10*(*idx)+7] += alpha8*(*v);
1837: y[10*(*idx)+8] += alpha9*(*v);
1838: y[10*(*idx)+9] += alpha10*(*v);
1839: idx++; v++;
1840: }
1841: }
1842: PetscLogFlops(20.0*a->nz);
1843: VecRestoreArrayRead(xx,&x);
1844: VecRestoreArray(yy,&y);
1845: return(0);
1846: }
1850: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1851: {
1852: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1853: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1854: const PetscScalar *x,*v;
1855: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1856: PetscErrorCode ierr;
1857: const PetscInt m = b->AIJ->rmap->n,*idx;
1858: PetscInt n,i;
1861: if (yy != zz) {VecCopy(yy,zz);}
1862: VecGetArrayRead(xx,&x);
1863: VecGetArray(zz,&y);
1864: for (i=0; i<m; i++) {
1865: idx = a->j + a->i[i] ;
1866: v = a->a + a->i[i] ;
1867: n = a->i[i+1] - a->i[i];
1868: alpha1 = x[10*i];
1869: alpha2 = x[10*i+1];
1870: alpha3 = x[10*i+2];
1871: alpha4 = x[10*i+3];
1872: alpha5 = x[10*i+4];
1873: alpha6 = x[10*i+5];
1874: alpha7 = x[10*i+6];
1875: alpha8 = x[10*i+7];
1876: alpha9 = x[10*i+8];
1877: alpha10 = x[10*i+9];
1878: while (n-->0) {
1879: y[10*(*idx)] += alpha1*(*v);
1880: y[10*(*idx)+1] += alpha2*(*v);
1881: y[10*(*idx)+2] += alpha3*(*v);
1882: y[10*(*idx)+3] += alpha4*(*v);
1883: y[10*(*idx)+4] += alpha5*(*v);
1884: y[10*(*idx)+5] += alpha6*(*v);
1885: y[10*(*idx)+6] += alpha7*(*v);
1886: y[10*(*idx)+7] += alpha8*(*v);
1887: y[10*(*idx)+8] += alpha9*(*v);
1888: y[10*(*idx)+9] += alpha10*(*v);
1889: idx++; v++;
1890: }
1891: }
1892: PetscLogFlops(20.0*a->nz);
1893: VecRestoreArrayRead(xx,&x);
1894: VecRestoreArray(zz,&y);
1895: return(0);
1896: }
1899: /*--------------------------------------------------------------------------------------------*/
1902: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1903: {
1904: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1905: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1906: const PetscScalar *x,*v;
1907: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1908: PetscErrorCode ierr;
1909: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1910: PetscInt nonzerorow=0,n,i,jrow,j;
1913: VecGetArrayRead(xx,&x);
1914: VecGetArray(yy,&y);
1915: idx = a->j;
1916: v = a->a;
1917: ii = a->i;
1919: for (i=0; i<m; i++) {
1920: jrow = ii[i];
1921: n = ii[i+1] - jrow;
1922: sum1 = 0.0;
1923: sum2 = 0.0;
1924: sum3 = 0.0;
1925: sum4 = 0.0;
1926: sum5 = 0.0;
1927: sum6 = 0.0;
1928: sum7 = 0.0;
1929: sum8 = 0.0;
1930: sum9 = 0.0;
1931: sum10 = 0.0;
1932: sum11 = 0.0;
1933: nonzerorow += (n>0);
1934: for (j=0; j<n; j++) {
1935: sum1 += v[jrow]*x[11*idx[jrow]];
1936: sum2 += v[jrow]*x[11*idx[jrow]+1];
1937: sum3 += v[jrow]*x[11*idx[jrow]+2];
1938: sum4 += v[jrow]*x[11*idx[jrow]+3];
1939: sum5 += v[jrow]*x[11*idx[jrow]+4];
1940: sum6 += v[jrow]*x[11*idx[jrow]+5];
1941: sum7 += v[jrow]*x[11*idx[jrow]+6];
1942: sum8 += v[jrow]*x[11*idx[jrow]+7];
1943: sum9 += v[jrow]*x[11*idx[jrow]+8];
1944: sum10 += v[jrow]*x[11*idx[jrow]+9];
1945: sum11 += v[jrow]*x[11*idx[jrow]+10];
1946: jrow++;
1947: }
1948: y[11*i] = sum1;
1949: y[11*i+1] = sum2;
1950: y[11*i+2] = sum3;
1951: y[11*i+3] = sum4;
1952: y[11*i+4] = sum5;
1953: y[11*i+5] = sum6;
1954: y[11*i+6] = sum7;
1955: y[11*i+7] = sum8;
1956: y[11*i+8] = sum9;
1957: y[11*i+9] = sum10;
1958: y[11*i+10] = sum11;
1959: }
1961: PetscLogFlops(22*a->nz - 11*nonzerorow);
1962: VecRestoreArrayRead(xx,&x);
1963: VecRestoreArray(yy,&y);
1964: return(0);
1965: }
1969: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1970: {
1971: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1972: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1973: const PetscScalar *x,*v;
1974: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1975: PetscErrorCode ierr;
1976: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1977: PetscInt n,i,jrow,j;
1980: if (yy != zz) {VecCopy(yy,zz);}
1981: VecGetArrayRead(xx,&x);
1982: VecGetArray(zz,&y);
1983: idx = a->j;
1984: v = a->a;
1985: ii = a->i;
1987: for (i=0; i<m; i++) {
1988: jrow = ii[i];
1989: n = ii[i+1] - jrow;
1990: sum1 = 0.0;
1991: sum2 = 0.0;
1992: sum3 = 0.0;
1993: sum4 = 0.0;
1994: sum5 = 0.0;
1995: sum6 = 0.0;
1996: sum7 = 0.0;
1997: sum8 = 0.0;
1998: sum9 = 0.0;
1999: sum10 = 0.0;
2000: sum11 = 0.0;
2001: for (j=0; j<n; j++) {
2002: sum1 += v[jrow]*x[11*idx[jrow]];
2003: sum2 += v[jrow]*x[11*idx[jrow]+1];
2004: sum3 += v[jrow]*x[11*idx[jrow]+2];
2005: sum4 += v[jrow]*x[11*idx[jrow]+3];
2006: sum5 += v[jrow]*x[11*idx[jrow]+4];
2007: sum6 += v[jrow]*x[11*idx[jrow]+5];
2008: sum7 += v[jrow]*x[11*idx[jrow]+6];
2009: sum8 += v[jrow]*x[11*idx[jrow]+7];
2010: sum9 += v[jrow]*x[11*idx[jrow]+8];
2011: sum10 += v[jrow]*x[11*idx[jrow]+9];
2012: sum11 += v[jrow]*x[11*idx[jrow]+10];
2013: jrow++;
2014: }
2015: y[11*i] += sum1;
2016: y[11*i+1] += sum2;
2017: y[11*i+2] += sum3;
2018: y[11*i+3] += sum4;
2019: y[11*i+4] += sum5;
2020: y[11*i+5] += sum6;
2021: y[11*i+6] += sum7;
2022: y[11*i+7] += sum8;
2023: y[11*i+8] += sum9;
2024: y[11*i+9] += sum10;
2025: y[11*i+10] += sum11;
2026: }
2028: PetscLogFlops(22*a->nz);
2029: VecRestoreArrayRead(xx,&x);
2030: VecRestoreArray(yy,&y);
2031: return(0);
2032: }
2036: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2037: {
2038: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2039: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2040: const PetscScalar *x,*v;
2041: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2042: PetscErrorCode ierr;
2043: const PetscInt m = b->AIJ->rmap->n,*idx;
2044: PetscInt n,i;
2047: VecSet(yy,0.0);
2048: VecGetArrayRead(xx,&x);
2049: VecGetArray(yy,&y);
2051: for (i=0; i<m; i++) {
2052: idx = a->j + a->i[i] ;
2053: v = a->a + a->i[i] ;
2054: n = a->i[i+1] - a->i[i];
2055: alpha1 = x[11*i];
2056: alpha2 = x[11*i+1];
2057: alpha3 = x[11*i+2];
2058: alpha4 = x[11*i+3];
2059: alpha5 = x[11*i+4];
2060: alpha6 = x[11*i+5];
2061: alpha7 = x[11*i+6];
2062: alpha8 = x[11*i+7];
2063: alpha9 = x[11*i+8];
2064: alpha10 = x[11*i+9];
2065: alpha11 = x[11*i+10];
2066: while (n-->0) {
2067: y[11*(*idx)] += alpha1*(*v);
2068: y[11*(*idx)+1] += alpha2*(*v);
2069: y[11*(*idx)+2] += alpha3*(*v);
2070: y[11*(*idx)+3] += alpha4*(*v);
2071: y[11*(*idx)+4] += alpha5*(*v);
2072: y[11*(*idx)+5] += alpha6*(*v);
2073: y[11*(*idx)+6] += alpha7*(*v);
2074: y[11*(*idx)+7] += alpha8*(*v);
2075: y[11*(*idx)+8] += alpha9*(*v);
2076: y[11*(*idx)+9] += alpha10*(*v);
2077: y[11*(*idx)+10] += alpha11*(*v);
2078: idx++; v++;
2079: }
2080: }
2081: PetscLogFlops(22*a->nz);
2082: VecRestoreArrayRead(xx,&x);
2083: VecRestoreArray(yy,&y);
2084: return(0);
2085: }
2089: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2090: {
2091: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2092: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2093: const PetscScalar *x,*v;
2094: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2095: PetscErrorCode ierr;
2096: const PetscInt m = b->AIJ->rmap->n,*idx;
2097: PetscInt n,i;
2100: if (yy != zz) {VecCopy(yy,zz);}
2101: VecGetArrayRead(xx,&x);
2102: VecGetArray(zz,&y);
2103: for (i=0; i<m; i++) {
2104: idx = a->j + a->i[i] ;
2105: v = a->a + a->i[i] ;
2106: n = a->i[i+1] - a->i[i];
2107: alpha1 = x[11*i];
2108: alpha2 = x[11*i+1];
2109: alpha3 = x[11*i+2];
2110: alpha4 = x[11*i+3];
2111: alpha5 = x[11*i+4];
2112: alpha6 = x[11*i+5];
2113: alpha7 = x[11*i+6];
2114: alpha8 = x[11*i+7];
2115: alpha9 = x[11*i+8];
2116: alpha10 = x[11*i+9];
2117: alpha11 = x[11*i+10];
2118: while (n-->0) {
2119: y[11*(*idx)] += alpha1*(*v);
2120: y[11*(*idx)+1] += alpha2*(*v);
2121: y[11*(*idx)+2] += alpha3*(*v);
2122: y[11*(*idx)+3] += alpha4*(*v);
2123: y[11*(*idx)+4] += alpha5*(*v);
2124: y[11*(*idx)+5] += alpha6*(*v);
2125: y[11*(*idx)+6] += alpha7*(*v);
2126: y[11*(*idx)+7] += alpha8*(*v);
2127: y[11*(*idx)+8] += alpha9*(*v);
2128: y[11*(*idx)+9] += alpha10*(*v);
2129: y[11*(*idx)+10] += alpha11*(*v);
2130: idx++; v++;
2131: }
2132: }
2133: PetscLogFlops(22*a->nz);
2134: VecRestoreArrayRead(xx,&x);
2135: VecRestoreArray(zz,&y);
2136: return(0);
2137: }
2140: /*--------------------------------------------------------------------------------------------*/
2143: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2144: {
2145: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2146: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2147: const PetscScalar *x,*v;
2148: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2149: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2150: PetscErrorCode ierr;
2151: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2152: PetscInt nonzerorow=0,n,i,jrow,j;
2155: VecGetArrayRead(xx,&x);
2156: VecGetArray(yy,&y);
2157: idx = a->j;
2158: v = a->a;
2159: ii = a->i;
2161: for (i=0; i<m; i++) {
2162: jrow = ii[i];
2163: n = ii[i+1] - jrow;
2164: sum1 = 0.0;
2165: sum2 = 0.0;
2166: sum3 = 0.0;
2167: sum4 = 0.0;
2168: sum5 = 0.0;
2169: sum6 = 0.0;
2170: sum7 = 0.0;
2171: sum8 = 0.0;
2172: sum9 = 0.0;
2173: sum10 = 0.0;
2174: sum11 = 0.0;
2175: sum12 = 0.0;
2176: sum13 = 0.0;
2177: sum14 = 0.0;
2178: sum15 = 0.0;
2179: sum16 = 0.0;
2180: nonzerorow += (n>0);
2181: for (j=0; j<n; j++) {
2182: sum1 += v[jrow]*x[16*idx[jrow]];
2183: sum2 += v[jrow]*x[16*idx[jrow]+1];
2184: sum3 += v[jrow]*x[16*idx[jrow]+2];
2185: sum4 += v[jrow]*x[16*idx[jrow]+3];
2186: sum5 += v[jrow]*x[16*idx[jrow]+4];
2187: sum6 += v[jrow]*x[16*idx[jrow]+5];
2188: sum7 += v[jrow]*x[16*idx[jrow]+6];
2189: sum8 += v[jrow]*x[16*idx[jrow]+7];
2190: sum9 += v[jrow]*x[16*idx[jrow]+8];
2191: sum10 += v[jrow]*x[16*idx[jrow]+9];
2192: sum11 += v[jrow]*x[16*idx[jrow]+10];
2193: sum12 += v[jrow]*x[16*idx[jrow]+11];
2194: sum13 += v[jrow]*x[16*idx[jrow]+12];
2195: sum14 += v[jrow]*x[16*idx[jrow]+13];
2196: sum15 += v[jrow]*x[16*idx[jrow]+14];
2197: sum16 += v[jrow]*x[16*idx[jrow]+15];
2198: jrow++;
2199: }
2200: y[16*i] = sum1;
2201: y[16*i+1] = sum2;
2202: y[16*i+2] = sum3;
2203: y[16*i+3] = sum4;
2204: y[16*i+4] = sum5;
2205: y[16*i+5] = sum6;
2206: y[16*i+6] = sum7;
2207: y[16*i+7] = sum8;
2208: y[16*i+8] = sum9;
2209: y[16*i+9] = sum10;
2210: y[16*i+10] = sum11;
2211: y[16*i+11] = sum12;
2212: y[16*i+12] = sum13;
2213: y[16*i+13] = sum14;
2214: y[16*i+14] = sum15;
2215: y[16*i+15] = sum16;
2216: }
2218: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2219: VecRestoreArrayRead(xx,&x);
2220: VecRestoreArray(yy,&y);
2221: return(0);
2222: }
2226: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2227: {
2228: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2229: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2230: const PetscScalar *x,*v;
2231: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2232: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2233: PetscErrorCode ierr;
2234: const PetscInt m = b->AIJ->rmap->n,*idx;
2235: PetscInt n,i;
2238: VecSet(yy,0.0);
2239: VecGetArrayRead(xx,&x);
2240: VecGetArray(yy,&y);
2242: for (i=0; i<m; i++) {
2243: idx = a->j + a->i[i] ;
2244: v = a->a + a->i[i] ;
2245: n = a->i[i+1] - a->i[i];
2246: alpha1 = x[16*i];
2247: alpha2 = x[16*i+1];
2248: alpha3 = x[16*i+2];
2249: alpha4 = x[16*i+3];
2250: alpha5 = x[16*i+4];
2251: alpha6 = x[16*i+5];
2252: alpha7 = x[16*i+6];
2253: alpha8 = x[16*i+7];
2254: alpha9 = x[16*i+8];
2255: alpha10 = x[16*i+9];
2256: alpha11 = x[16*i+10];
2257: alpha12 = x[16*i+11];
2258: alpha13 = x[16*i+12];
2259: alpha14 = x[16*i+13];
2260: alpha15 = x[16*i+14];
2261: alpha16 = x[16*i+15];
2262: while (n-->0) {
2263: y[16*(*idx)] += alpha1*(*v);
2264: y[16*(*idx)+1] += alpha2*(*v);
2265: y[16*(*idx)+2] += alpha3*(*v);
2266: y[16*(*idx)+3] += alpha4*(*v);
2267: y[16*(*idx)+4] += alpha5*(*v);
2268: y[16*(*idx)+5] += alpha6*(*v);
2269: y[16*(*idx)+6] += alpha7*(*v);
2270: y[16*(*idx)+7] += alpha8*(*v);
2271: y[16*(*idx)+8] += alpha9*(*v);
2272: y[16*(*idx)+9] += alpha10*(*v);
2273: y[16*(*idx)+10] += alpha11*(*v);
2274: y[16*(*idx)+11] += alpha12*(*v);
2275: y[16*(*idx)+12] += alpha13*(*v);
2276: y[16*(*idx)+13] += alpha14*(*v);
2277: y[16*(*idx)+14] += alpha15*(*v);
2278: y[16*(*idx)+15] += alpha16*(*v);
2279: idx++; v++;
2280: }
2281: }
2282: PetscLogFlops(32.0*a->nz);
2283: VecRestoreArrayRead(xx,&x);
2284: VecRestoreArray(yy,&y);
2285: return(0);
2286: }
2290: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2291: {
2292: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2293: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2294: const PetscScalar *x,*v;
2295: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2296: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2297: PetscErrorCode ierr;
2298: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2299: PetscInt n,i,jrow,j;
2302: if (yy != zz) {VecCopy(yy,zz);}
2303: VecGetArrayRead(xx,&x);
2304: VecGetArray(zz,&y);
2305: idx = a->j;
2306: v = a->a;
2307: ii = a->i;
2309: for (i=0; i<m; i++) {
2310: jrow = ii[i];
2311: n = ii[i+1] - jrow;
2312: sum1 = 0.0;
2313: sum2 = 0.0;
2314: sum3 = 0.0;
2315: sum4 = 0.0;
2316: sum5 = 0.0;
2317: sum6 = 0.0;
2318: sum7 = 0.0;
2319: sum8 = 0.0;
2320: sum9 = 0.0;
2321: sum10 = 0.0;
2322: sum11 = 0.0;
2323: sum12 = 0.0;
2324: sum13 = 0.0;
2325: sum14 = 0.0;
2326: sum15 = 0.0;
2327: sum16 = 0.0;
2328: for (j=0; j<n; j++) {
2329: sum1 += v[jrow]*x[16*idx[jrow]];
2330: sum2 += v[jrow]*x[16*idx[jrow]+1];
2331: sum3 += v[jrow]*x[16*idx[jrow]+2];
2332: sum4 += v[jrow]*x[16*idx[jrow]+3];
2333: sum5 += v[jrow]*x[16*idx[jrow]+4];
2334: sum6 += v[jrow]*x[16*idx[jrow]+5];
2335: sum7 += v[jrow]*x[16*idx[jrow]+6];
2336: sum8 += v[jrow]*x[16*idx[jrow]+7];
2337: sum9 += v[jrow]*x[16*idx[jrow]+8];
2338: sum10 += v[jrow]*x[16*idx[jrow]+9];
2339: sum11 += v[jrow]*x[16*idx[jrow]+10];
2340: sum12 += v[jrow]*x[16*idx[jrow]+11];
2341: sum13 += v[jrow]*x[16*idx[jrow]+12];
2342: sum14 += v[jrow]*x[16*idx[jrow]+13];
2343: sum15 += v[jrow]*x[16*idx[jrow]+14];
2344: sum16 += v[jrow]*x[16*idx[jrow]+15];
2345: jrow++;
2346: }
2347: y[16*i] += sum1;
2348: y[16*i+1] += sum2;
2349: y[16*i+2] += sum3;
2350: y[16*i+3] += sum4;
2351: y[16*i+4] += sum5;
2352: y[16*i+5] += sum6;
2353: y[16*i+6] += sum7;
2354: y[16*i+7] += sum8;
2355: y[16*i+8] += sum9;
2356: y[16*i+9] += sum10;
2357: y[16*i+10] += sum11;
2358: y[16*i+11] += sum12;
2359: y[16*i+12] += sum13;
2360: y[16*i+13] += sum14;
2361: y[16*i+14] += sum15;
2362: y[16*i+15] += sum16;
2363: }
2365: PetscLogFlops(32.0*a->nz);
2366: VecRestoreArrayRead(xx,&x);
2367: VecRestoreArray(zz,&y);
2368: return(0);
2369: }
2373: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2374: {
2375: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2376: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2377: const PetscScalar *x,*v;
2378: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2379: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2380: PetscErrorCode ierr;
2381: const PetscInt m = b->AIJ->rmap->n,*idx;
2382: PetscInt n,i;
2385: if (yy != zz) {VecCopy(yy,zz);}
2386: VecGetArrayRead(xx,&x);
2387: VecGetArray(zz,&y);
2388: for (i=0; i<m; i++) {
2389: idx = a->j + a->i[i] ;
2390: v = a->a + a->i[i] ;
2391: n = a->i[i+1] - a->i[i];
2392: alpha1 = x[16*i];
2393: alpha2 = x[16*i+1];
2394: alpha3 = x[16*i+2];
2395: alpha4 = x[16*i+3];
2396: alpha5 = x[16*i+4];
2397: alpha6 = x[16*i+5];
2398: alpha7 = x[16*i+6];
2399: alpha8 = x[16*i+7];
2400: alpha9 = x[16*i+8];
2401: alpha10 = x[16*i+9];
2402: alpha11 = x[16*i+10];
2403: alpha12 = x[16*i+11];
2404: alpha13 = x[16*i+12];
2405: alpha14 = x[16*i+13];
2406: alpha15 = x[16*i+14];
2407: alpha16 = x[16*i+15];
2408: while (n-->0) {
2409: y[16*(*idx)] += alpha1*(*v);
2410: y[16*(*idx)+1] += alpha2*(*v);
2411: y[16*(*idx)+2] += alpha3*(*v);
2412: y[16*(*idx)+3] += alpha4*(*v);
2413: y[16*(*idx)+4] += alpha5*(*v);
2414: y[16*(*idx)+5] += alpha6*(*v);
2415: y[16*(*idx)+6] += alpha7*(*v);
2416: y[16*(*idx)+7] += alpha8*(*v);
2417: y[16*(*idx)+8] += alpha9*(*v);
2418: y[16*(*idx)+9] += alpha10*(*v);
2419: y[16*(*idx)+10] += alpha11*(*v);
2420: y[16*(*idx)+11] += alpha12*(*v);
2421: y[16*(*idx)+12] += alpha13*(*v);
2422: y[16*(*idx)+13] += alpha14*(*v);
2423: y[16*(*idx)+14] += alpha15*(*v);
2424: y[16*(*idx)+15] += alpha16*(*v);
2425: idx++; v++;
2426: }
2427: }
2428: PetscLogFlops(32.0*a->nz);
2429: VecRestoreArrayRead(xx,&x);
2430: VecRestoreArray(zz,&y);
2431: return(0);
2432: }
2434: /*--------------------------------------------------------------------------------------------*/
2437: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2438: {
2439: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2440: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2441: const PetscScalar *x,*v;
2442: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2443: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2444: PetscErrorCode ierr;
2445: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2446: PetscInt nonzerorow=0,n,i,jrow,j;
2449: VecGetArrayRead(xx,&x);
2450: VecGetArray(yy,&y);
2451: idx = a->j;
2452: v = a->a;
2453: ii = a->i;
2455: for (i=0; i<m; i++) {
2456: jrow = ii[i];
2457: n = ii[i+1] - jrow;
2458: sum1 = 0.0;
2459: sum2 = 0.0;
2460: sum3 = 0.0;
2461: sum4 = 0.0;
2462: sum5 = 0.0;
2463: sum6 = 0.0;
2464: sum7 = 0.0;
2465: sum8 = 0.0;
2466: sum9 = 0.0;
2467: sum10 = 0.0;
2468: sum11 = 0.0;
2469: sum12 = 0.0;
2470: sum13 = 0.0;
2471: sum14 = 0.0;
2472: sum15 = 0.0;
2473: sum16 = 0.0;
2474: sum17 = 0.0;
2475: sum18 = 0.0;
2476: nonzerorow += (n>0);
2477: for (j=0; j<n; j++) {
2478: sum1 += v[jrow]*x[18*idx[jrow]];
2479: sum2 += v[jrow]*x[18*idx[jrow]+1];
2480: sum3 += v[jrow]*x[18*idx[jrow]+2];
2481: sum4 += v[jrow]*x[18*idx[jrow]+3];
2482: sum5 += v[jrow]*x[18*idx[jrow]+4];
2483: sum6 += v[jrow]*x[18*idx[jrow]+5];
2484: sum7 += v[jrow]*x[18*idx[jrow]+6];
2485: sum8 += v[jrow]*x[18*idx[jrow]+7];
2486: sum9 += v[jrow]*x[18*idx[jrow]+8];
2487: sum10 += v[jrow]*x[18*idx[jrow]+9];
2488: sum11 += v[jrow]*x[18*idx[jrow]+10];
2489: sum12 += v[jrow]*x[18*idx[jrow]+11];
2490: sum13 += v[jrow]*x[18*idx[jrow]+12];
2491: sum14 += v[jrow]*x[18*idx[jrow]+13];
2492: sum15 += v[jrow]*x[18*idx[jrow]+14];
2493: sum16 += v[jrow]*x[18*idx[jrow]+15];
2494: sum17 += v[jrow]*x[18*idx[jrow]+16];
2495: sum18 += v[jrow]*x[18*idx[jrow]+17];
2496: jrow++;
2497: }
2498: y[18*i] = sum1;
2499: y[18*i+1] = sum2;
2500: y[18*i+2] = sum3;
2501: y[18*i+3] = sum4;
2502: y[18*i+4] = sum5;
2503: y[18*i+5] = sum6;
2504: y[18*i+6] = sum7;
2505: y[18*i+7] = sum8;
2506: y[18*i+8] = sum9;
2507: y[18*i+9] = sum10;
2508: y[18*i+10] = sum11;
2509: y[18*i+11] = sum12;
2510: y[18*i+12] = sum13;
2511: y[18*i+13] = sum14;
2512: y[18*i+14] = sum15;
2513: y[18*i+15] = sum16;
2514: y[18*i+16] = sum17;
2515: y[18*i+17] = sum18;
2516: }
2518: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2519: VecRestoreArrayRead(xx,&x);
2520: VecRestoreArray(yy,&y);
2521: return(0);
2522: }
2526: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2527: {
2528: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2529: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2530: const PetscScalar *x,*v;
2531: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2532: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2533: PetscErrorCode ierr;
2534: const PetscInt m = b->AIJ->rmap->n,*idx;
2535: PetscInt n,i;
2538: VecSet(yy,0.0);
2539: VecGetArrayRead(xx,&x);
2540: VecGetArray(yy,&y);
2542: for (i=0; i<m; i++) {
2543: idx = a->j + a->i[i] ;
2544: v = a->a + a->i[i] ;
2545: n = a->i[i+1] - a->i[i];
2546: alpha1 = x[18*i];
2547: alpha2 = x[18*i+1];
2548: alpha3 = x[18*i+2];
2549: alpha4 = x[18*i+3];
2550: alpha5 = x[18*i+4];
2551: alpha6 = x[18*i+5];
2552: alpha7 = x[18*i+6];
2553: alpha8 = x[18*i+7];
2554: alpha9 = x[18*i+8];
2555: alpha10 = x[18*i+9];
2556: alpha11 = x[18*i+10];
2557: alpha12 = x[18*i+11];
2558: alpha13 = x[18*i+12];
2559: alpha14 = x[18*i+13];
2560: alpha15 = x[18*i+14];
2561: alpha16 = x[18*i+15];
2562: alpha17 = x[18*i+16];
2563: alpha18 = x[18*i+17];
2564: while (n-->0) {
2565: y[18*(*idx)] += alpha1*(*v);
2566: y[18*(*idx)+1] += alpha2*(*v);
2567: y[18*(*idx)+2] += alpha3*(*v);
2568: y[18*(*idx)+3] += alpha4*(*v);
2569: y[18*(*idx)+4] += alpha5*(*v);
2570: y[18*(*idx)+5] += alpha6*(*v);
2571: y[18*(*idx)+6] += alpha7*(*v);
2572: y[18*(*idx)+7] += alpha8*(*v);
2573: y[18*(*idx)+8] += alpha9*(*v);
2574: y[18*(*idx)+9] += alpha10*(*v);
2575: y[18*(*idx)+10] += alpha11*(*v);
2576: y[18*(*idx)+11] += alpha12*(*v);
2577: y[18*(*idx)+12] += alpha13*(*v);
2578: y[18*(*idx)+13] += alpha14*(*v);
2579: y[18*(*idx)+14] += alpha15*(*v);
2580: y[18*(*idx)+15] += alpha16*(*v);
2581: y[18*(*idx)+16] += alpha17*(*v);
2582: y[18*(*idx)+17] += alpha18*(*v);
2583: idx++; v++;
2584: }
2585: }
2586: PetscLogFlops(36.0*a->nz);
2587: VecRestoreArrayRead(xx,&x);
2588: VecRestoreArray(yy,&y);
2589: return(0);
2590: }
2594: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2595: {
2596: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2597: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2598: const PetscScalar *x,*v;
2599: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2600: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2601: PetscErrorCode ierr;
2602: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2603: PetscInt n,i,jrow,j;
2606: if (yy != zz) {VecCopy(yy,zz);}
2607: VecGetArrayRead(xx,&x);
2608: VecGetArray(zz,&y);
2609: idx = a->j;
2610: v = a->a;
2611: ii = a->i;
2613: for (i=0; i<m; i++) {
2614: jrow = ii[i];
2615: n = ii[i+1] - jrow;
2616: sum1 = 0.0;
2617: sum2 = 0.0;
2618: sum3 = 0.0;
2619: sum4 = 0.0;
2620: sum5 = 0.0;
2621: sum6 = 0.0;
2622: sum7 = 0.0;
2623: sum8 = 0.0;
2624: sum9 = 0.0;
2625: sum10 = 0.0;
2626: sum11 = 0.0;
2627: sum12 = 0.0;
2628: sum13 = 0.0;
2629: sum14 = 0.0;
2630: sum15 = 0.0;
2631: sum16 = 0.0;
2632: sum17 = 0.0;
2633: sum18 = 0.0;
2634: for (j=0; j<n; j++) {
2635: sum1 += v[jrow]*x[18*idx[jrow]];
2636: sum2 += v[jrow]*x[18*idx[jrow]+1];
2637: sum3 += v[jrow]*x[18*idx[jrow]+2];
2638: sum4 += v[jrow]*x[18*idx[jrow]+3];
2639: sum5 += v[jrow]*x[18*idx[jrow]+4];
2640: sum6 += v[jrow]*x[18*idx[jrow]+5];
2641: sum7 += v[jrow]*x[18*idx[jrow]+6];
2642: sum8 += v[jrow]*x[18*idx[jrow]+7];
2643: sum9 += v[jrow]*x[18*idx[jrow]+8];
2644: sum10 += v[jrow]*x[18*idx[jrow]+9];
2645: sum11 += v[jrow]*x[18*idx[jrow]+10];
2646: sum12 += v[jrow]*x[18*idx[jrow]+11];
2647: sum13 += v[jrow]*x[18*idx[jrow]+12];
2648: sum14 += v[jrow]*x[18*idx[jrow]+13];
2649: sum15 += v[jrow]*x[18*idx[jrow]+14];
2650: sum16 += v[jrow]*x[18*idx[jrow]+15];
2651: sum17 += v[jrow]*x[18*idx[jrow]+16];
2652: sum18 += v[jrow]*x[18*idx[jrow]+17];
2653: jrow++;
2654: }
2655: y[18*i] += sum1;
2656: y[18*i+1] += sum2;
2657: y[18*i+2] += sum3;
2658: y[18*i+3] += sum4;
2659: y[18*i+4] += sum5;
2660: y[18*i+5] += sum6;
2661: y[18*i+6] += sum7;
2662: y[18*i+7] += sum8;
2663: y[18*i+8] += sum9;
2664: y[18*i+9] += sum10;
2665: y[18*i+10] += sum11;
2666: y[18*i+11] += sum12;
2667: y[18*i+12] += sum13;
2668: y[18*i+13] += sum14;
2669: y[18*i+14] += sum15;
2670: y[18*i+15] += sum16;
2671: y[18*i+16] += sum17;
2672: y[18*i+17] += sum18;
2673: }
2675: PetscLogFlops(36.0*a->nz);
2676: VecRestoreArrayRead(xx,&x);
2677: VecRestoreArray(zz,&y);
2678: return(0);
2679: }
2683: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2684: {
2685: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2686: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2687: const PetscScalar *x,*v;
2688: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2689: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2690: PetscErrorCode ierr;
2691: const PetscInt m = b->AIJ->rmap->n,*idx;
2692: PetscInt n,i;
2695: if (yy != zz) {VecCopy(yy,zz);}
2696: VecGetArrayRead(xx,&x);
2697: VecGetArray(zz,&y);
2698: for (i=0; i<m; i++) {
2699: idx = a->j + a->i[i] ;
2700: v = a->a + a->i[i] ;
2701: n = a->i[i+1] - a->i[i];
2702: alpha1 = x[18*i];
2703: alpha2 = x[18*i+1];
2704: alpha3 = x[18*i+2];
2705: alpha4 = x[18*i+3];
2706: alpha5 = x[18*i+4];
2707: alpha6 = x[18*i+5];
2708: alpha7 = x[18*i+6];
2709: alpha8 = x[18*i+7];
2710: alpha9 = x[18*i+8];
2711: alpha10 = x[18*i+9];
2712: alpha11 = x[18*i+10];
2713: alpha12 = x[18*i+11];
2714: alpha13 = x[18*i+12];
2715: alpha14 = x[18*i+13];
2716: alpha15 = x[18*i+14];
2717: alpha16 = x[18*i+15];
2718: alpha17 = x[18*i+16];
2719: alpha18 = x[18*i+17];
2720: while (n-->0) {
2721: y[18*(*idx)] += alpha1*(*v);
2722: y[18*(*idx)+1] += alpha2*(*v);
2723: y[18*(*idx)+2] += alpha3*(*v);
2724: y[18*(*idx)+3] += alpha4*(*v);
2725: y[18*(*idx)+4] += alpha5*(*v);
2726: y[18*(*idx)+5] += alpha6*(*v);
2727: y[18*(*idx)+6] += alpha7*(*v);
2728: y[18*(*idx)+7] += alpha8*(*v);
2729: y[18*(*idx)+8] += alpha9*(*v);
2730: y[18*(*idx)+9] += alpha10*(*v);
2731: y[18*(*idx)+10] += alpha11*(*v);
2732: y[18*(*idx)+11] += alpha12*(*v);
2733: y[18*(*idx)+12] += alpha13*(*v);
2734: y[18*(*idx)+13] += alpha14*(*v);
2735: y[18*(*idx)+14] += alpha15*(*v);
2736: y[18*(*idx)+15] += alpha16*(*v);
2737: y[18*(*idx)+16] += alpha17*(*v);
2738: y[18*(*idx)+17] += alpha18*(*v);
2739: idx++; v++;
2740: }
2741: }
2742: PetscLogFlops(36.0*a->nz);
2743: VecRestoreArrayRead(xx,&x);
2744: VecRestoreArray(zz,&y);
2745: return(0);
2746: }
2750: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2751: {
2752: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2753: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2754: const PetscScalar *x,*v;
2755: PetscScalar *y,*sums;
2756: PetscErrorCode ierr;
2757: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2758: PetscInt n,i,jrow,j,dof = b->dof,k;
2761: VecGetArrayRead(xx,&x);
2762: VecSet(yy,0.0);
2763: VecGetArray(yy,&y);
2764: idx = a->j;
2765: v = a->a;
2766: ii = a->i;
2768: for (i=0; i<m; i++) {
2769: jrow = ii[i];
2770: n = ii[i+1] - jrow;
2771: sums = y + dof*i;
2772: for (j=0; j<n; j++) {
2773: for (k=0; k<dof; k++) {
2774: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2775: }
2776: jrow++;
2777: }
2778: }
2780: PetscLogFlops(2.0*dof*a->nz);
2781: VecRestoreArrayRead(xx,&x);
2782: VecRestoreArray(yy,&y);
2783: return(0);
2784: }
2788: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2789: {
2790: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2791: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2792: const PetscScalar *x,*v;
2793: PetscScalar *y,*sums;
2794: PetscErrorCode ierr;
2795: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2796: PetscInt n,i,jrow,j,dof = b->dof,k;
2799: if (yy != zz) {VecCopy(yy,zz);}
2800: VecGetArrayRead(xx,&x);
2801: VecGetArray(zz,&y);
2802: idx = a->j;
2803: v = a->a;
2804: ii = a->i;
2806: for (i=0; i<m; i++) {
2807: jrow = ii[i];
2808: n = ii[i+1] - jrow;
2809: sums = y + dof*i;
2810: for (j=0; j<n; j++) {
2811: for (k=0; k<dof; k++) {
2812: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2813: }
2814: jrow++;
2815: }
2816: }
2818: PetscLogFlops(2.0*dof*a->nz);
2819: VecRestoreArrayRead(xx,&x);
2820: VecRestoreArray(zz,&y);
2821: return(0);
2822: }
2826: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2827: {
2828: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2829: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2830: const PetscScalar *x,*v,*alpha;
2831: PetscScalar *y;
2832: PetscErrorCode ierr;
2833: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2834: PetscInt n,i,k;
2837: VecGetArrayRead(xx,&x);
2838: VecSet(yy,0.0);
2839: VecGetArray(yy,&y);
2840: for (i=0; i<m; i++) {
2841: idx = a->j + a->i[i] ;
2842: v = a->a + a->i[i] ;
2843: n = a->i[i+1] - a->i[i];
2844: alpha = x + dof*i;
2845: while (n-->0) {
2846: for (k=0; k<dof; k++) {
2847: y[dof*(*idx)+k] += alpha[k]*(*v);
2848: }
2849: idx++; v++;
2850: }
2851: }
2852: PetscLogFlops(2.0*dof*a->nz);
2853: VecRestoreArrayRead(xx,&x);
2854: VecRestoreArray(yy,&y);
2855: return(0);
2856: }
2860: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2861: {
2862: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2863: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2864: const PetscScalar *x,*v,*alpha;
2865: PetscScalar *y;
2866: PetscErrorCode ierr;
2867: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2868: PetscInt n,i,k;
2871: if (yy != zz) {VecCopy(yy,zz);}
2872: VecGetArrayRead(xx,&x);
2873: VecGetArray(zz,&y);
2874: for (i=0; i<m; i++) {
2875: idx = a->j + a->i[i] ;
2876: v = a->a + a->i[i] ;
2877: n = a->i[i+1] - a->i[i];
2878: alpha = x + dof*i;
2879: while (n-->0) {
2880: for (k=0; k<dof; k++) {
2881: y[dof*(*idx)+k] += alpha[k]*(*v);
2882: }
2883: idx++; v++;
2884: }
2885: }
2886: PetscLogFlops(2.0*dof*a->nz);
2887: VecRestoreArrayRead(xx,&x);
2888: VecRestoreArray(zz,&y);
2889: return(0);
2890: }
2892: /*===================================================================================*/
2895: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2896: {
2897: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2901: /* start the scatter */
2902: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2903: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2904: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2905: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2906: return(0);
2907: }
2911: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2912: {
2913: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2917: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2918: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2919: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2920: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2921: return(0);
2922: }
2926: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2927: {
2928: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2932: /* start the scatter */
2933: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2934: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2935: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2936: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2937: return(0);
2938: }
2942: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2943: {
2944: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2948: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2949: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2950: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2951: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2952: return(0);
2953: }
2955: /* ----------------------------------------------------------------*/
2958: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2959: {
2960: /* This routine requires testing -- but it's getting better. */
2961: PetscErrorCode ierr;
2962: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2963: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2964: Mat P=pp->AIJ;
2965: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2966: PetscInt *pti,*ptj,*ptJ;
2967: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2968: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2969: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2970: MatScalar *ca;
2971: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2974: /* Start timer */
2975: PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);
2977: /* Get ij structure of P^T */
2978: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2980: cn = pn*ppdof;
2981: /* Allocate ci array, arrays for fill computation and */
2982: /* free space for accumulating nonzero column info */
2983: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2984: ci[0] = 0;
2986: /* Work arrays for rows of P^T*A */
2987: PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);
2988: PetscMemzero(ptadenserow,an*sizeof(PetscInt));
2989: PetscMemzero(denserow,cn*sizeof(PetscInt));
2991: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2992: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2993: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2994: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2995: current_space = free_space;
2997: /* Determine symbolic info for each row of C: */
2998: for (i=0;i<pn;i++) {
2999: ptnzi = pti[i+1] - pti[i];
3000: ptJ = ptj + pti[i];
3001: for (dof=0;dof<ppdof;dof++) {
3002: ptanzi = 0;
3003: /* Determine symbolic row of PtA: */
3004: for (j=0;j<ptnzi;j++) {
3005: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3006: arow = ptJ[j]*ppdof + dof;
3007: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3008: anzj = ai[arow+1] - ai[arow];
3009: ajj = aj + ai[arow];
3010: for (k=0;k<anzj;k++) {
3011: if (!ptadenserow[ajj[k]]) {
3012: ptadenserow[ajj[k]] = -1;
3013: ptasparserow[ptanzi++] = ajj[k];
3014: }
3015: }
3016: }
3017: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3018: ptaj = ptasparserow;
3019: cnzi = 0;
3020: for (j=0;j<ptanzi;j++) {
3021: /* Get offset within block of P */
3022: pshift = *ptaj%ppdof;
3023: /* Get block row of P */
3024: prow = (*ptaj++)/ppdof; /* integer division */
3025: /* P has same number of nonzeros per row as the compressed form */
3026: pnzj = pi[prow+1] - pi[prow];
3027: pjj = pj + pi[prow];
3028: for (k=0;k<pnzj;k++) {
3029: /* Locations in C are shifted by the offset within the block */
3030: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3031: if (!denserow[pjj[k]*ppdof+pshift]) {
3032: denserow[pjj[k]*ppdof+pshift] = -1;
3033: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3034: }
3035: }
3036: }
3038: /* sort sparserow */
3039: PetscSortInt(cnzi,sparserow);
3040:
3041: /* If free space is not available, make more free space */
3042: /* Double the amount of total space in the list */
3043: if (current_space->local_remaining<cnzi) {
3044: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
3045: }
3047: /* Copy data into free space, and zero out denserows */
3048: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3049: current_space->array += cnzi;
3050: current_space->local_used += cnzi;
3051: current_space->local_remaining -= cnzi;
3053: for (j=0;j<ptanzi;j++) {
3054: ptadenserow[ptasparserow[j]] = 0;
3055: }
3056: for (j=0;j<cnzi;j++) {
3057: denserow[sparserow[j]] = 0;
3058: }
3059: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3060: /* For now, we will recompute what is needed. */
3061: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3062: }
3063: }
3064: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3065: /* Allocate space for cj, initialize cj, and */
3066: /* destroy list of free space and other temporary array(s) */
3067: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
3068: PetscFreeSpaceContiguous(&free_space,cj);
3069: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3070:
3071: /* Allocate space for ca */
3072: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
3073: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
3074:
3075: /* put together the new matrix */
3076: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);
3078: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3079: /* Since these are PETSc arrays, change flags to free them as necessary. */
3080: c = (Mat_SeqAIJ *)((*C)->data);
3081: c->free_a = PETSC_TRUE;
3082: c->free_ij = PETSC_TRUE;
3083: c->nonew = 0;
3085: /* Clean up. */
3086: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3088: PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
3089: return(0);
3090: }
3094: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3095: {
3096: /* This routine requires testing -- first draft only */
3097: PetscErrorCode ierr;
3098: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3099: Mat P=pp->AIJ;
3100: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
3101: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
3102: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
3103: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3104: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3105: const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3106: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3107: const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3108: MatScalar *ca=c->a,*caj,*apa;
3111: /* Allocate temporary array for storage of one row of A*P */
3112: PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);
3113: PetscMemzero(apa,cn*sizeof(MatScalar));
3114: PetscMemzero(apj,cn*sizeof(PetscInt));
3115: PetscMemzero(apjdense,cn*sizeof(PetscInt));
3117: /* Clear old values in C */
3118: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
3120: for (i=0;i<am;i++) {
3121: /* Form sparse row of A*P */
3122: anzi = ai[i+1] - ai[i];
3123: apnzj = 0;
3124: for (j=0;j<anzi;j++) {
3125: /* Get offset within block of P */
3126: pshift = *aj%ppdof;
3127: /* Get block row of P */
3128: prow = *aj++/ppdof; /* integer division */
3129: pnzj = pi[prow+1] - pi[prow];
3130: pjj = pj + pi[prow];
3131: paj = pa + pi[prow];
3132: for (k=0;k<pnzj;k++) {
3133: poffset = pjj[k]*ppdof+pshift;
3134: if (!apjdense[poffset]) {
3135: apjdense[poffset] = -1;
3136: apj[apnzj++] = poffset;
3137: }
3138: apa[poffset] += (*aa)*paj[k];
3139: }
3140: PetscLogFlops(2.0*pnzj);
3141: aa++;
3142: }
3144: /* Sort the j index array for quick sparse axpy. */
3145: /* Note: a array does not need sorting as it is in dense storage locations. */
3146: PetscSortInt(apnzj,apj);
3148: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3149: prow = i/ppdof; /* integer division */
3150: pshift = i%ppdof;
3151: poffset = pi[prow];
3152: pnzi = pi[prow+1] - poffset;
3153: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3154: pJ = pj+poffset;
3155: pA = pa+poffset;
3156: for (j=0;j<pnzi;j++) {
3157: crow = (*pJ)*ppdof+pshift;
3158: cjj = cj + ci[crow];
3159: caj = ca + ci[crow];
3160: pJ++;
3161: /* Perform sparse axpy operation. Note cjj includes apj. */
3162: for (k=0,nextap=0;nextap<apnzj;k++) {
3163: if (cjj[k]==apj[nextap]) {
3164: caj[k] += (*pA)*apa[apj[nextap++]];
3165: }
3166: }
3167: PetscLogFlops(2.0*apnzj);
3168: pA++;
3169: }
3171: /* Zero the current row info for A*P */
3172: for (j=0;j<apnzj;j++) {
3173: apa[apj[j]] = 0.;
3174: apjdense[apj[j]] = 0;
3175: }
3176: }
3178: /* Assemble the final matrix and clean up */
3179: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3180: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3181: PetscFree3(apa,apj,apjdense);
3182: return(0);
3183: }
3187: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3188: {
3189: PetscErrorCode ierr;
3192: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3193: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3194: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3195: return(0);
3196: }
3200: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3201: {
3203: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3204: return(0);
3205: }
3210: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3211: {
3212: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3213: Mat a = b->AIJ,B;
3214: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3215: PetscErrorCode ierr;
3216: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3217: PetscInt *cols;
3218: PetscScalar *vals;
3221: MatGetSize(a,&m,&n);
3222: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
3223: for (i=0; i<m; i++) {
3224: nmax = PetscMax(nmax,aij->ilen[i]);
3225: for (j=0; j<dof; j++) {
3226: ilen[dof*i+j] = aij->ilen[i];
3227: }
3228: }
3229: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3230: PetscFree(ilen);
3231: PetscMalloc(nmax*sizeof(PetscInt),&icols);
3232: ii = 0;
3233: for (i=0; i<m; i++) {
3234: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3235: for (j=0; j<dof; j++) {
3236: for (k=0; k<ncols; k++) {
3237: icols[k] = dof*cols[k]+j;
3238: }
3239: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3240: ii++;
3241: }
3242: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3243: }
3244: PetscFree(icols);
3245: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3246: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3248: if (reuse == MAT_REUSE_MATRIX) {
3249: MatHeaderReplace(A,B);
3250: } else {
3251: *newmat = B;
3252: }
3253: return(0);
3254: }
3257: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3262: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3263: {
3264: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3265: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3266: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3267: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3268: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3269: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3270: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3271: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3272: PetscInt rstart,cstart,*garray,ii,k;
3273: PetscErrorCode ierr;
3274: PetscScalar *vals,*ovals;
3277: PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3278: for (i=0; i<A->rmap->n/dof; i++) {
3279: nmax = PetscMax(nmax,AIJ->ilen[i]);
3280: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3281: for (j=0; j<dof; j++) {
3282: dnz[dof*i+j] = AIJ->ilen[i];
3283: onz[dof*i+j] = OAIJ->ilen[i];
3284: }
3285: }
3286: MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3287: PetscFree2(dnz,onz);
3289: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3290: rstart = dof*maij->A->rmap->rstart;
3291: cstart = dof*maij->A->cmap->rstart;
3292: garray = mpiaij->garray;
3294: ii = rstart;
3295: for (i=0; i<A->rmap->n/dof; i++) {
3296: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3297: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3298: for (j=0; j<dof; j++) {
3299: for (k=0; k<ncols; k++) {
3300: icols[k] = cstart + dof*cols[k]+j;
3301: }
3302: for (k=0; k<oncols; k++) {
3303: oicols[k] = dof*garray[ocols[k]]+j;
3304: }
3305: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3306: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3307: ii++;
3308: }
3309: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3310: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3311: }
3312: PetscFree2(icols,oicols);
3314: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3315: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3317: if (reuse == MAT_REUSE_MATRIX) {
3318: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3319: ((PetscObject)A)->refct = 1;
3320: MatHeaderReplace(A,B);
3321: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3322: } else {
3323: *newmat = B;
3324: }
3325: return(0);
3326: }
3331: PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3332: {
3334: Mat A;
3337: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3338: MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3339: MatDestroy(&A);
3340: return(0);
3341: }
3343: /* ---------------------------------------------------------------------------------- */
3346: /*@C
3347: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3348: operations for multicomponent problems. It interpolates each component the same
3349: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3350: and MATMPIAIJ for distributed matrices.
3352: Collective
3354: Input Parameters:
3355: + A - the AIJ matrix describing the action on blocks
3356: - dof - the block size (number of components per node)
3358: Output Parameter:
3359: . maij - the new MAIJ matrix
3361: Operations provided:
3362: + MatMult
3363: . MatMultTranspose
3364: . MatMultAdd
3365: . MatMultTransposeAdd
3366: - MatView
3368: Level: advanced
3370: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3371: @*/
3372: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3373: {
3375: PetscMPIInt size;
3376: PetscInt n;
3377: Mat B;
3380: PetscObjectReference((PetscObject)A);
3382: if (dof == 1) {
3383: *maij = A;
3384: } else {
3385: MatCreate(((PetscObject)A)->comm,&B);
3386: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3387: PetscLayoutSetBlockSize(B->rmap,dof);
3388: PetscLayoutSetBlockSize(B->cmap,dof);
3389: PetscLayoutSetUp(B->rmap);
3390: PetscLayoutSetUp(B->cmap);
3391: B->assembled = PETSC_TRUE;
3393: MPI_Comm_size(((PetscObject)A)->comm,&size);
3394: if (size == 1) {
3395: Mat_SeqMAIJ *b;
3397: MatSetType(B,MATSEQMAIJ);
3398: B->ops->destroy = MatDestroy_SeqMAIJ;
3399: B->ops->view = MatView_SeqMAIJ;
3400: b = (Mat_SeqMAIJ*)B->data;
3401: b->dof = dof;
3402: b->AIJ = A;
3403: if (dof == 2) {
3404: B->ops->mult = MatMult_SeqMAIJ_2;
3405: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3406: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3407: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3408: } else if (dof == 3) {
3409: B->ops->mult = MatMult_SeqMAIJ_3;
3410: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3411: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3412: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3413: } else if (dof == 4) {
3414: B->ops->mult = MatMult_SeqMAIJ_4;
3415: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3416: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3417: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3418: } else if (dof == 5) {
3419: B->ops->mult = MatMult_SeqMAIJ_5;
3420: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3421: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3422: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3423: } else if (dof == 6) {
3424: B->ops->mult = MatMult_SeqMAIJ_6;
3425: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3426: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3427: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3428: } else if (dof == 7) {
3429: B->ops->mult = MatMult_SeqMAIJ_7;
3430: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3431: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3432: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3433: } else if (dof == 8) {
3434: B->ops->mult = MatMult_SeqMAIJ_8;
3435: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3436: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3437: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3438: } else if (dof == 9) {
3439: B->ops->mult = MatMult_SeqMAIJ_9;
3440: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3441: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3442: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3443: } else if (dof == 10) {
3444: B->ops->mult = MatMult_SeqMAIJ_10;
3445: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3446: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3447: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3448: } else if (dof == 11) {
3449: B->ops->mult = MatMult_SeqMAIJ_11;
3450: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3451: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3452: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3453: } else if (dof == 16) {
3454: B->ops->mult = MatMult_SeqMAIJ_16;
3455: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3456: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3457: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3458: } else if (dof == 18) {
3459: B->ops->mult = MatMult_SeqMAIJ_18;
3460: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3461: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3462: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3463: } else {
3464: B->ops->mult = MatMult_SeqMAIJ_N;
3465: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3466: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3467: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3468: }
3469: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3470: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3471: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3472: } else {
3473: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
3474: Mat_MPIMAIJ *b;
3475: IS from,to;
3476: Vec gvec;
3478: MatSetType(B,MATMPIMAIJ);
3479: B->ops->destroy = MatDestroy_MPIMAIJ;
3480: B->ops->view = MatView_MPIMAIJ;
3481: b = (Mat_MPIMAIJ*)B->data;
3482: b->dof = dof;
3483: b->A = A;
3484: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3485: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
3487: VecGetSize(mpiaij->lvec,&n);
3488: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
3489: VecSetBlockSize(b->w,dof);
3491: /* create two temporary Index sets for build scatter gather */
3492: ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3493: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3495: /* create temporary global vector to generate scatter context */
3496: VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
3497: VecSetBlockSize(gvec,dof);
3499: /* generate the scatter context */
3500: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3502: ISDestroy(&from);
3503: ISDestroy(&to);
3504: VecDestroy(&gvec);
3506: B->ops->mult = MatMult_MPIMAIJ_dof;
3507: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3508: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3509: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3510: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3511: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3512: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3513: }
3514: B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3515: *maij = B;
3516: MatView_Private(B);
3517: }
3518: return(0);
3519: }