Actual source code: matpapt.c
2: /*
3: Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4: C = P * A * P^T
5: */
7: #include <../src/mat/impls/aij/seq/aij.h>
8: #include <../src/mat/utils/freespace.h>
11: /*
12: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
13: C = P * A * P^T;
15: Note: C is assumed to be uncreated.
16: If this is not the case, Destroy C before calling this routine.
17: */
20: PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
21: {
22: /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
23: /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */
24: PetscErrorCode ierr;
25: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
26: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
27: PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
28: PetscInt *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
29: PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N;
30: PetscInt i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
31: MatScalar *ca;
34: /* some error checking which could be moved into interface layer */
35: if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
36: if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
38: /* Set up timers */
39: PetscLogEventBegin(MAT_Applypapt_symbolic,A,P,0,0);
41: /* Create ij structure of P^T */
42: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
44: /* Allocate ci array, arrays for fill computation and */
45: /* free space for accumulating nonzero column info */
46: PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);
47: ci[0] = 0;
49: PetscMalloc4(an,PetscInt,&padenserow,an,PetscInt,&pasparserow,pm,PetscInt,&denserow,pm,PetscInt,&sparserow);
50: PetscMemzero(padenserow,an*sizeof(PetscInt));
51: PetscMemzero(pasparserow,an*sizeof(PetscInt));
52: PetscMemzero(denserow,pm*sizeof(PetscInt));
53: PetscMemzero(sparserow,pm*sizeof(PetscInt));
55: /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
56: /* This should be reasonable if sparsity of PAPt is similar to that of A. */
57: PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);
58: current_space = free_space;
60: /* Determine fill for each row of C: */
61: for (i=0;i<pm;i++) {
62: pnzi = pi[i+1] - pi[i];
63: panzi = 0;
64: /* Get symbolic sparse row of PA: */
65: for (j=0;j<pnzi;j++) {
66: arow = *pj++;
67: anzj = ai[arow+1] - ai[arow];
68: ajj = aj + ai[arow];
69: for (k=0;k<anzj;k++) {
70: if (!padenserow[ajj[k]]) {
71: padenserow[ajj[k]] = -1;
72: pasparserow[panzi++] = ajj[k];
73: }
74: }
75: }
76: /* Using symbolic row of PA, determine symbolic row of C: */
77: paj = pasparserow;
78: cnzi = 0;
79: for (j=0;j<panzi;j++) {
80: ptrow = *paj++;
81: ptnzj = pti[ptrow+1] - pti[ptrow];
82: ptjj = ptj + pti[ptrow];
83: for (k=0;k<ptnzj;k++) {
84: if (!denserow[ptjj[k]]) {
85: denserow[ptjj[k]] = -1;
86: sparserow[cnzi++] = ptjj[k];
87: }
88: }
89: }
91: /* sort sparse representation */
92: PetscSortInt(cnzi,sparserow);
94: /* If free space is not available, make more free space */
95: /* Double the amount of total space in the list */
96: if (current_space->local_remaining<cnzi) {
97: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
98: }
100: /* Copy data into free space, and zero out dense row */
101: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
102: current_space->array += cnzi;
103: current_space->local_used += cnzi;
104: current_space->local_remaining -= cnzi;
106: for (j=0;j<panzi;j++) {
107: padenserow[pasparserow[j]] = 0;
108: }
109: for (j=0;j<cnzi;j++) {
110: denserow[sparserow[j]] = 0;
111: }
112: ci[i+1] = ci[i] + cnzi;
113: }
114: /* column indices are in the list of free space */
115: /* Allocate space for cj, initialize cj, and */
116: /* destroy list of free space and other temporary array(s) */
117: PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);
118: PetscFreeSpaceContiguous(&free_space,cj);
119: PetscFree4(padenserow,pasparserow,denserow,sparserow);
120:
121: /* Allocate space for ca */
122: PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
123: PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
124:
125: /* put together the new matrix */
126: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pm,pm,ci,cj,ca,C);
128: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
129: /* Since these are PETSc arrays, change flags to free them as necessary. */
130: c = (Mat_SeqAIJ *)((*C)->data);
131: c->free_a = PETSC_TRUE;
132: c->free_ij = PETSC_TRUE;
133: c->nonew = 0;
135: /* Clean up. */
136: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
138: PetscLogEventEnd(MAT_Applypapt_symbolic,A,P,0,0);
139: return(0);
140: }
142: /*
143: MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
144: C = P * A * P^T;
145: Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
146: */
149: PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
150: {
152: PetscInt flops=0;
153: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
154: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
155: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
156: PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
157: PetscInt *ci=c->i,*cj=c->j;
158: PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,cn=C->cmap->N,cm=C->rmap->N;
159: PetscInt i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
160: MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
163: /* This error checking should be unnecessary if the symbolic was performed */
164: if (pm!=cm) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
165: if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
166: if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
167: if (pm!=cn) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);
169: /* Set up timers */
170: PetscLogEventBegin(MAT_Applypapt_numeric,A,P,C,0);
171: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
173: PetscMalloc3(an,MatScalar,&paa,an,PetscInt,&paj,an,PetscInt,&pajdense);
174: PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));
175:
176: for (i=0;i<pm;i++) {
177: /* Form sparse row of P*A */
178: pnzi = pi[i+1] - pi[i];
179: panzj = 0;
180: for (j=0;j<pnzi;j++) {
181: arow = *pj++;
182: anzj = ai[arow+1] - ai[arow];
183: ajj = aj + ai[arow];
184: aaj = aa + ai[arow];
185: for (k=0;k<anzj;k++) {
186: if (!pajdense[ajj[k]]) {
187: pajdense[ajj[k]] = -1;
188: paj[panzj++] = ajj[k];
189: }
190: paa[ajj[k]] += (*pa)*aaj[k];
191: }
192: flops += 2*anzj;
193: pa++;
194: }
196: /* Sort the j index array for quick sparse axpy. */
197: PetscSortInt(panzj,paj);
199: /* Compute P*A*P^T using sparse inner products. */
200: /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
201: cnzi = ci[i+1] - ci[i];
202: for (j=0;j<cnzi;j++) {
203: /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
204: ptcol = *cj++;
205: ptnzj = pi[ptcol+1] - pi[ptcol];
206: ptj = pjj + pi[ptcol];
207: ptaj = pta + pi[ptcol];
208: sum = 0.;
209: k1 = 0;
210: k2 = 0;
211: while ((k1<panzj) && (k2<ptnzj)) {
212: if (paj[k1]==ptj[k2]) {
213: sum += paa[paj[k1++]]*ptaj[k2++];
214: } else if (paj[k1] < ptj[k2]) {
215: k1++;
216: } else /* if (paj[k1] > ptj[k2]) */ {
217: k2++;
218: }
219: }
220: *ca++ = sum;
221: }
223: /* Zero the current row info for P*A */
224: for (j=0;j<panzj;j++) {
225: paa[paj[j]] = 0.;
226: pajdense[paj[j]] = 0;
227: }
228: }
230: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
231: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
232: PetscFree3(paa,paj,pajdense);
233: PetscLogFlops(flops);
234: PetscLogEventEnd(MAT_Applypapt_numeric,A,P,C,0);
235: return(0);
236: }
237:
240: PetscErrorCode MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
241: {
245: PetscLogEventBegin(MAT_Applypapt,A,P,0,0);
246: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
247: MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
248: PetscLogEventEnd(MAT_Applypapt,A,P,0,0);
249: return(0);
250: }