Actual source code: mpiaij.c
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petscblaslapack.h>
5: /*MC
6: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9: and MATMPIAIJ otherwise. As a result, for single process communicators,
10: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11: for communicators controlling multiple processes. It is recommended that you call both of
12: the above preallocation routines for simplicity.
14: Options Database Keys:
15: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17: Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the implementation also automatically switches over to using inodes when
18: enough of them exist.
20: Level: beginner
22: .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23: M*/
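/*
   Illustrative sketch (not part of the original file): following the recommendation above,
   create an AIJ matrix and call both preallocation routines so the same code works whether
   the communicator has one process or many. The global size n and the per-row counts
   (5 diagonal, 2 off-diagonal) are placeholder values.

      Mat      A;
      PetscInt n = 100;                                          assumed global size
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
      MatSetType(A,MATAIJ);                                      or -mat_type aij with MatSetFromOptions()
      MatSeqAIJSetPreallocation(A,5,PETSC_NULL);                 used on a single-process communicator
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);    used on a multi-process communicator
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
      MatDestroy(&A);
*/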
25: /*MC
26: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
30: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31: for communicators controlling multiple processes. It is recommended that you call both of
32: the above preallocation routines for simplicity.
34: Options Database Keys:
35: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37: Level: beginner
39: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40: M*/
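/*
   Illustrative sketch (not part of the original file): as with MATAIJ above, the usual way
   to get the "aijcrl" format is at runtime, assuming the application calls
   MatSetFromOptions() on the matrix before preallocating it:

      MatSetFromOptions(A);          then run the executable with  -mat_type aijcrl
*/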
44: PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45: {
46: PetscErrorCode ierr;
47: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
48: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
49: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
50: const PetscInt *ia,*ib;
51: const MatScalar *aa,*bb;
52: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
53: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
56: *keptrows = 0;
57: ia = a->i;
58: ib = b->i;
59: for (i=0; i<m; i++) {
60: na = ia[i+1] - ia[i];
61: nb = ib[i+1] - ib[i];
62: if (!na && !nb) {
63: cnt++;
64: goto ok1;
65: }
66: aa = a->a + ia[i];
67: for (j=0; j<na; j++) {
68: if (aa[j] != 0.0) goto ok1;
69: }
70: bb = b->a + ib[i];
71: for (j=0; j <nb; j++) {
72: if (bb[j] != 0.0) goto ok1;
73: }
74: cnt++;
75: ok1:;
76: }
77: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);
78: if (!n0rows) return(0);
79: PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);
80: cnt = 0;
81: for (i=0; i<m; i++) {
82: na = ia[i+1] - ia[i];
83: nb = ib[i+1] - ib[i];
84: if (!na && !nb) continue;
85: aa = a->a + ia[i];
86: for(j=0; j<na;j++) {
87: if (aa[j] != 0.0) {
88: rows[cnt++] = rstart + i;
89: goto ok2;
90: }
91: }
92: bb = b->a + ib[i];
93: for (j=0; j<nb; j++) {
94: if (bb[j] != 0.0) {
95: rows[cnt++] = rstart + i;
96: goto ok2;
97: }
98: }
99: ok2:;
100: }
101: ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);
102: return(0);
103: }
107: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108: {
110: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
111: PetscInt i,n,*garray = aij->garray;
112: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
113: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
114: PetscReal *work;
117: MatGetSize(A,PETSC_NULL,&n);
118: PetscMalloc(n*sizeof(PetscReal),&work);
119: PetscMemzero(work,n*sizeof(PetscReal));
120: if (type == NORM_2) {
121: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123: }
124: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126: }
127: } else if (type == NORM_1) {
128: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130: }
131: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133: }
134: } else if (type == NORM_INFINITY) {
135: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137: }
138: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140: }
142: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143: if (type == NORM_INFINITY) {
144: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
145: } else {
146: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
147: }
148: PetscFree(work);
149: if (type == NORM_2) {
150: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151: }
152: return(0);
153: }
157: /*
158: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
161: Only for square matrices
162: */
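/*
   Assumed calling pattern (inferred from the code below; a sketch, not a documented API):
   gmat is a fully assembled MATSEQAIJ that is significant only on process 0, every process
   passes the number of rows m it should own, and MAT_REUSE_MATRIX only moves the numerical
   values into a previously created distributed matrix.

      Mat      pmat;
      PetscInt mlocal;                 rows this process should own
      MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&pmat);
      ... gmat's values change on process 0, nonzero pattern unchanged ...
      MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&pmat);
*/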
163: PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164: {
165: PetscMPIInt rank,size;
166: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
168: Mat mat;
169: Mat_SeqAIJ *gmata;
170: PetscMPIInt tag;
171: MPI_Status status;
172: PetscBool aij;
173: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
176: CHKMEMQ;
177: MPI_Comm_rank(comm,&rank);
178: MPI_Comm_size(comm,&size);
179: if (!rank) {
180: PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
181: if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182: }
183: if (reuse == MAT_INITIAL_MATRIX) {
184: MatCreate(comm,&mat);
185: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
186: MatSetType(mat,MATAIJ);
187: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
188: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
189: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
190: rowners[0] = 0;
191: for (i=2; i<=size; i++) {
192: rowners[i] += rowners[i-1];
193: }
194: rstart = rowners[rank];
195: rend = rowners[rank+1];
196: PetscObjectGetNewTag((PetscObject)mat,&tag);
197: if (!rank) {
198: gmata = (Mat_SeqAIJ*) gmat->data;
199: /* send row lengths to all processors */
200: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201: for (i=1; i<size; i++) {
202: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
203: }
204: /* determine the number of diagonal and off-diagonal nonzeros in each row */
205: PetscMemzero(olens,m*sizeof(PetscInt));
206: PetscMalloc(m*sizeof(PetscInt),&ld);
207: PetscMemzero(ld,m*sizeof(PetscInt));
208: jj = 0;
209: for (i=0; i<m; i++) {
210: for (j=0; j<dlens[i]; j++) {
211: if (gmata->j[jj] < rstart) ld[i]++;
212: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213: jj++;
214: }
215: }
216: /* send column indices to other processes */
217: for (i=1; i<size; i++) {
218: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
220: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
221: }
223: /* send numerical values to other processes */
224: for (i=1; i<size; i++) {
225: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
227: }
228: gmataa = gmata->a;
229: gmataj = gmata->j;
231: } else {
232: /* receive row lengths */
233: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
234: /* receive column indices */
235: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
236: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
237: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
238: /* determine the number of diagonal and off-diagonal nonzeros in each row */
239: PetscMemzero(olens,m*sizeof(PetscInt));
240: PetscMalloc(m*sizeof(PetscInt),&ld);
241: PetscMemzero(ld,m*sizeof(PetscInt));
242: jj = 0;
243: for (i=0; i<m; i++) {
244: for (j=0; j<dlens[i]; j++) {
245: if (gmataj[jj] < rstart) ld[i]++;
246: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247: jj++;
248: }
249: }
250: /* receive numerical values */
251: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
252: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
253: }
254: /* set preallocation */
255: for (i=0; i<m; i++) {
256: dlens[i] -= olens[i];
257: }
258: MatSeqAIJSetPreallocation(mat,0,dlens);
259: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
260:
261: for (i=0; i<m; i++) {
262: dlens[i] += olens[i];
263: }
264: cnt = 0;
265: for (i=0; i<m; i++) {
266: row = rstart + i;
267: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
268: cnt += dlens[i];
269: }
270: if (rank) {
271: PetscFree2(gmataa,gmataj);
272: }
273: PetscFree2(dlens,olens);
274: PetscFree(rowners);
275: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276: *inmat = mat;
277: } else { /* column indices are already set; only need to move over numerical values from process 0 */
278: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280: mat = *inmat;
281: PetscObjectGetNewTag((PetscObject)mat,&tag);
282: if (!rank) {
283: /* send numerical values to other processes */
284: gmata = (Mat_SeqAIJ*) gmat->data;
285: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
286: gmataa = gmata->a;
287: for (i=1; i<size; i++) {
288: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
290: }
291: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292: } else {
293: /* receive numerical values from process 0*/
294: nz = Ad->nz + Ao->nz;
295: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
296: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
297: }
298: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300: ad = Ad->a;
301: ao = Ao->a;
302: if (mat->rmap->n) {
303: i = 0;
304: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
305: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
306: }
307: for (i=1; i<mat->rmap->n; i++) {
308: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
309: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
310: }
311: i--;
312: if (mat->rmap->n) {
313: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
314: }
315: if (rank) {
316: PetscFree(gmataarestore);
317: }
318: }
319: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
320: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
321: CHKMEMQ;
322: return(0);
323: }
325: /*
326: Local utility routine that creates a mapping from the global column
327: number to the local number in the off-diagonal part of the local
328: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
329: a slightly higher hash table cost; without it it is not scalable (each process
330: has an order N integer array) but is fast to access.
331: */
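/*
   Lookup sketch (mirrors how colmap is used later in this file): translating a global column
   index gcol of the off-diagonal part into the local column index lcol of B; after the -1
   shift, a negative result means the column is not present on this process.

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/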
334: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335: {
336: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
338: PetscInt n = aij->B->cmap->n,i;
341: #if defined (PETSC_USE_CTABLE)
342: PetscTableCreate(n,&aij->colmap);
343: for (i=0; i<n; i++){
344: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
345: }
346: #else
347: PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);
348: PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));
349: PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));
350: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351: #endif
352: return(0);
353: }
355: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356: { \
357: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358: lastcol1 = col;\
359: while (high1-low1 > 5) { \
360: t = (low1+high1)/2; \
361: if (rp1[t] > col) high1 = t; \
362: else low1 = t; \
363: } \
364: for (_i=low1; _i<high1; _i++) { \
365: if (rp1[_i] > col) break; \
366: if (rp1[_i] == col) { \
367: if (addv == ADD_VALUES) ap1[_i] += value; \
368: else ap1[_i] = value; \
369: goto a_noinsert; \
370: } \
371: } \
372: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
374: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376: N = nrow1++ - 1; a->nz++; high1++; \
377: /* shift up all the later entries in this row */ \
378: for (ii=N; ii>=_i; ii--) { \
379: rp1[ii+1] = rp1[ii]; \
380: ap1[ii+1] = ap1[ii]; \
381: } \
382: rp1[_i] = col; \
383: ap1[_i] = value; \
384: a_noinsert: ; \
385: ailen[row] = nrow1; \
386: }
389: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390: { \
391: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392: lastcol2 = col;\
393: while (high2-low2 > 5) { \
394: t = (low2+high2)/2; \
395: if (rp2[t] > col) high2 = t; \
396: else low2 = t; \
397: } \
398: for (_i=low2; _i<high2; _i++) { \
399: if (rp2[_i] > col) break; \
400: if (rp2[_i] == col) { \
401: if (addv == ADD_VALUES) ap2[_i] += value; \
402: else ap2[_i] = value; \
403: goto b_noinsert; \
404: } \
405: } \
406: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
408: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410: N = nrow2++ - 1; b->nz++; high2++; \
411: /* shift up all the later entries in this row */ \
412: for (ii=N; ii>=_i; ii--) { \
413: rp2[ii+1] = rp2[ii]; \
414: ap2[ii+1] = ap2[ii]; \
415: } \
416: rp2[_i] = col; \
417: ap2[_i] = value; \
418: b_noinsert: ; \
419: bilen[row] = nrow2; \
420: }
424: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425: {
426: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
427: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
429: PetscInt l,*garray = mat->garray,diag;
432: /* code only works for square matrices A */
434: /* find size of row to the left of the diagonal part */
435: MatGetOwnershipRange(A,&diag,0);
436: row = row - diag;
437: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438: if (garray[b->j[b->i[row]+l]] > diag) break;
439: }
440: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
442: /* diagonal part */
443: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
445: /* right of diagonal part */
446: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
447: return(0);
448: }
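/*
   Note on the input assumed by MatSetValuesRow_MPIAIJ() above (added annotation, not part
   of the original source): v[] holds the complete local row with entries in increasing
   global column order, so the three PetscMemcpy() calls split it as

      [ l off-diagonal entries left of the owned column range | diagonal-block (A) entries | remaining off-diagonal entries ]

   which only makes sense for square matrices, as the comment in the routine states.
*/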
452: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453: {
454: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
455: PetscScalar value;
457: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459: PetscBool roworiented = aij->roworiented;
461: /* Some Variables required in the macro */
462: Mat A = aij->A;
463: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
464: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465: MatScalar *aa = a->a;
466: PetscBool ignorezeroentries = a->ignorezeroentries;
467: Mat B = aij->B;
468: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
469: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470: MatScalar *ba = b->a;
472: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473: PetscInt nonew = a->nonew;
474: MatScalar *ap1,*ap2;
478: for (i=0; i<m; i++) {
479: if (im[i] < 0) continue;
480: #if defined(PETSC_USE_DEBUG)
481: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482: #endif
483: if (im[i] >= rstart && im[i] < rend) {
484: row = im[i] - rstart;
485: lastcol1 = -1;
486: rp1 = aj + ai[row];
487: ap1 = aa + ai[row];
488: rmax1 = aimax[row];
489: nrow1 = ailen[row];
490: low1 = 0;
491: high1 = nrow1;
492: lastcol2 = -1;
493: rp2 = bj + bi[row];
494: ap2 = ba + bi[row];
495: rmax2 = bimax[row];
496: nrow2 = bilen[row];
497: low2 = 0;
498: high2 = nrow2;
500: for (j=0; j<n; j++) {
501: if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503: if (in[j] >= cstart && in[j] < cend){
504: col = in[j] - cstart;
505: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506: } else if (in[j] < 0) continue;
507: #if defined(PETSC_USE_DEBUG)
508: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509: #endif
510: else {
511: if (mat->was_assembled) {
512: if (!aij->colmap) {
513: CreateColmap_MPIAIJ_Private(mat);
514: }
515: #if defined (PETSC_USE_CTABLE)
516: PetscTableFind(aij->colmap,in[j]+1,&col);
517: col--;
518: #else
519: col = aij->colmap[in[j]] - 1;
520: #endif
521: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522: DisAssemble_MPIAIJ(mat);
523: col = in[j];
524: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525: B = aij->B;
526: b = (Mat_SeqAIJ*)B->data;
527: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528: rp2 = bj + bi[row];
529: ap2 = ba + bi[row];
530: rmax2 = bimax[row];
531: nrow2 = bilen[row];
532: low2 = 0;
533: high2 = nrow2;
534: bm = aij->B->rmap->n;
535: ba = b->a;
536: }
537: } else col = in[j];
538: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539: }
540: }
541: } else {
542: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543: if (!aij->donotstash) {
544: if (roworiented) {
545: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
546: } else {
547: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
548: }
549: }
550: }
551: }
552: return(0);
553: }
557: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558: {
559: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
561: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
565: for (i=0; i<m; i++) {
566: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568: if (idxm[i] >= rstart && idxm[i] < rend) {
569: row = idxm[i] - rstart;
570: for (j=0; j<n; j++) {
571: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573: if (idxn[j] >= cstart && idxn[j] < cend){
574: col = idxn[j] - cstart;
575: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
576: } else {
577: if (!aij->colmap) {
578: CreateColmap_MPIAIJ_Private(mat);
579: }
580: #if defined (PETSC_USE_CTABLE)
581: PetscTableFind(aij->colmap,idxn[j]+1,&col);
582: col --;
583: #else
584: col = aij->colmap[idxn[j]] - 1;
585: #endif
586: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587: else {
588: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
589: }
590: }
591: }
592: } else {
593: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594: }
595: }
596: return(0);
597: }
603: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604: {
605: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
607: PetscInt nstash,reallocs;
608: InsertMode addv;
611: if (aij->donotstash || mat->nooffprocentries) {
612: return(0);
613: }
615: /* make sure all processors are either in INSERTMODE or ADDMODE */
616: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
617: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618: mat->insertmode = addv; /* in case this processor had no cache */
620: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
621: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
622: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
623: return(0);
624: }
628: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629: {
630: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
631: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
633: PetscMPIInt n;
634: PetscInt i,j,rstart,ncols,flg;
635: PetscInt *row,*col;
636: PetscBool other_disassembled;
637: PetscScalar *val;
638: InsertMode addv = mat->insertmode;
640: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
642: if (!aij->donotstash && !mat->nooffprocentries) {
643: while (1) {
644: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
645: if (!flg) break;
647: for (i=0; i<n;) {
648: /* Now identify the consecutive vals belonging to the same row */
649: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650: if (j < n) ncols = j-i;
651: else ncols = n-i;
652: /* Now assemble all these values with a single function call */
653: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
654: i = j;
655: }
656: }
657: MatStashScatterEnd_Private(&mat->stash);
658: }
659: MatAssemblyBegin(aij->A,mode);
660: MatAssemblyEnd(aij->A,mode);
662: /* determine if any processor has disassembled; if so we must
663: also disassemble ourselves, in order that we may reassemble. */
664: /*
665: if nonzero structure of submatrix B cannot change then we know that
666: no processor disassembled thus we can skip this stuff
667: */
668: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
669: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
670: if (mat->was_assembled && !other_disassembled) {
671: DisAssemble_MPIAIJ(mat);
672: }
673: }
674: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675: MatSetUpMultiply_MPIAIJ(mat);
676: }
677: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
678: MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
679: MatAssemblyBegin(aij->B,mode);
680: MatAssemblyEnd(aij->B,mode);
682: PetscFree2(aij->rowvalues,aij->rowindices);
683: aij->rowvalues = 0;
685: /* used by MatAXPY() */
686: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
687: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
689: VecDestroy(&aij->diag);
690: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691: return(0);
692: }
696: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697: {
698: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
702: MatZeroEntries(l->A);
703: MatZeroEntries(l->B);
704: return(0);
705: }
709: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710: {
711: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
712: PetscErrorCode ierr;
713: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714: PetscInt i,*owners = A->rmap->range;
715: PetscInt *nprocs,j,idx,nsends,row;
716: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
717: PetscInt *rvalues,count,base,slen,*source;
718: PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
719: MPI_Comm comm = ((PetscObject)A)->comm;
720: MPI_Request *send_waits,*recv_waits;
721: MPI_Status recv_status,*send_status;
722: const PetscScalar *xx;
723: PetscScalar *bb;
724: #if defined(PETSC_DEBUG)
725: PetscBool found = PETSC_FALSE;
726: #endif
729: /* first count number of contributors to each processor */
730: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
731: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
732: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
733: j = 0;
734: for (i=0; i<N; i++) {
735: if (lastidx > (idx = rows[i])) j = 0;
736: lastidx = idx;
737: for (; j<size; j++) {
738: if (idx >= owners[j] && idx < owners[j+1]) {
739: nprocs[2*j]++;
740: nprocs[2*j+1] = 1;
741: owner[i] = j;
742: #if defined(PETSC_DEBUG)
743: found = PETSC_TRUE;
744: #endif
745: break;
746: }
747: }
748: #if defined(PETSC_DEBUG)
749: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750: found = PETSC_FALSE;
751: #endif
752: }
753: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
755: if (A->nooffproczerorows) {
756: if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757: nrecvs = nsends;
758: nmax = N;
759: } else {
760: /* inform other processors of number of messages and max length*/
761: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
762: }
764: /* post receives: */
765: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
766: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
767: for (i=0; i<nrecvs; i++) {
768: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
769: }
771: /* do sends:
772: 1) starts[i] gives the starting index in svalues for stuff going to
773: the ith processor
774: */
775: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
776: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
777: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
778: starts[0] = 0;
779: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780: for (i=0; i<N; i++) {
781: svalues[starts[owner[i]]++] = rows[i];
782: }
784: starts[0] = 0;
785: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786: count = 0;
787: for (i=0; i<size; i++) {
788: if (nprocs[2*i+1]) {
789: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
790: }
791: }
792: PetscFree(starts);
794: base = owners[rank];
796: /* wait on receives */
797: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
798: count = nrecvs; slen = 0;
799: while (count) {
800: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
801: /* unpack receives into our local space */
802: MPI_Get_count(&recv_status,MPIU_INT,&n);
803: source[imdex] = recv_status.MPI_SOURCE;
804: lens[imdex] = n;
805: slen += n;
806: count--;
807: }
808: PetscFree(recv_waits);
809:
810: /* move the data into the send scatter */
811: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
812: count = 0;
813: for (i=0; i<nrecvs; i++) {
814: values = rvalues + i*nmax;
815: for (j=0; j<lens[i]; j++) {
816: lrows[count++] = values[j] - base;
817: }
818: }
819: PetscFree(rvalues);
820: PetscFree2(lens,source);
821: PetscFree(owner);
822: PetscFree(nprocs);
823:
824: /* fix right hand side if needed */
825: if (x && b) {
826: VecGetArrayRead(x,&xx);
827: VecGetArray(b,&bb);
828: for (i=0; i<slen; i++) {
829: bb[lrows[i]] = diag*xx[lrows[i]];
830: }
831: VecRestoreArrayRead(x,&xx);
832: VecRestoreArray(b,&bb);
833: }
834: /*
835: Zero the required rows. If the "diagonal block" of the matrix
836: is square and the user wishes to set the diagonal we use separate
837: code so that MatSetValues() is not called for each diagonal entry, which
838: would allocate new memory, causing lots of mallocs and slowing things down.
840: */
841: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842: MatZeroRows(l->B,slen,lrows,0.0,0,0);
843: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844: MatZeroRows(l->A,slen,lrows,diag,0,0);
845: } else if (diag != 0.0) {
846: MatZeroRows(l->A,slen,lrows,0.0,0,0);
847: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850: }
851: for (i = 0; i < slen; i++) {
852: row = lrows[i] + rstart;
853: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
854: }
855: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
856: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
857: } else {
858: MatZeroRows(l->A,slen,lrows,0.0,0,0);
859: }
860: PetscFree(lrows);
862: /* wait on sends */
863: if (nsends) {
864: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
865: MPI_Waitall(nsends,send_waits,send_status);
866: PetscFree(send_status);
867: }
868: PetscFree(send_waits);
869: PetscFree(svalues);
870: return(0);
871: }
875: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876: {
877: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
878: PetscErrorCode ierr;
879: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880: PetscInt i,*owners = A->rmap->range;
881: PetscInt *nprocs,j,idx,nsends;
882: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
883: PetscInt *rvalues,count,base,slen,*source;
884: PetscInt *lens,*lrows,*values,m;
885: MPI_Comm comm = ((PetscObject)A)->comm;
886: MPI_Request *send_waits,*recv_waits;
887: MPI_Status recv_status,*send_status;
888: const PetscScalar *xx;
889: PetscScalar *bb,*mask;
890: Vec xmask,lmask;
891: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
892: const PetscInt *aj, *ii,*ridx;
893: PetscScalar *aa;
894: #if defined(PETSC_DEBUG)
895: PetscBool found = PETSC_FALSE;
896: #endif
899: /* first count number of contributors to each processor */
900: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
901: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
902: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
903: j = 0;
904: for (i=0; i<N; i++) {
905: if (lastidx > (idx = rows[i])) j = 0;
906: lastidx = idx;
907: for (; j<size; j++) {
908: if (idx >= owners[j] && idx < owners[j+1]) {
909: nprocs[2*j]++;
910: nprocs[2*j+1] = 1;
911: owner[i] = j;
912: #if defined(PETSC_DEBUG)
913: found = PETSC_TRUE;
914: #endif
915: break;
916: }
917: }
918: #if defined(PETSC_DEBUG)
919: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920: found = PETSC_FALSE;
921: #endif
922: }
923: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
925: /* inform other processors of number of messages and max length*/
926: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
928: /* post receives: */
929: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
930: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
931: for (i=0; i<nrecvs; i++) {
932: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
933: }
935: /* do sends:
936: 1) starts[i] gives the starting index in svalues for stuff going to
937: the ith processor
938: */
939: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
940: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
941: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
942: starts[0] = 0;
943: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944: for (i=0; i<N; i++) {
945: svalues[starts[owner[i]]++] = rows[i];
946: }
948: starts[0] = 0;
949: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950: count = 0;
951: for (i=0; i<size; i++) {
952: if (nprocs[2*i+1]) {
953: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
954: }
955: }
956: PetscFree(starts);
958: base = owners[rank];
960: /* wait on receives */
961: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
962: count = nrecvs; slen = 0;
963: while (count) {
964: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
965: /* unpack receives into our local space */
966: MPI_Get_count(&recv_status,MPIU_INT,&n);
967: source[imdex] = recv_status.MPI_SOURCE;
968: lens[imdex] = n;
969: slen += n;
970: count--;
971: }
972: PetscFree(recv_waits);
973:
974: /* move the data into the send scatter */
975: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
976: count = 0;
977: for (i=0; i<nrecvs; i++) {
978: values = rvalues + i*nmax;
979: for (j=0; j<lens[i]; j++) {
980: lrows[count++] = values[j] - base;
981: }
982: }
983: PetscFree(rvalues);
984: PetscFree2(lens,source);
985: PetscFree(owner);
986: PetscFree(nprocs);
987: /* lrows are the local rows to be zeroed, slen is the number of local rows */
989: /* zero diagonal part of matrix */
990: MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);
991:
992: /* handle off diagonal part of matrix */
993: MatGetVecs(A,&xmask,PETSC_NULL);
994: VecDuplicate(l->lvec,&lmask);
995: VecGetArray(xmask,&bb);
996: for (i=0; i<slen; i++) {
997: bb[lrows[i]] = 1;
998: }
999: VecRestoreArray(xmask,&bb);
1000: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1001: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1002: VecDestroy(&xmask);
1003: VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1004: VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1005: VecGetArrayRead(l->lvec,&xx);
1006: VecGetArray(lmask,&mask);
1007: VecGetArray(b,&bb);
1009: /* remove zeroed rows of off diagonal matrix */
1010: ii = aij->i;
1011: for (i=0; i<slen; i++) {
1012: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
1013: }
1015: /* loop over all elements of off process part of matrix zeroing removed columns*/
1016: if (aij->compressedrow.use){
1017: m = aij->compressedrow.nrows;
1018: ii = aij->compressedrow.i;
1019: ridx = aij->compressedrow.rindex;
1020: for (i=0; i<m; i++){
1021: n = ii[i+1] - ii[i];
1022: aj = aij->j + ii[i];
1023: aa = aij->a + ii[i];
1025: for (j=0; j<n; j++) {
1026: if (PetscAbsScalar(mask[*aj])) {
1027: bb[*ridx] -= *aa*xx[*aj];
1028: *aa = 0.0;
1029: }
1030: aa++;
1031: aj++;
1032: }
1033: ridx++;
1034: }
1035: } else { /* do not use compressed row format */
1036: m = l->B->rmap->n;
1037: for (i=0; i<m; i++) {
1038: n = ii[i+1] - ii[i];
1039: aj = aij->j + ii[i];
1040: aa = aij->a + ii[i];
1041: for (j=0; j<n; j++) {
1042: if (PetscAbsScalar(mask[*aj])) {
1043: bb[i] -= *aa*xx[*aj];
1044: *aa = 0.0;
1045: }
1046: aa++;
1047: aj++;
1048: }
1049: }
1050: }
1051: VecRestoreArray(b,&bb);
1052: VecRestoreArray(lmask,&mask);
1053: VecRestoreArrayRead(l->lvec,&xx);
1054: VecDestroy(&lmask);
1055: PetscFree(lrows);
1057: /* wait on sends */
1058: if (nsends) {
1059: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1060: MPI_Waitall(nsends,send_waits,send_status);
1061: PetscFree(send_status);
1062: }
1063: PetscFree(send_waits);
1064: PetscFree(svalues);
1066: return(0);
1067: }
1071: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1072: {
1073: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1075: PetscInt nt;
1078: VecGetLocalSize(xx,&nt);
1079: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1080: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1081: (*a->A->ops->mult)(a->A,xx,yy);
1082: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1083: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1084: return(0);
1085: }
1089: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1090: {
1091: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1095: MatMultDiagonalBlock(a->A,bb,xx);
1096: return(0);
1097: }
1101: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102: {
1103: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1107: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1108: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1109: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1110: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1111: return(0);
1112: }
1116: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1117: {
1118: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1120: PetscBool merged;
1123: VecScatterGetMerged(a->Mvctx,&merged);
1124: /* do nondiagonal part */
1125: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1126: if (!merged) {
1127: /* send it on its way */
1128: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1129: /* do local part */
1130: (*a->A->ops->multtranspose)(a->A,xx,yy);
1131: /* receive remote parts: note this assumes the values are not actually */
1132: /* added into yy until the VecScatterEnd() on the next line */
1133: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1134: } else {
1135: /* do local part */
1136: (*a->A->ops->multtranspose)(a->A,xx,yy);
1137: /* send it on its way */
1138: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1139: /* values actually were received in the Begin() but we need to call this nop */
1140: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1141: }
1142: return(0);
1143: }
1148: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1149: {
1150: MPI_Comm comm;
1151: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1152: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1153: IS Me,Notme;
1155: PetscInt M,N,first,last,*notme,i;
1156: PetscMPIInt size;
1160: /* Easy test: symmetric diagonal block */
1161: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1162: MatIsTranspose(Adia,Bdia,tol,f);
1163: if (!*f) return(0);
1164: PetscObjectGetComm((PetscObject)Amat,&comm);
1165: MPI_Comm_size(comm,&size);
1166: if (size == 1) return(0);
1168: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1169: MatGetSize(Amat,&M,&N);
1170: MatGetOwnershipRange(Amat,&first,&last);
1171: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
1172: for (i=0; i<first; i++) notme[i] = i;
1173: for (i=last; i<M; i++) notme[i-last+first] = i;
1174: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1175: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1176: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1177: Aoff = Aoffs[0];
1178: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1179: Boff = Boffs[0];
1180: MatIsTranspose(Aoff,Boff,tol,f);
1181: MatDestroyMatrices(1,&Aoffs);
1182: MatDestroyMatrices(1,&Boffs);
1183: ISDestroy(&Me);
1184: ISDestroy(&Notme);
1185: PetscFree(notme);
1186: return(0);
1187: }
1192: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193: {
1194: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1198: /* do nondiagonal part */
1199: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1200: /* send it on its way */
1201: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1202: /* do local part */
1203: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1204: /* receive remote parts */
1205: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1206: return(0);
1207: }
1209: /*
1210: This only works correctly for square matrices where the subblock A->A is the
1211: diagonal block
1212: */
1215: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1216: {
1218: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1221: if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1222: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1223: MatGetDiagonal(a->A,v);
1224: return(0);
1225: }
1229: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230: {
1231: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1235: MatScale(a->A,aa);
1236: MatScale(a->B,aa);
1237: return(0);
1238: }
1242: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1243: {
1244: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1248: #if defined(PETSC_USE_LOG)
1249: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1250: #endif
1251: MatStashDestroy_Private(&mat->stash);
1252: VecDestroy(&aij->diag);
1253: MatDestroy(&aij->A);
1254: MatDestroy(&aij->B);
1255: #if defined (PETSC_USE_CTABLE)
1256: PetscTableDestroy(&aij->colmap);
1257: #else
1258: PetscFree(aij->colmap);
1259: #endif
1260: PetscFree(aij->garray);
1261: VecDestroy(&aij->lvec);
1262: VecScatterDestroy(&aij->Mvctx);
1263: PetscFree2(aij->rowvalues,aij->rowindices);
1264: PetscFree(aij->ld);
1265: PetscFree(mat->data);
1267: PetscObjectChangeTypeName((PetscObject)mat,0);
1268: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1269: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1270: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1271: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
1272: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
1273: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
1274: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1275: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);
1276: return(0);
1277: }
1281: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1282: {
1283: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1284: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
1285: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
1286: PetscErrorCode ierr;
1287: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1288: int fd;
1289: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1290: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1291: PetscScalar *column_values;
1292: PetscInt message_count,flowcontrolcount;
1295: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1296: MPI_Comm_size(((PetscObject)mat)->comm,&size);
1297: nz = A->nz + B->nz;
1298: if (!rank) {
1299: header[0] = MAT_FILE_CLASSID;
1300: header[1] = mat->rmap->N;
1301: header[2] = mat->cmap->N;
1302: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1303: PetscViewerBinaryGetDescriptor(viewer,&fd);
1304: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1305: /* get largest number of rows any processor has */
1306: rlen = mat->rmap->n;
1307: range = mat->rmap->range;
1308: for (i=1; i<size; i++) {
1309: rlen = PetscMax(rlen,range[i+1] - range[i]);
1310: }
1311: } else {
1312: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1313: rlen = mat->rmap->n;
1314: }
1316: /* load up the local row counts */
1317: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
1318: for (i=0; i<mat->rmap->n; i++) {
1319: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1320: }
1322: /* store the row lengths to the file */
1323: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1324: if (!rank) {
1325: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1326: for (i=1; i<size; i++) {
1327: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1328: rlen = range[i+1] - range[i];
1329: MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1330: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1331: }
1332: PetscViewerFlowControlEndMaster(viewer,message_count);
1333: } else {
1334: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1335: MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1336: PetscViewerFlowControlEndWorker(viewer,message_count);
1337: }
1338: PetscFree(row_lengths);
1340: /* load up the local column indices */
1341: nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
1342: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
1343: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
1344: cnt = 0;
1345: for (i=0; i<mat->rmap->n; i++) {
1346: for (j=B->i[i]; j<B->i[i+1]; j++) {
1347: if ( (col = garray[B->j[j]]) > cstart) break;
1348: column_indices[cnt++] = col;
1349: }
1350: for (k=A->i[i]; k<A->i[i+1]; k++) {
1351: column_indices[cnt++] = A->j[k] + cstart;
1352: }
1353: for (; j<B->i[i+1]; j++) {
1354: column_indices[cnt++] = garray[B->j[j]];
1355: }
1356: }
1357: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1359: /* store the column indices to the file */
1360: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1361: if (!rank) {
1362: MPI_Status status;
1363: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1364: for (i=1; i<size; i++) {
1365: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1366: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1367: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1368: MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1369: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1370: }
1371: PetscViewerFlowControlEndMaster(viewer,message_count);
1372: } else {
1373: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1374: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1375: MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1376: PetscViewerFlowControlEndWorker(viewer,message_count);
1377: }
1378: PetscFree(column_indices);
1380: /* load up the local column values */
1381: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
1382: cnt = 0;
1383: for (i=0; i<mat->rmap->n; i++) {
1384: for (j=B->i[i]; j<B->i[i+1]; j++) {
1385: if ( garray[B->j[j]] > cstart) break;
1386: column_values[cnt++] = B->a[j];
1387: }
1388: for (k=A->i[i]; k<A->i[i+1]; k++) {
1389: column_values[cnt++] = A->a[k];
1390: }
1391: for (; j<B->i[i+1]; j++) {
1392: column_values[cnt++] = B->a[j];
1393: }
1394: }
1395: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1397: /* store the column values to the file */
1398: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1399: if (!rank) {
1400: MPI_Status status;
1401: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1402: for (i=1; i<size; i++) {
1403: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1404: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1405: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1406: MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);
1407: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1408: }
1409: PetscViewerFlowControlEndMaster(viewer,message_count);
1410: } else {
1411: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1412: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1413: MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
1414: PetscViewerFlowControlEndWorker(viewer,message_count);
1415: }
1416: PetscFree(column_values);
1417: return(0);
1418: }
1422: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1423: {
1424: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1425: PetscErrorCode ierr;
1426: PetscMPIInt rank = aij->rank,size = aij->size;
1427: PetscBool isdraw,iascii,isbinary;
1428: PetscViewer sviewer;
1429: PetscViewerFormat format;
1432: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1433: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1434: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1435: if (iascii) {
1436: PetscViewerGetFormat(viewer,&format);
1437: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1438: MatInfo info;
1439: PetscBool inodes;
1441: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1442: MatGetInfo(mat,MAT_LOCAL,&info);
1443: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
1444: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1445: if (!inodes) {
1446: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1447: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1448: } else {
1449: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1450: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1451: }
1452: MatGetInfo(aij->A,MAT_LOCAL,&info);
1453: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1454: MatGetInfo(aij->B,MAT_LOCAL,&info);
1455: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1456: PetscViewerFlush(viewer);
1457: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1458: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1459: VecScatterView(aij->Mvctx,viewer);
1460: return(0);
1461: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1462: PetscInt inodecount,inodelimit,*inodes;
1463: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1464: if (inodes) {
1465: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1466: } else {
1467: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1468: }
1469: return(0);
1470: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1471: return(0);
1472: }
1473: } else if (isbinary) {
1474: if (size == 1) {
1475: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1476: MatView(aij->A,viewer);
1477: } else {
1478: MatView_MPIAIJ_Binary(mat,viewer);
1479: }
1480: return(0);
1481: } else if (isdraw) {
1482: PetscDraw draw;
1483: PetscBool isnull;
1484: PetscViewerDrawGetDraw(viewer,0,&draw);
1485: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1486: }
1488: if (size == 1) {
1489: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1490: MatView(aij->A,viewer);
1491: } else {
1492: /* assemble the entire matrix onto first processor. */
1493: Mat A;
1494: Mat_SeqAIJ *Aloc;
1495: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1496: MatScalar *a;
1498: if (mat->rmap->N > 1024) {
1499: PetscBool flg = PETSC_FALSE;
1501: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);
1502: if (!flg) {
1503: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1504: }
1505: }
1507: MatCreate(((PetscObject)mat)->comm,&A);
1508: if (!rank) {
1509: MatSetSizes(A,M,N,M,N);
1510: } else {
1511: MatSetSizes(A,0,0,M,N);
1512: }
1513: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1514: MatSetType(A,MATMPIAIJ);
1515: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
1516: PetscLogObjectParent(mat,A);
1518: /* copy over the A part */
1519: Aloc = (Mat_SeqAIJ*)aij->A->data;
1520: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1521: row = mat->rmap->rstart;
1522: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1523: for (i=0; i<m; i++) {
1524: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1525: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1526: }
1527: aj = Aloc->j;
1528: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1530: /* copy over the B part */
1531: Aloc = (Mat_SeqAIJ*)aij->B->data;
1532: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1533: row = mat->rmap->rstart;
1534: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
1535: ct = cols;
1536: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1537: for (i=0; i<m; i++) {
1538: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1539: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1540: }
1541: PetscFree(ct);
1542: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1543: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1544: /*
1545: Everyone has to call to draw the matrix since the graphics waits are
1546: synchronized across all processors that share the PetscDraw object
1547: */
1548: PetscViewerGetSingleton(viewer,&sviewer);
1549: if (!rank) {
1550: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1551: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1552: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1553: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1554: }
1555: PetscViewerRestoreSingleton(viewer,&sviewer);
1556: MatDestroy(&A);
1557: }
1558: return(0);
1559: }
1563: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1564: {
1566: PetscBool iascii,isdraw,issocket,isbinary;
1567:
1569: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1570: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1571: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1572: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1573: if (iascii || isdraw || isbinary || issocket) {
1574: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1575: } else {
1576: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1577: }
1578: return(0);
1579: }
1583: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1584: {
1585: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1587: Vec bb1 = 0;
1588: PetscBool hasop;
1591: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1592: VecDuplicate(bb,&bb1);
1593: }
1595: if (flag == SOR_APPLY_UPPER) {
1596: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1597: return(0);
1598: }
1600: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1601: if (flag & SOR_ZERO_INITIAL_GUESS) {
1602: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1603: its--;
1604: }
1605:
1606: while (its--) {
1607: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1608: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1610: /* update rhs: bb1 = bb - B*x */
1611: VecScale(mat->lvec,-1.0);
1612: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1614: /* local sweep */
1615: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1616: }
1617: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1618: if (flag & SOR_ZERO_INITIAL_GUESS) {
1619: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1620: its--;
1621: }
1622: while (its--) {
1623: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1624: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1626: /* update rhs: bb1 = bb - B*x */
1627: VecScale(mat->lvec,-1.0);
1628: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1630: /* local sweep */
1631: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1632: }
1633: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1634: if (flag & SOR_ZERO_INITIAL_GUESS) {
1635: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1636: its--;
1637: }
1638: while (its--) {
1639: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1640: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1642: /* update rhs: bb1 = bb - B*x */
1643: VecScale(mat->lvec,-1.0);
1644: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1646: /* local sweep */
1647: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1648: }
1649: } else if (flag & SOR_EISENSTAT) {
1650: Vec xx1;
1652: VecDuplicate(bb,&xx1);
1653: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1655: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1656: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1657: if (!mat->diag) {
1658: MatGetVecs(matin,&mat->diag,PETSC_NULL);
1659: MatGetDiagonal(matin,mat->diag);
1660: }
1661: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1662: if (hasop) {
1663: MatMultDiagonalBlock(matin,xx,bb1);
1664: } else {
1665: VecPointwiseMult(bb1,mat->diag,xx);
1666: }
1667: VecAYPX(bb1,(omega-2.0)/omega,bb);
1669: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1671: /* local sweep */
1672: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1673: VecAXPY(xx,1.0,xx1);
1674: VecDestroy(&xx1);
1675: } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1677: VecDestroy(&bb1);
1678: return(0);
1679: }
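/*
   Usage sketch (added for illustration; not part of the original source): MatSOR_MPIAIJ() is reached
   through the public MatSOR() interface, normally from PCSOR. Only the SOR_LOCAL_* (processor-local)
   sweeps are supported in parallel, as the error branch above indicates. A is assumed to be an
   assembled square MATMPIAIJ; error checking is omitted to match the listing.
*/
static PetscErrorCode UsageSketch_MatSOR(Mat A,Vec b,Vec x)
{
  /* one processor-local symmetric Gauss-Seidel sweep with zero initial guess */
  VecSet(x,0.0);
  MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);
  return(0);
}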
1683: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1684: {
1685: MPI_Comm comm,pcomm;
1686: PetscInt first,local_size,nrows;
1687: const PetscInt *rows;
1688: PetscMPIInt size;
1689: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1693: PetscObjectGetComm((PetscObject)A,&comm);
1694: /* make a collective version of 'rowp' */
1695: PetscObjectGetComm((PetscObject)rowp,&pcomm);
1696: if (pcomm==comm) {
1697: crowp = rowp;
1698: } else {
1699: ISGetSize(rowp,&nrows);
1700: ISGetIndices(rowp,&rows);
1701: ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);
1702: ISRestoreIndices(rowp,&rows);
1703: }
1704: /* collect the global row permutation and invert it */
1705: ISAllGather(crowp,&growp);
1706: ISSetPermutation(growp);
1707: if (pcomm!=comm) {
1708: ISDestroy(&crowp);
1709: }
1710: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1711: /* get the local target indices */
1712: MatGetOwnershipRange(A,&first,PETSC_NULL);
1713: MatGetLocalSize(A,&local_size,PETSC_NULL);
1714: ISGetIndices(irowp,&rows);
1715: ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);
1716: ISRestoreIndices(irowp,&rows);
1717: ISDestroy(&irowp);
1718: /* the column permutation is so much easier;
1719: make a local version of 'colp' and invert it */
1720: PetscObjectGetComm((PetscObject)colp,&pcomm);
1721: MPI_Comm_size(pcomm,&size);
1722: if (size==1) {
1723: lcolp = colp;
1724: } else {
1725: ISGetSize(colp,&nrows);
1726: ISGetIndices(colp,&rows);
1727: ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);
1728: }
1729: ISSetPermutation(lcolp);
1730: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1731: ISSetPermutation(icolp);
1732: if (size>1) {
1733: ISRestoreIndices(colp,&rows);
1734: ISDestroy(&lcolp);
1735: }
1736: /* now we just get the submatrix */
1737: MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
1738: /* clean up */
1739: ISDestroy(&lrowp);
1740: ISDestroy(&icolp);
1741: return(0);
1742: }
1746: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1747: {
1748: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1749: Mat A = mat->A,B = mat->B;
1751: PetscReal isend[5],irecv[5];
1754: info->block_size = 1.0;
1755: MatGetInfo(A,MAT_LOCAL,info);
1756: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757: isend[3] = info->memory; isend[4] = info->mallocs;
1758: MatGetInfo(B,MAT_LOCAL,info);
1759: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1760: isend[3] += info->memory; isend[4] += info->mallocs;
1761: if (flag == MAT_LOCAL) {
1762: info->nz_used = isend[0];
1763: info->nz_allocated = isend[1];
1764: info->nz_unneeded = isend[2];
1765: info->memory = isend[3];
1766: info->mallocs = isend[4];
1767: } else if (flag == MAT_GLOBAL_MAX) {
1768: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);
1769: info->nz_used = irecv[0];
1770: info->nz_allocated = irecv[1];
1771: info->nz_unneeded = irecv[2];
1772: info->memory = irecv[3];
1773: info->mallocs = irecv[4];
1774: } else if (flag == MAT_GLOBAL_SUM) {
1775: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);
1776: info->nz_used = irecv[0];
1777: info->nz_allocated = irecv[1];
1778: info->nz_unneeded = irecv[2];
1779: info->memory = irecv[3];
1780: info->mallocs = irecv[4];
1781: }
1782: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1783: info->fill_ratio_needed = 0;
1784: info->factor_mallocs = 0;
1786: return(0);
1787: }
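/*
   Usage sketch (added for illustration; not part of the original source): the routine above is
   dispatched from the public MatGetInfo() interface; MAT_GLOBAL_SUM triggers the MPI_Allreduce()
   over the per-process counts shown above.
*/
static PetscErrorCode UsageSketch_MatGetInfo(Mat A)
{
  MatInfo info;
  MatGetInfo(A,MAT_GLOBAL_SUM,&info);
  PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %D, allocated %D, mallocs %D\n",
              (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.mallocs);
  return(0);
}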
1791: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1792: {
1793: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1797: switch (op) {
1798: case MAT_NEW_NONZERO_LOCATIONS:
1799: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1800: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1801: case MAT_KEEP_NONZERO_PATTERN:
1802: case MAT_NEW_NONZERO_LOCATION_ERR:
1803: case MAT_USE_INODES:
1804: case MAT_IGNORE_ZERO_ENTRIES:
1805: MatSetOption(a->A,op,flg);
1806: MatSetOption(a->B,op,flg);
1807: break;
1808: case MAT_ROW_ORIENTED:
1809: a->roworiented = flg;
1810: MatSetOption(a->A,op,flg);
1811: MatSetOption(a->B,op,flg);
1812: break;
1813: case MAT_NEW_DIAGONALS:
1814: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1815: break;
1816: case MAT_IGNORE_OFF_PROC_ENTRIES:
1817: a->donotstash = flg;
1818: break;
1819: case MAT_SPD:
1820: A->spd_set = PETSC_TRUE;
1821: A->spd = flg;
1822: if (flg) {
1823: A->symmetric = PETSC_TRUE;
1824: A->structurally_symmetric = PETSC_TRUE;
1825: A->symmetric_set = PETSC_TRUE;
1826: A->structurally_symmetric_set = PETSC_TRUE;
1827: }
1828: break;
1829: case MAT_SYMMETRIC:
1830: MatSetOption(a->A,op,flg);
1831: break;
1832: case MAT_STRUCTURALLY_SYMMETRIC:
1833: MatSetOption(a->A,op,flg);
1834: break;
1835: case MAT_HERMITIAN:
1836: MatSetOption(a->A,op,flg);
1837: break;
1838: case MAT_SYMMETRY_ETERNAL:
1839: MatSetOption(a->A,op,flg);
1840: break;
1841: default:
1842: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1843: }
1844: return(0);
1845: }
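/*
   Usage sketch (added for illustration; not part of the original source): typical options handled by
   the switch above. MAT_IGNORE_OFF_PROC_ENTRIES only affects subsequent MatSetValues() calls, so it
   should be set before assembly.
*/
static PetscErrorCode UsageSketch_MatSetOption(Mat A)
{
  MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE); /* do not stash off-process entries */
  MatSetOption(A,MAT_SPD,PETSC_TRUE);                     /* also marks A symmetric, see above */
  return(0);
}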
1849: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1850: {
1851: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1852: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1854: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1855: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1856: PetscInt *cmap,*idx_p;
1859: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1860: mat->getrowactive = PETSC_TRUE;
1862: if (!mat->rowvalues && (idx || v)) {
1863: /*
1864: allocate enough space to hold information from the longest row.
1865: */
1866: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1867: PetscInt max = 1,tmp;
1868: for (i=0; i<matin->rmap->n; i++) {
1869: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1870: if (max < tmp) { max = tmp; }
1871: }
1872: PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);
1873: }
1875: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1876: lrow = row - rstart;
1878: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1879: if (!v) {pvA = 0; pvB = 0;}
1880: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1881: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1882: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1883: nztot = nzA + nzB;
1885: cmap = mat->garray;
1886: if (v || idx) {
1887: if (nztot) {
1888: /* Sort by increasing column numbers, assuming A and B already sorted */
1889: PetscInt imark = -1;
1890: if (v) {
1891: *v = v_p = mat->rowvalues;
1892: for (i=0; i<nzB; i++) {
1893: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1894: else break;
1895: }
1896: imark = i;
1897: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1898: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1899: }
1900: if (idx) {
1901: *idx = idx_p = mat->rowindices;
1902: if (imark > -1) {
1903: for (i=0; i<imark; i++) {
1904: idx_p[i] = cmap[cworkB[i]];
1905: }
1906: } else {
1907: for (i=0; i<nzB; i++) {
1908: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1909: else break;
1910: }
1911: imark = i;
1912: }
1913: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1914: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1915: }
1916: } else {
1917: if (idx) *idx = 0;
1918: if (v) *v = 0;
1919: }
1920: }
1921: *nz = nztot;
1922: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1923: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1924: return(0);
1925: }
1929: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1930: {
1931: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1934: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1935: aij->getrowactive = PETSC_FALSE;
1936: return(0);
1937: }
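/*
   Usage sketch (added for illustration; not part of the original source): MatGetRow()/MatRestoreRow()
   may only be called for locally owned rows, and only one row may be active at a time (see the
   getrowactive checks above). Column indices are returned in global numbering.
*/
static PetscErrorCode UsageSketch_MatGetRow(Mat A)
{
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);
    /* ... use cols[0..ncols-1] (global column indices) and vals[0..ncols-1] ... */
    MatRestoreRow(A,row,&ncols,&cols,&vals);
  }
  return(0);
}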
1941: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1942: {
1943: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1944: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1946: PetscInt i,j,cstart = mat->cmap->rstart;
1947: PetscReal sum = 0.0;
1948: MatScalar *v;
1951: if (aij->size == 1) {
1952: MatNorm(aij->A,type,norm);
1953: } else {
1954: if (type == NORM_FROBENIUS) {
1955: v = amat->a;
1956: for (i=0; i<amat->nz; i++) {
1957: #if defined(PETSC_USE_COMPLEX)
1958: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1959: #else
1960: sum += (*v)*(*v); v++;
1961: #endif
1962: }
1963: v = bmat->a;
1964: for (i=0; i<bmat->nz; i++) {
1965: #if defined(PETSC_USE_COMPLEX)
1966: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1967: #else
1968: sum += (*v)*(*v); v++;
1969: #endif
1970: }
1971: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1972: *norm = PetscSqrtReal(*norm);
1973: } else if (type == NORM_1) { /* max column norm */
1974: PetscReal *tmp,*tmp2;
1975: PetscInt *jj,*garray = aij->garray;
1976: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);
1977: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);
1978: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
1979: *norm = 0.0;
1980: v = amat->a; jj = amat->j;
1981: for (j=0; j<amat->nz; j++) {
1982: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1983: }
1984: v = bmat->a; jj = bmat->j;
1985: for (j=0; j<bmat->nz; j++) {
1986: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987: }
1988: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1989: for (j=0; j<mat->cmap->N; j++) {
1990: if (tmp2[j] > *norm) *norm = tmp2[j];
1991: }
1992: PetscFree(tmp);
1993: PetscFree(tmp2);
1994: } else if (type == NORM_INFINITY) { /* max row norm */
1995: PetscReal ntemp = 0.0;
1996: for (j=0; j<aij->A->rmap->n; j++) {
1997: v = amat->a + amat->i[j];
1998: sum = 0.0;
1999: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2000: sum += PetscAbsScalar(*v); v++;
2001: }
2002: v = bmat->a + bmat->i[j];
2003: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2004: sum += PetscAbsScalar(*v); v++;
2005: }
2006: if (sum > ntemp) ntemp = sum;
2007: }
2008: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);
2009: } else {
2010: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2011: }
2012: }
2013: return(0);
2014: }
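/*
   Usage sketch (added for illustration; not part of the original source): the norms computed above
   are requested through MatNorm(); NORM_2 is not supported for this type, as the error branch shows.
*/
static PetscErrorCode UsageSketch_MatNorm(Mat A)
{
  PetscReal nrm1,nrmf,nrminf;
  MatNorm(A,NORM_1,&nrm1);          /* maximum column sum */
  MatNorm(A,NORM_FROBENIUS,&nrmf);
  MatNorm(A,NORM_INFINITY,&nrminf); /* maximum row sum */
  return(0);
}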
2018: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2019: {
2020: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2021: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2023: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2024: PetscInt cstart=A->cmap->rstart,ncol;
2025: Mat B;
2026: MatScalar *array;
2029: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2031: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2032: ai = Aloc->i; aj = Aloc->j;
2033: bi = Bloc->i; bj = Bloc->j;
2034: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2035: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2036: PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);
2037: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
2038: for (i=0; i<ai[ma]; i++){
2039: d_nnz[aj[i]] ++;
2040: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2041: }
2043: MatCreate(((PetscObject)A)->comm,&B);
2044: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2045: MatSetType(B,((PetscObject)A)->type_name);
2046: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
2047: PetscFree(d_nnz);
2048: } else {
2049: B = *matout;
2050: }
2052: /* copy over the A part */
2053: array = Aloc->a;
2054: row = A->rmap->rstart;
2055: for (i=0; i<ma; i++) {
2056: ncol = ai[i+1]-ai[i];
2057: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
2058: row++; array += ncol; aj += ncol;
2059: }
2060: aj = Aloc->j;
2061: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2063: /* copy over the B part */
2064: PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);
2065: PetscMemzero(cols,bi[mb]*sizeof(PetscInt));
2066: array = Bloc->a;
2067: row = A->rmap->rstart;
2068: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2069: cols_tmp = cols;
2070: for (i=0; i<mb; i++) {
2071: ncol = bi[i+1]-bi[i];
2072: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2073: row++; array += ncol; cols_tmp += ncol;
2074: }
2075: PetscFree(cols);
2076:
2077: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2078: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2079: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2080: *matout = B;
2081: } else {
2082: MatHeaderMerge(A,B);
2083: }
2084: return(0);
2085: }
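/*
   Usage sketch (added for illustration; not part of the original source): out-of-place and in-place
   transposes as handled by the routine above; the in-place form requires a square matrix.
*/
static PetscErrorCode UsageSketch_MatTranspose(Mat A)
{
  Mat At;
  MatTranspose(A,MAT_INITIAL_MATRIX,&At); /* new matrix holding A^T */
  MatDestroy(&At);
  MatTranspose(A,MAT_REUSE_MATRIX,&A);    /* in-place; square matrices only */
  return(0);
}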
2089: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2090: {
2091: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2092: Mat a = aij->A,b = aij->B;
2094: PetscInt s1,s2,s3;
2097: MatGetLocalSize(mat,&s2,&s3);
2098: if (rr) {
2099: VecGetLocalSize(rr,&s1);
2100: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2101: /* Overlap communication with computation. */
2102: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2103: }
2104: if (ll) {
2105: VecGetLocalSize(ll,&s1);
2106: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2107: (*b->ops->diagonalscale)(b,ll,0);
2108: }
2109: /* scale the diagonal block */
2110: (*a->ops->diagonalscale)(a,ll,rr);
2112: if (rr) {
2113: /* Do a scatter end and then right scale the off-diagonal block */
2114: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2115: (*b->ops->diagonalscale)(b,0,aij->lvec);
2116: }
2117:
2118: return(0);
2119: }
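/*
   Usage sketch (added for illustration; not part of the original source): A <- diag(l) A diag(r).
   The left vector must conform to the row layout and the right vector to the column layout, which
   is what MatGetVecs() provides.
*/
static PetscErrorCode UsageSketch_MatDiagonalScale(Mat A)
{
  Vec l,r;
  MatGetVecs(A,&r,&l);   /* r: column layout, l: row layout */
  VecSet(l,2.0);
  VecSet(r,0.5);
  MatDiagonalScale(A,l,r);
  VecDestroy(&l);
  VecDestroy(&r);
  return(0);
}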
2123: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2124: {
2125: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2129: MatSetBlockSize(a->A,bs);
2130: MatSetBlockSize(a->B,bs);
2131: PetscLayoutSetBlockSize(A->rmap,bs);
2132: PetscLayoutSetBlockSize(A->cmap,bs);
2133: return(0);
2134: }
2137: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2138: {
2139: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2143: MatSetUnfactored(a->A);
2144: return(0);
2145: }
2149: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2150: {
2151: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2152: Mat a,b,c,d;
2153: PetscBool flg;
2157: a = matA->A; b = matA->B;
2158: c = matB->A; d = matB->B;
2160: MatEqual(a,c,&flg);
2161: if (flg) {
2162: MatEqual(b,d,&flg);
2163: }
2164: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2165: return(0);
2166: }
2170: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2171: {
2173: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2174: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2177: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2178: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2179: /* because of the column compression in the off-process part of the matrix a->B,
2180: the number of columns in a->B and b->B may be different, hence we cannot call
2181: MatCopy() directly on the two parts. If need be, a copy more efficient than
2182: MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2183: then copying the submatrices */
2184: MatCopy_Basic(A,B,str);
2185: } else {
2186: MatCopy(a->A,b->A,str);
2187: MatCopy(a->B,b->B,str);
2188: }
2189: return(0);
2190: }
2194: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2195: {
2199: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2200: return(0);
2201: }
2205: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2206: {
2208: PetscInt i;
2209: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2210: PetscBLASInt bnz,one=1;
2211: Mat_SeqAIJ *x,*y;
2214: if (str == SAME_NONZERO_PATTERN) {
2215: PetscScalar alpha = a;
2216: x = (Mat_SeqAIJ *)xx->A->data;
2217: y = (Mat_SeqAIJ *)yy->A->data;
2218: bnz = PetscBLASIntCast(x->nz);
2219: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2220: x = (Mat_SeqAIJ *)xx->B->data;
2221: y = (Mat_SeqAIJ *)yy->B->data;
2222: bnz = PetscBLASIntCast(x->nz);
2223: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224: } else if (str == SUBSET_NONZERO_PATTERN) {
2225: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2227: x = (Mat_SeqAIJ *)xx->B->data;
2228: y = (Mat_SeqAIJ *)yy->B->data;
2229: if (y->xtoy && y->XtoY != xx->B) {
2230: PetscFree(y->xtoy);
2231: MatDestroy(&y->XtoY);
2232: }
2233: if (!y->xtoy) { /* get xtoy */
2234: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2235: y->XtoY = xx->B;
2236: PetscObjectReference((PetscObject)xx->B);
2237: }
2238: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2239: } else {
2240: Mat B;
2241: PetscInt *nnz_d,*nnz_o;
2242: PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);
2243: PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);
2244: MatCreate(((PetscObject)Y)->comm,&B);
2245: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2246: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2247: MatSetType(B,MATMPIAIJ);
2248: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2249: MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);
2250: MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);
2251: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2252: MatHeaderReplace(Y,B);
2253: PetscFree(nnz_d);
2254: PetscFree(nnz_o);
2255: }
2256: return(0);
2257: }
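/*
   Usage sketch (added for illustration; not part of the original source): Y <- Y + a*X. The three
   branches above correspond to the MatStructure argument: SAME_NONZERO_PATTERN uses a direct BLAS
   axpy, SUBSET_NONZERO_PATTERN a cached index map, and DIFFERENT_NONZERO_PATTERN a freshly
   preallocated matrix.
*/
static PetscErrorCode UsageSketch_MatAXPY(Mat Y,Mat X)
{
  MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);
  return(0);
}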
2263: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2264: {
2265: #if defined(PETSC_USE_COMPLEX)
2267: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2270: MatConjugate_SeqAIJ(aij->A);
2271: MatConjugate_SeqAIJ(aij->B);
2272: #else
2274: #endif
2275: return(0);
2276: }
2280: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281: {
2282: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2286: MatRealPart(a->A);
2287: MatRealPart(a->B);
2288: return(0);
2289: }
2293: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294: {
2295: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2299: MatImaginaryPart(a->A);
2300: MatImaginaryPart(a->B);
2301: return(0);
2302: }
2304: #ifdef PETSC_HAVE_PBGL
2306: #include <boost/parallel/mpi/bsp_process_group.hpp>
2307: #include <boost/graph/distributed/ilu_default_graph.hpp>
2308: #include <boost/graph/distributed/ilu_0_block.hpp>
2309: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2310: #include <boost/graph/distributed/petsc/interface.hpp>
2311: #include <boost/multi_array.hpp>
2312: #include <boost/parallel/distributed_property_map.hpp>
2316: /*
2317: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2318: */
2319: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2320: {
2321: namespace petsc = boost::distributed::petsc;
2322:
2323: namespace graph_dist = boost::graph::distributed;
2324: using boost::graph::distributed::ilu_default::process_group_type;
2325: using boost::graph::ilu_permuted;
2327: PetscBool row_identity, col_identity;
2328: PetscContainer c;
2329: PetscInt m, n, M, N;
2330: PetscErrorCode ierr;
2333: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2334: ISIdentity(isrow, &row_identity);
2335: ISIdentity(iscol, &col_identity);
2336: if (!row_identity || !col_identity) {
2337: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2338: }
2340: process_group_type pg;
2341: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2342: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2343: lgraph_type& level_graph = *lgraph_p;
2344: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2346: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2347: ilu_permuted(level_graph);
2349: /* put together the new matrix */
2350: MatCreate(((PetscObject)A)->comm, fact);
2351: MatGetLocalSize(A, &m, &n);
2352: MatGetSize(A, &M, &N);
2353: MatSetSizes(fact, m, n, M, N);
2354: MatSetType(fact, ((PetscObject)A)->type_name);
2355: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2356: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2358: PetscContainerCreate(((PetscObject)A)->comm, &c);
2359: PetscContainerSetPointer(c, lgraph_p);
2360: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2361: PetscContainerDestroy(&c);
2362: return(0);
2363: }
2367: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2368: {
2370: return(0);
2371: }
2375: /*
2376: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2377: */
2378: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2379: {
2380: namespace graph_dist = boost::graph::distributed;
2382: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2383: lgraph_type* lgraph_p;
2384: PetscContainer c;
2388: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
2389: PetscContainerGetPointer(c, (void **) &lgraph_p);
2390: VecCopy(b, x);
2392: PetscScalar* array_x;
2393: VecGetArray(x, &array_x);
2394: PetscInt sx;
2395: VecGetSize(x, &sx);
2396:
2397: PetscScalar* array_b;
2398: VecGetArray(b, &array_b);
2399: PetscInt sb;
2400: VecGetSize(b, &sb);
2402: lgraph_type& level_graph = *lgraph_p;
2403: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2405: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2406: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2407: ref_x(array_x, boost::extents[num_vertices(graph)]);
2409: typedef boost::iterator_property_map<array_ref_type::iterator,
2410: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2411: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2412: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2413:
2414: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2416: return(0);
2417: }
2418: #endif
2420: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2421: PetscInt nzlocal,nsends,nrecvs;
2422: PetscMPIInt *send_rank,*recv_rank;
2423: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2424: PetscScalar *sbuf_a,**rbuf_a;
2425: PetscErrorCode (*Destroy)(Mat);
2426: } Mat_Redundant;
2430: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2431: {
2432: PetscErrorCode ierr;
2433: Mat_Redundant *redund=(Mat_Redundant*)ptr;
2434: PetscInt i;
2437: PetscFree2(redund->send_rank,redund->recv_rank);
2438: PetscFree(redund->sbuf_j);
2439: PetscFree(redund->sbuf_a);
2440: for (i=0; i<redund->nrecvs; i++){
2441: PetscFree(redund->rbuf_j[i]);
2442: PetscFree(redund->rbuf_a[i]);
2443: }
2444: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2445: PetscFree(redund);
2446: return(0);
2447: }
2451: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2452: {
2453: PetscErrorCode ierr;
2454: PetscContainer container;
2455: Mat_Redundant *redund=PETSC_NULL;
2458: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
2459: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2460: PetscContainerGetPointer(container,(void **)&redund);
2461: A->ops->destroy = redund->Destroy;
2462: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
2463: if (A->ops->destroy) {
2464: (*A->ops->destroy)(A);
2465: }
2466: return(0);
2467: }
2471: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2472: {
2473: PetscMPIInt rank,size;
2474: MPI_Comm comm=((PetscObject)mat)->comm;
2476: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2477: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2478: PetscInt *rowrange=mat->rmap->range;
2479: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2480: Mat A=aij->A,B=aij->B,C=*matredundant;
2481: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2482: PetscScalar *sbuf_a;
2483: PetscInt nzlocal=a->nz+b->nz;
2484: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2485: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2486: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2487: MatScalar *aworkA,*aworkB;
2488: PetscScalar *vals;
2489: PetscMPIInt tag1,tag2,tag3,imdex;
2490: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2491: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2492: MPI_Status recv_status,*send_status;
2493: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2494: PetscInt **rbuf_j=PETSC_NULL;
2495: PetscScalar **rbuf_a=PETSC_NULL;
2496: Mat_Redundant *redund=PETSC_NULL;
2497: PetscContainer container;
2500: MPI_Comm_rank(comm,&rank);
2501: MPI_Comm_size(comm,&size);
2503: if (reuse == MAT_REUSE_MATRIX) {
2504: MatGetSize(C,&M,&N);
2505: if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2506: MatGetLocalSize(C,&M,&N);
2507: if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2508: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
2509: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2510: PetscContainerGetPointer(container,(void **)&redund);
2511: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2513: nsends = redund->nsends;
2514: nrecvs = redund->nrecvs;
2515: send_rank = redund->send_rank;
2516: recv_rank = redund->recv_rank;
2517: sbuf_nz = redund->sbuf_nz;
2518: rbuf_nz = redund->rbuf_nz;
2519: sbuf_j = redund->sbuf_j;
2520: sbuf_a = redund->sbuf_a;
2521: rbuf_j = redund->rbuf_j;
2522: rbuf_a = redund->rbuf_a;
2523: }
2525: if (reuse == MAT_INITIAL_MATRIX){
2526: PetscMPIInt subrank,subsize;
2527: PetscInt nleftover,np_subcomm;
2528: /* determine the destination ranks send_rank[], nsends and nrecvs */
2529: MPI_Comm_rank(subcomm,&subrank);
2530: MPI_Comm_size(subcomm,&subsize);
2531: PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2532: np_subcomm = size/nsubcomm;
2533: nleftover = size - nsubcomm*np_subcomm;
2534: nsends = 0; nrecvs = 0;
2535: for (i=0; i<size; i++){ /* i=rank*/
2536: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2537: send_rank[nsends] = i; nsends++;
2538: recv_rank[nrecvs++] = i;
2539: }
2540: }
2541: if (rank >= size - nleftover){/* this proc is a leftover processor */
2542: i = size-nleftover-1;
2543: j = 0;
2544: while (j < nsubcomm - nleftover){
2545: send_rank[nsends++] = i;
2546: i--; j++;
2547: }
2548: }
2550: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2551: for (i=0; i<nleftover; i++){
2552: recv_rank[nrecvs++] = size-nleftover+i;
2553: }
2554: }
2556: /* allocate sbuf_j, sbuf_a */
2557: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2558: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
2559: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
2560: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2562: /* copy mat's local entries into the buffers */
2563: if (reuse == MAT_INITIAL_MATRIX){
2564: rownz_max = 0;
2565: rptr = sbuf_j;
2566: cols = sbuf_j + rend-rstart + 1;
2567: vals = sbuf_a;
2568: rptr[0] = 0;
2569: for (i=0; i<rend-rstart; i++){
2570: row = i + rstart;
2571: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2572: ncols = nzA + nzB;
2573: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2574: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2575: /* load the column indices for this row into cols */
2576: lwrite = 0;
2577: for (l=0; l<nzB; l++) {
2578: if ((ctmp = bmap[cworkB[l]]) < cstart){
2579: vals[lwrite] = aworkB[l];
2580: cols[lwrite++] = ctmp;
2581: }
2582: }
2583: for (l=0; l<nzA; l++){
2584: vals[lwrite] = aworkA[l];
2585: cols[lwrite++] = cstart + cworkA[l];
2586: }
2587: for (l=0; l<nzB; l++) {
2588: if ((ctmp = bmap[cworkB[l]]) >= cend){
2589: vals[lwrite] = aworkB[l];
2590: cols[lwrite++] = ctmp;
2591: }
2592: }
2593: vals += ncols;
2594: cols += ncols;
2595: rptr[i+1] = rptr[i] + ncols;
2596: if (rownz_max < ncols) rownz_max = ncols;
2597: }
2598: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2599: } else { /* only copy matrix values into sbuf_a */
2600: rptr = sbuf_j;
2601: vals = sbuf_a;
2602: rptr[0] = 0;
2603: for (i=0; i<rend-rstart; i++){
2604: row = i + rstart;
2605: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2606: ncols = nzA + nzB;
2607: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2608: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2609: lwrite = 0;
2610: for (l=0; l<nzB; l++) {
2611: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2612: }
2613: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2614: for (l=0; l<nzB; l++) {
2615: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2616: }
2617: vals += ncols;
2618: rptr[i+1] = rptr[i] + ncols;
2619: }
2620: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2622: /* send nzlocal to others, and recv other's nzlocal */
2623: /*--------------------------------------------------*/
2624: if (reuse == MAT_INITIAL_MATRIX){
2625: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2626: s_waits2 = s_waits3 + nsends;
2627: s_waits1 = s_waits2 + nsends;
2628: r_waits1 = s_waits1 + nsends;
2629: r_waits2 = r_waits1 + nrecvs;
2630: r_waits3 = r_waits2 + nrecvs;
2631: } else {
2632: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2633: r_waits3 = s_waits3 + nsends;
2634: }
2636: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2637: if (reuse == MAT_INITIAL_MATRIX){
2638: /* get new tags to keep the communication clean */
2639: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2640: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2641: PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2643: /* post receives of other's nzlocal */
2644: for (i=0; i<nrecvs; i++){
2645: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2646: }
2647: /* send nzlocal to others */
2648: for (i=0; i<nsends; i++){
2649: sbuf_nz[i] = nzlocal;
2650: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2651: }
2652: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2653: count = nrecvs;
2654: while (count) {
2655: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2656: recv_rank[imdex] = recv_status.MPI_SOURCE;
2657: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2658: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2660: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2661: rbuf_nz[imdex] += i + 2;
2662: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2663: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2664: count--;
2665: }
2666: /* wait on sends of nzlocal */
2667: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2668: /* send mat->i,j to others, and recv from other's */
2669: /*------------------------------------------------*/
2670: for (i=0; i<nsends; i++){
2671: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2672: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2673: }
2674: /* wait on receives of mat->i,j */
2675: /*------------------------------*/
2676: count = nrecvs;
2677: while (count) {
2678: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2679: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2680: count--;
2681: }
2682: /* wait on sends of mat->i,j */
2683: /*---------------------------*/
2684: if (nsends) {
2685: MPI_Waitall(nsends,s_waits2,send_status);
2686: }
2687: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2689: /* post receives, send and receive mat->a */
2690: /*----------------------------------------*/
2691: for (imdex=0; imdex<nrecvs; imdex++) {
2692: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2693: }
2694: for (i=0; i<nsends; i++){
2695: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2696: }
2697: count = nrecvs;
2698: while (count) {
2699: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2700: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2701: count--;
2702: }
2703: if (nsends) {
2704: MPI_Waitall(nsends,s_waits3,send_status);
2705: }
2707: PetscFree2(s_waits3,send_status);
2709: /* create redundant matrix */
2710: /*-------------------------*/
2711: if (reuse == MAT_INITIAL_MATRIX){
2712: /* compute rownz_max for preallocation */
2713: for (imdex=0; imdex<nrecvs; imdex++){
2714: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2715: rptr = rbuf_j[imdex];
2716: for (i=0; i<j; i++){
2717: ncols = rptr[i+1] - rptr[i];
2718: if (rownz_max < ncols) rownz_max = ncols;
2719: }
2720: }
2722: MatCreate(subcomm,&C);
2723: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2724: MatSetFromOptions(C);
2725: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2726: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2727: } else {
2728: C = *matredundant;
2729: }
2731: /* insert local matrix entries */
2732: rptr = sbuf_j;
2733: cols = sbuf_j + rend-rstart + 1;
2734: vals = sbuf_a;
2735: for (i=0; i<rend-rstart; i++){
2736: row = i + rstart;
2737: ncols = rptr[i+1] - rptr[i];
2738: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2739: vals += ncols;
2740: cols += ncols;
2741: }
2742: /* insert received matrix entries */
2743: for (imdex=0; imdex<nrecvs; imdex++){
2744: rstart = rowrange[recv_rank[imdex]];
2745: rend = rowrange[recv_rank[imdex]+1];
2746: rptr = rbuf_j[imdex];
2747: cols = rbuf_j[imdex] + rend-rstart + 1;
2748: vals = rbuf_a[imdex];
2749: for (i=0; i<rend-rstart; i++){
2750: row = i + rstart;
2751: ncols = rptr[i+1] - rptr[i];
2752: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2753: vals += ncols;
2754: cols += ncols;
2755: }
2756: }
2757: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2758: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2759: MatGetSize(C,&M,&N);
2760: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2761: if (reuse == MAT_INITIAL_MATRIX) {
2762: PetscContainer container;
2763: *matredundant = C;
2764: /* create a supporting struct and attach it to C for reuse */
2765: PetscNewLog(C,Mat_Redundant,&redund);
2766: PetscContainerCreate(PETSC_COMM_SELF,&container);
2767: PetscContainerSetPointer(container,redund);
2768: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2769: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2770: PetscContainerDestroy(&container);
2772: redund->nzlocal = nzlocal;
2773: redund->nsends = nsends;
2774: redund->nrecvs = nrecvs;
2775: redund->send_rank = send_rank;
2776: redund->recv_rank = recv_rank;
2777: redund->sbuf_nz = sbuf_nz;
2778: redund->rbuf_nz = rbuf_nz;
2779: redund->sbuf_j = sbuf_j;
2780: redund->sbuf_a = sbuf_a;
2781: redund->rbuf_j = rbuf_j;
2782: redund->rbuf_a = rbuf_a;
2784: redund->Destroy = C->ops->destroy;
2785: C->ops->destroy = MatDestroy_MatRedundant;
2786: }
2787: return(0);
2788: }
2792: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2793: {
2794: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2796: PetscInt i,*idxb = 0;
2797: PetscScalar *va,*vb;
2798: Vec vtmp;
2801: MatGetRowMaxAbs(a->A,v,idx);
2802: VecGetArray(v,&va);
2803: if (idx) {
2804: for (i=0; i<A->rmap->n; i++) {
2805: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2806: }
2807: }
2809: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2810: if (idx) {
2811: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2812: }
2813: MatGetRowMaxAbs(a->B,vtmp,idxb);
2814: VecGetArray(vtmp,&vb);
2816: for (i=0; i<A->rmap->n; i++){
2817: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2818: va[i] = vb[i];
2819: if (idx) idx[i] = a->garray[idxb[i]];
2820: }
2821: }
2823: VecRestoreArray(v,&va);
2824: VecRestoreArray(vtmp,&vb);
2825: PetscFree(idxb);
2826: VecDestroy(&vtmp);
2827: return(0);
2828: }
2832: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2833: {
2834: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2836: PetscInt i,*idxb = 0;
2837: PetscScalar *va,*vb;
2838: Vec vtmp;
2841: MatGetRowMinAbs(a->A,v,idx);
2842: VecGetArray(v,&va);
2843: if (idx) {
2844: for (i=0; i<A->rmap->n; i++) {
2845: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2846: }
2847: }
2849: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2850: if (idx) {
2851: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2852: }
2853: MatGetRowMinAbs(a->B,vtmp,idxb);
2854: VecGetArray(vtmp,&vb);
2856: for (i=0; i<A->rmap->n; i++){
2857: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2858: va[i] = vb[i];
2859: if (idx) idx[i] = a->garray[idxb[i]];
2860: }
2861: }
2863: VecRestoreArray(v,&va);
2864: VecRestoreArray(vtmp,&vb);
2865: PetscFree(idxb);
2866: VecDestroy(&vtmp);
2867: return(0);
2868: }
2872: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2873: {
2874: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2875: PetscInt n = A->rmap->n;
2876: PetscInt cstart = A->cmap->rstart;
2877: PetscInt *cmap = mat->garray;
2878: PetscInt *diagIdx, *offdiagIdx;
2879: Vec diagV, offdiagV;
2880: PetscScalar *a, *diagA, *offdiagA;
2881: PetscInt r;
2885: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2886: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2887: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2888: MatGetRowMin(mat->A, diagV, diagIdx);
2889: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2890: VecGetArray(v, &a);
2891: VecGetArray(diagV, &diagA);
2892: VecGetArray(offdiagV, &offdiagA);
2893: for(r = 0; r < n; ++r) {
2894: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2895: a[r] = diagA[r];
2896: idx[r] = cstart + diagIdx[r];
2897: } else {
2898: a[r] = offdiagA[r];
2899: idx[r] = cmap[offdiagIdx[r]];
2900: }
2901: }
2902: VecRestoreArray(v, &a);
2903: VecRestoreArray(diagV, &diagA);
2904: VecRestoreArray(offdiagV, &offdiagA);
2905: VecDestroy(&diagV);
2906: VecDestroy(&offdiagV);
2907: PetscFree2(diagIdx, offdiagIdx);
2908: return(0);
2909: }
2913: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914: {
2915: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2916: PetscInt n = A->rmap->n;
2917: PetscInt cstart = A->cmap->rstart;
2918: PetscInt *cmap = mat->garray;
2919: PetscInt *diagIdx, *offdiagIdx;
2920: Vec diagV, offdiagV;
2921: PetscScalar *a, *diagA, *offdiagA;
2922: PetscInt r;
2926: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2927: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2928: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2929: MatGetRowMax(mat->A, diagV, diagIdx);
2930: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2931: VecGetArray(v, &a);
2932: VecGetArray(diagV, &diagA);
2933: VecGetArray(offdiagV, &offdiagA);
2934: for(r = 0; r < n; ++r) {
2935: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2936: a[r] = diagA[r];
2937: idx[r] = cstart + diagIdx[r];
2938: } else {
2939: a[r] = offdiagA[r];
2940: idx[r] = cmap[offdiagIdx[r]];
2941: }
2942: }
2943: VecRestoreArray(v, &a);
2944: VecRestoreArray(diagV, &diagA);
2945: VecRestoreArray(offdiagV, &offdiagA);
2946: VecDestroy(&diagV);
2947: VecDestroy(&offdiagV);
2948: PetscFree2(diagIdx, offdiagIdx);
2949: return(0);
2950: }
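/*
   Usage sketch (added for illustration; not part of the original source) covering the row max/min
   family above: the result vector must conform to the row layout (a "left" vector), and the optional
   idx[] array returns the global column index of the selected entry in each local row.
*/
static PetscErrorCode UsageSketch_MatGetRowMaxAbs(Mat A)
{
  Vec      v;
  PetscInt m,*idx;

  MatGetVecs(A,PETSC_NULL,&v);           /* left vector: row layout */
  MatGetLocalSize(A,&m,PETSC_NULL);
  PetscMalloc(m*sizeof(PetscInt),&idx);
  MatGetRowMaxAbs(A,v,idx);
  /* ... v holds max |a_ij| per local row, idx[] the corresponding global columns ... */
  PetscFree(idx);
  VecDestroy(&v);
  return(0);
}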
2954: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2955: {
2957: Mat *dummy;
2960: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2961: *newmat = *dummy;
2962: PetscFree(dummy);
2963: return(0);
2964: }
2970: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2971: {
2972: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2976: MatInvertBlockDiagonal(a->A,values);
2977: return(0);
2978: }
2981: /* -------------------------------------------------------------------*/
2982: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2983: MatGetRow_MPIAIJ,
2984: MatRestoreRow_MPIAIJ,
2985: MatMult_MPIAIJ,
2986: /* 4*/ MatMultAdd_MPIAIJ,
2987: MatMultTranspose_MPIAIJ,
2988: MatMultTransposeAdd_MPIAIJ,
2989: #ifdef PETSC_HAVE_PBGL
2990: MatSolve_MPIAIJ,
2991: #else
2992: 0,
2993: #endif
2994: 0,
2995: 0,
2996: /*10*/ 0,
2997: 0,
2998: 0,
2999: MatSOR_MPIAIJ,
3000: MatTranspose_MPIAIJ,
3001: /*15*/ MatGetInfo_MPIAIJ,
3002: MatEqual_MPIAIJ,
3003: MatGetDiagonal_MPIAIJ,
3004: MatDiagonalScale_MPIAIJ,
3005: MatNorm_MPIAIJ,
3006: /*20*/ MatAssemblyBegin_MPIAIJ,
3007: MatAssemblyEnd_MPIAIJ,
3008: MatSetOption_MPIAIJ,
3009: MatZeroEntries_MPIAIJ,
3010: /*24*/ MatZeroRows_MPIAIJ,
3011: 0,
3012: #ifdef PETSC_HAVE_PBGL
3013: 0,
3014: #else
3015: 0,
3016: #endif
3017: 0,
3018: 0,
3019: /*29*/ MatSetUpPreallocation_MPIAIJ,
3020: #ifdef PETSC_HAVE_PBGL
3021: 0,
3022: #else
3023: 0,
3024: #endif
3025: 0,
3026: 0,
3027: 0,
3028: /*34*/ MatDuplicate_MPIAIJ,
3029: 0,
3030: 0,
3031: 0,
3032: 0,
3033: /*39*/ MatAXPY_MPIAIJ,
3034: MatGetSubMatrices_MPIAIJ,
3035: MatIncreaseOverlap_MPIAIJ,
3036: MatGetValues_MPIAIJ,
3037: MatCopy_MPIAIJ,
3038: /*44*/ MatGetRowMax_MPIAIJ,
3039: MatScale_MPIAIJ,
3040: 0,
3041: 0,
3042: MatZeroRowsColumns_MPIAIJ,
3043: /*49*/ MatSetBlockSize_MPIAIJ,
3044: 0,
3045: 0,
3046: 0,
3047: 0,
3048: /*54*/ MatFDColoringCreate_MPIAIJ,
3049: 0,
3050: MatSetUnfactored_MPIAIJ,
3051: MatPermute_MPIAIJ,
3052: 0,
3053: /*59*/ MatGetSubMatrix_MPIAIJ,
3054: MatDestroy_MPIAIJ,
3055: MatView_MPIAIJ,
3056: 0,
3057: 0,
3058: /*64*/ 0,
3059: 0,
3060: 0,
3061: 0,
3062: 0,
3063: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3064: MatGetRowMinAbs_MPIAIJ,
3065: 0,
3066: MatSetColoring_MPIAIJ,
3067: #if defined(PETSC_HAVE_ADIC)
3068: MatSetValuesAdic_MPIAIJ,
3069: #else
3070: 0,
3071: #endif
3072: MatSetValuesAdifor_MPIAIJ,
3073: /*75*/ MatFDColoringApply_AIJ,
3074: 0,
3075: 0,
3076: 0,
3077: 0,
3078: /*80*/ 0,
3079: 0,
3080: 0,
3081: /*83*/ MatLoad_MPIAIJ,
3082: 0,
3083: 0,
3084: 0,
3085: 0,
3086: 0,
3087: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3088: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3089: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3090: MatPtAP_Basic,
3091: MatPtAPSymbolic_MPIAIJ,
3092: /*94*/ MatPtAPNumeric_MPIAIJ,
3093: 0,
3094: 0,
3095: 0,
3096: 0,
3097: /*99*/ 0,
3098: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3099: MatPtAPNumeric_MPIAIJ_MPIAIJ,
3100: MatConjugate_MPIAIJ,
3101: 0,
3102: /*104*/MatSetValuesRow_MPIAIJ,
3103: MatRealPart_MPIAIJ,
3104: MatImaginaryPart_MPIAIJ,
3105: 0,
3106: 0,
3107: /*109*/0,
3108: MatGetRedundantMatrix_MPIAIJ,
3109: MatGetRowMin_MPIAIJ,
3110: 0,
3111: 0,
3112: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3113: 0,
3114: 0,
3115: 0,
3116: 0,
3117: /*119*/0,
3118: 0,
3119: 0,
3120: 0,
3121: MatGetMultiProcBlock_MPIAIJ,
3122: /*124*/MatFindNonZeroRows_MPIAIJ,
3123: MatGetColumnNorms_MPIAIJ,
3124: MatInvertBlockDiagonal_MPIAIJ,
3125: 0,
3126: MatGetSubMatricesParallel_MPIAIJ,
3127: /*129*/0
3128: };
3130: /* ----------------------------------------------------------------------------------------*/
3135: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3136: {
3137: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3141: MatStoreValues(aij->A);
3142: MatStoreValues(aij->B);
3143: return(0);
3144: }
3150: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3151: {
3152: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3156: MatRetrieveValues(aij->A);
3157: MatRetrieveValues(aij->B);
3158: return(0);
3159: }
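/*
   Usage sketch (added for illustration; not part of the original source): MatStoreValues()/
   MatRetrieveValues() save and restore the numerical values of an already assembled matrix whose
   nonzero pattern is frozen, e.g. when the values are overwritten by an in-place computation.
*/
static PetscErrorCode UsageSketch_MatStoreValues(Mat A)
{
  MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE); /* the pattern must not change */
  MatStoreValues(A);
  /* ... code that modifies the numerical values of A ... */
  MatRetrieveValues(A);
  return(0);
}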
3165: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3166: {
3167: Mat_MPIAIJ *b;
3169: PetscInt i;
3172: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3173: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3174: if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3175: if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3177: PetscLayoutSetBlockSize(B->rmap,1);
3178: PetscLayoutSetBlockSize(B->cmap,1);
3179: PetscLayoutSetUp(B->rmap);
3180: PetscLayoutSetUp(B->cmap);
3181: if (d_nnz) {
3182: for (i=0; i<B->rmap->n; i++) {
3183: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3184: }
3185: }
3186: if (o_nnz) {
3187: for (i=0; i<B->rmap->n; i++) {
3188: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3189: }
3190: }
3191: b = (Mat_MPIAIJ*)B->data;
3193: if (!B->preallocated) {
3194: /* Explicitly create 2 MATSEQAIJ matrices. */
3195: MatCreate(PETSC_COMM_SELF,&b->A);
3196: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3197: MatSetType(b->A,MATSEQAIJ);
3198: PetscLogObjectParent(B,b->A);
3199: MatCreate(PETSC_COMM_SELF,&b->B);
3200: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3201: MatSetType(b->B,MATSEQAIJ);
3202: PetscLogObjectParent(B,b->B);
3203: }
3205: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3206: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3207: B->preallocated = PETSC_TRUE;
3208: return(0);
3209: }
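/*
   Usage sketch (added for illustration; not part of the original source): typical creation of a
   MATMPIAIJ matrix with the preallocation routine above; 5 and 2 are hypothetical per-row estimates
   for the diagonal and off-diagonal blocks.
*/
static PetscErrorCode UsageSketch_Preallocation(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);
  MatSetType(*A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  return(0);
}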
3214: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3215: {
3216: Mat mat;
3217: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3221: *newmat = 0;
3222: MatCreate(((PetscObject)matin)->comm,&mat);
3223: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3224: MatSetType(mat,((PetscObject)matin)->type_name);
3225: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3226: a = (Mat_MPIAIJ*)mat->data;
3227:
3228: mat->factortype = matin->factortype;
3229: mat->rmap->bs = matin->rmap->bs;
3230: mat->assembled = PETSC_TRUE;
3231: mat->insertmode = NOT_SET_VALUES;
3232: mat->preallocated = PETSC_TRUE;
3234: a->size = oldmat->size;
3235: a->rank = oldmat->rank;
3236: a->donotstash = oldmat->donotstash;
3237: a->roworiented = oldmat->roworiented;
3238: a->rowindices = 0;
3239: a->rowvalues = 0;
3240: a->getrowactive = PETSC_FALSE;
3242: PetscLayoutReference(matin->rmap,&mat->rmap);
3243: PetscLayoutReference(matin->cmap,&mat->cmap);
3245: if (oldmat->colmap) {
3246: #if defined (PETSC_USE_CTABLE)
3247: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3248: #else
3249: PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);
3250: PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));
3251: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3252: #endif
3253: } else a->colmap = 0;
3254: if (oldmat->garray) {
3255: PetscInt len;
3256: len = oldmat->B->cmap->n;
3257: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
3258: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3259: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3260: } else a->garray = 0;
3261:
3262: VecDuplicate(oldmat->lvec,&a->lvec);
3263: PetscLogObjectParent(mat,a->lvec);
3264: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3265: PetscLogObjectParent(mat,a->Mvctx);
3266: MatDuplicate(oldmat->A,cpvalues,&a->A);
3267: PetscLogObjectParent(mat,a->A);
3268: MatDuplicate(oldmat->B,cpvalues,&a->B);
3269: PetscLogObjectParent(mat,a->B);
3270: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3271: *newmat = mat;
3272: return(0);
3273: }
3279: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3280: {
3281: PetscScalar *vals,*svals;
3282: MPI_Comm comm = ((PetscObject)viewer)->comm;
3284: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3285: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3286: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3287: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3288: PetscInt cend,cstart,n,*rowners,sizesset=1;
3289: int fd;
3292: MPI_Comm_size(comm,&size);
3293: MPI_Comm_rank(comm,&rank);
3294: if (!rank) {
3295: PetscViewerBinaryGetDescriptor(viewer,&fd);
3296: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3297: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
3298: }
3300: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3302: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3303: M = header[1]; N = header[2];
3304: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3305: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3306: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3307:
3308: /* If global sizes are set, check if they are consistent with that given in the file */
3309: if (sizesset) {
3310: MatGetSize(newMat,&grows,&gcols);
3311: }
3312: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3313: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3315: /* determine ownership of all rows */
3316: if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317: else m = newMat->rmap->n; /* Set by user */
3318:
3319: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
3320: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3322: /* First process needs enough room for process with most rows */
3323: if (!rank) {
3324: mmax = rowners[1];
3325: for (i=2; i<size; i++) {
3326: mmax = PetscMax(mmax,rowners[i]);
3327: }
3328: } else mmax = m;
3330: rowners[0] = 0;
3331: for (i=2; i<=size; i++) {
3332: rowners[i] += rowners[i-1];
3333: }
3334: rstart = rowners[rank];
3335: rend = rowners[rank+1];
3337: /* distribute row lengths to all processors */
3338: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
3339: if (!rank) {
3340: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3341: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
3342: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3343: PetscMemzero(procsnz,size*sizeof(PetscInt));
3344: for (j=0; j<m; j++) {
3345: procsnz[0] += ourlens[j];
3346: }
3347: for (i=1; i<size; i++) {
3348: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3349: /* calculate the number of nonzeros on each processor */
3350: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351: procsnz[i] += rowlengths[j];
3352: }
3353: MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3354: }
3355: PetscFree(rowlengths);
3356: } else {
3357: MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3358: }
3360: if (!rank) {
3361: /* determine max buffer needed and allocate it */
3362: maxnz = 0;
3363: for (i=0; i<size; i++) {
3364: maxnz = PetscMax(maxnz,procsnz[i]);
3365: }
3366: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3368: /* read in my part of the matrix column indices */
3369: nz = procsnz[0];
3370: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3371: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3373: /* read in everyone else's and ship off */
3374: for (i=1; i<size; i++) {
3375: nz = procsnz[i];
3376: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3377: MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);
3378: }
3379: PetscFree(cols);
3380: } else {
3381: /* determine buffer space needed for message */
3382: nz = 0;
3383: for (i=0; i<m; i++) {
3384: nz += ourlens[i];
3385: }
3386: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3388: /* receive message of column indices */
3389: MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3390: }
3392: /* determine column ownership if matrix is not square */
3393: if (N != M) {
3394: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3395: else n = newMat->cmap->n;
3396: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3397: cstart = cend - n;
3398: } else {
3399: cstart = rstart;
3400: cend = rend;
3401: n = cend - cstart;
3402: }
3404: /* loop over local rows, determining number of off diagonal entries */
3405: PetscMemzero(offlens,m*sizeof(PetscInt));
3406: jj = 0;
3407: for (i=0; i<m; i++) {
3408: for (j=0; j<ourlens[i]; j++) {
3409: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410: jj++;
3411: }
3412: }
3414: for (i=0; i<m; i++) {
3415: ourlens[i] -= offlens[i];
3416: }
3417: if (!sizesset) {
3418: MatSetSizes(newMat,m,n,M,N);
3419: }
3420: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3422: for (i=0; i<m; i++) {
3423: ourlens[i] += offlens[i];
3424: }
3426: if (!rank) {
3427: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
3429: /* read in my part of the matrix numerical values */
3430: nz = procsnz[0];
3431: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3432:
3433: /* insert into matrix */
3434: jj = rstart;
3435: smycols = mycols;
3436: svals = vals;
3437: for (i=0; i<m; i++) {
3438: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3439: smycols += ourlens[i];
3440: svals += ourlens[i];
3441: jj++;
3442: }
3444: /* read in other processors and ship out */
3445: for (i=1; i<size; i++) {
3446: nz = procsnz[i];
3447: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3448: MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3449: }
3450: PetscFree(procsnz);
3451: } else {
3452: /* receive numeric values */
3453: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
3455: /* receive message of values */
3456: MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3458: /* insert into matrix */
3459: jj = rstart;
3460: smycols = mycols;
3461: svals = vals;
3462: for (i=0; i<m; i++) {
3463: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3464: smycols += ourlens[i];
3465: svals += ourlens[i];
3466: jj++;
3467: }
3468: }
3469: PetscFree2(ourlens,offlens);
3470: PetscFree(vals);
3471: PetscFree(mycols);
3472: PetscFree(rowners);
3474: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3475: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3476: return(0);
3477: }
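/*
   Usage sketch (added for illustration; not part of the original source): MatLoad_MPIAIJ() is invoked
   through MatLoad() with a binary viewer; "matrix.dat" is a hypothetical file name. The global sizes
   are taken from the file unless they were set on the matrix beforehand.
*/
static PetscErrorCode UsageSketch_MatLoad(MPI_Comm comm,Mat *A)
{
  PetscViewer viewer;
  PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);
  MatCreate(comm,A);
  MatSetType(*A,MATMPIAIJ);
  MatLoad(*A,viewer);
  PetscViewerDestroy(&viewer);
  return(0);
}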
3481: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3482: {
3484: IS iscol_local;
3485: PetscInt csize;
3488: ISGetLocalSize(iscol,&csize);
3489: if (call == MAT_REUSE_MATRIX) {
3490: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3491: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3492: } else {
3493: ISAllGather(iscol,&iscol_local);
3494: }
3495: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3496: if (call == MAT_INITIAL_MATRIX) {
3497: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3498: ISDestroy(&iscol_local);
3499: }
3500: return(0);
3501: }
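/*
   Usage sketch (added for illustration; not part of the original source): extracting a parallel
   submatrix with MatGetSubMatrix(). Each process lists the rows it wants to own and its share of the
   columns; the identity selection on a square matrix shown here simply copies A.
*/
static PetscErrorCode UsageSketch_MatGetSubMatrix(Mat A)
{
  IS       isrow,iscol;
  Mat      B;
  PetscInt rstart,rend;

  MatGetOwnershipRange(A,&rstart,&rend);
  ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);
  ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&iscol); /* assumes A is square */
  MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);
  ISDestroy(&isrow);
  ISDestroy(&iscol);
  MatDestroy(&B);
  return(0);
}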
3505: /*
3506: Not great since it makes two copies of the submatrix: first a SeqAIJ copy of the
3507: local piece, and then the final result obtained by concatenating the local matrices.
3508: Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3510: Note: This requires a sequential iscol with all indices.
3511: */
3512: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3513: {
3515: PetscMPIInt rank,size;
3516: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
3517: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518: Mat *local,M,Mreuse;
3519: MatScalar *vwork,*aa;
3520: MPI_Comm comm = ((PetscObject)mat)->comm;
3521: Mat_SeqAIJ *aij;
3525: MPI_Comm_rank(comm,&rank);
3526: MPI_Comm_size(comm,&size);
3528: if (call == MAT_REUSE_MATRIX) {
3529: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
3530: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531: local = &Mreuse;
3532: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
3533: } else {
3534: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
3535: Mreuse = *local;
3536: PetscFree(local);
3537: }
3539: /*
3540: m - number of local rows
3541: n - number of columns (same on all processors)
3542: rstart - first row in new global matrix generated
3543: */
3544: MatGetSize(Mreuse,&m,&n);
3545: if (call == MAT_INITIAL_MATRIX) {
3546: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3547: ii = aij->i;
3548: jj = aij->j;
3550: /*
3551: Determine the number of non-zeros in the diagonal and off-diagonal
3552: portions of the matrix in order to do correct preallocation
3553: */
3555: /* first get start and end of "diagonal" columns */
3556: if (csize == PETSC_DECIDE) {
3557: ISGetSize(isrow,&mglobal);
3558: if (mglobal == n) { /* square matrix */
3559: nlocal = m;
3560: } else {
3561: nlocal = n/size + ((n % size) > rank);
3562: }
3563: } else {
3564: nlocal = csize;
3565: }
3566: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3567: rstart = rend - nlocal;
3568: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3570: /* next, compute all the lengths */
3571: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
3572: olens = dlens + m;
3573: for (i=0; i<m; i++) {
3574: jend = ii[i+1] - ii[i];
3575: olen = 0;
3576: dlen = 0;
3577: for (j=0; j<jend; j++) {
3578: if (*jj < rstart || *jj >= rend) olen++;
3579: else dlen++;
3580: jj++;
3581: }
3582: olens[i] = olen;
3583: dlens[i] = dlen;
3584: }
3585: MatCreate(comm,&M);
3586: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3587: MatSetType(M,((PetscObject)mat)->type_name);
3588: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3589: PetscFree(dlens);
3590: } else {
3591: PetscInt ml,nl;
3593: M = *newmat;
3594: MatGetLocalSize(M,&ml,&nl);
3595: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3596: MatZeroEntries(M);
3597: /*
3598: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3599: rather than the slower MatSetValues().
3600: */
3601: M->was_assembled = PETSC_TRUE;
3602: M->assembled = PETSC_FALSE;
3603: }
3604: MatGetOwnershipRange(M,&rstart,&rend);
3605: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606: ii = aij->i;
3607: jj = aij->j;
3608: aa = aij->a;
3609: for (i=0; i<m; i++) {
3610: row = rstart + i;
3611: nz = ii[i+1] - ii[i];
3612: cwork = jj; jj += nz;
3613: vwork = aa; aa += nz;
3614: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3615: }
3617: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3618: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3619: *newmat = M;
3621: /* save submatrix used in processor for next request */
3622: if (call == MAT_INITIAL_MATRIX) {
3623: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3624: MatDestroy(&Mreuse);
3625: }
3627: return(0);
3628: }
3633: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634: {
3635: PetscInt m,cstart, cend,j,nnz,i,d;
3636: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637: const PetscInt *JJ;
3638: PetscScalar *values;
3642: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3644: PetscLayoutSetBlockSize(B->rmap,1);
3645: PetscLayoutSetBlockSize(B->cmap,1);
3646: PetscLayoutSetUp(B->rmap);
3647: PetscLayoutSetUp(B->cmap);
3648: m = B->rmap->n;
3649: cstart = B->cmap->rstart;
3650: cend = B->cmap->rend;
3651: rstart = B->rmap->rstart;
3653: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
3655: #if defined(PETSC_USE_DEBUG)
3656: for (i=0; i<m; i++) {
3657: nnz = Ii[i+1]- Ii[i];
3658: JJ = J + Ii[i];
3659: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3660:     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3661:     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662: }
3663: #endif
3665: for (i=0; i<m; i++) {
3666: nnz = Ii[i+1]- Ii[i];
3667: JJ = J + Ii[i];
3668: nnz_max = PetscMax(nnz_max,nnz);
3669: d = 0;
3670: for (j=0; j<nnz; j++) {
3671: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672: }
3673: d_nnz[i] = d;
3674: o_nnz[i] = nnz - d;
3675: }
3676: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3677: PetscFree2(d_nnz,o_nnz);
3679: if (v) values = (PetscScalar*)v;
3680: else {
3681: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
3682: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
3683: }
3685: for (i=0; i<m; i++) {
3686: ii = i + rstart;
3687: nnz = Ii[i+1]- Ii[i];
3688: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3689: }
3690: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3691: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3693: if (!v) {
3694: PetscFree(values);
3695: }
3696: return(0);
3697: }
3702: /*@
3703: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704: (the default parallel PETSc format).
3706: Collective on MPI_Comm
3708: Input Parameters:
3709: + B - the matrix
3710: . i - the indices into j for the start of each local row (starts with zero)
3711: . j - the column indices for each local row (starts with zero)
3712: - v - optional values in the matrix
3714: Level: developer
3716: Notes:
3717:     The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3718:     thus you CANNOT change the matrix entries by changing the values of v[] after you have
3719:     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3721: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3723:     The format used for the sparse matrix input is equivalent to a
3724:     row-major ordering, i.e., for the following matrix the input data expected is
3725:     as shown:
3727: 1 0 0
3728: 2 0 3 P0
3729: -------
3730: 4 5 6 P1
3732: Process0 [P0]: rows_owned=[0,1]
3733: i = {0,1,3} [size = nrow+1 = 2+1]
3734:         j = {0,0,2} [size = nz = 3]
3735:         v = {1,2,3} [size = nz = 3]
3737: Process1 [P1]: rows_owned=[2]
3738: i = {0,3} [size = nrow+1 = 1+1]
3739:         j = {0,1,2} [size = nz = 3]
3740:         v = {4,5,6} [size = nz = 3]
3742: .keywords: matrix, aij, compressed row, sparse, parallel
3744: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746: @*/
3747: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748: {
3752: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3753: return(0);
3754: }
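/*
   Minimal sketch (added for illustration) of calling MatMPIAIJSetPreallocationCSR()
   with the two-process example shown in the man page above: the 3x3 matrix has rows
   0-1 on process 0 and row 2 on process 1.  The communicator comm and the rank value
   are assumed to be set up by the caller.

      Mat         B;
      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};     process 0 data
      PetscScalar v0[] = {1,2,3};
      PetscInt    i1[] = {0,3},   j1[] = {0,1,2};     process 1 data
      PetscScalar v1[] = {4,5,6};

      MatCreate(comm,&B);
      MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);
      MatSetType(B,MATMPIAIJ);
      if (!rank) {MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);}
      else       {MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);}
*/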
3758: /*@C
3759: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760: (the default parallel PETSc format). For good matrix assembly performance
3761: the user should preallocate the matrix storage by setting the parameters
3762: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3763: performance can be increased by more than a factor of 50.
3765: Collective on MPI_Comm
3767: Input Parameters:
3768: + A - the matrix
3769: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3770: (same value is used for all local rows)
3771: . d_nnz - array containing the number of nonzeros in the various rows of the
3772: DIAGONAL portion of the local submatrix (possibly different for each row)
3773: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774: The size of this array is equal to the number of local rows, i.e 'm'.
3775: You must leave room for the diagonal entry even if it is zero.
3776: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777: submatrix (same value is used for all local rows).
3778: - o_nnz - array containing the number of nonzeros in the various rows of the
3779: OFF-DIAGONAL portion of the local submatrix (possibly different for
3780: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781: structure. The size of this array is equal to the number
3782: of local rows, i.e 'm'.
3784: If the *_nnz parameter is given then the *_nz parameter is ignored
3786: The AIJ format (also called the Yale sparse matrix format or
3787: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3788: storage. The stored row and column indices begin with zero.
3789: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3791: The parallel matrix is partitioned such that the first m0 rows belong to
3792: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3795:    The DIAGONAL portion of the local submatrix of a processor can be defined
3796:    as the submatrix which is obtained by extracting the part corresponding to
3797:    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798:    first row that belongs to the processor, r2 is the last row belonging to
3799:    this processor, and c1-c2 is the range of indices of the local part of a
3800:    vector suitable for applying the matrix to. This is an mxn matrix. In the
3801:    common case of a square matrix, the row and column ranges are the same and
3802:    the DIAGONAL part is also square. The remaining portion of the local
3803:    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3805: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3807: You can call MatGetInfo() to get information on how effective the preallocation was;
3808: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3809: You can also run with the option -info and look for messages with the string
3810: malloc in them to see if additional memory allocation was needed.
3812: Example usage:
3813:
3814:    Consider the following 8x8 matrix with 34 non-zero values that is
3815:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3816: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3817: as follows:
3819: .vb
3820: 1 2 0 | 0 3 0 | 0 4
3821: Proc0 0 5 6 | 7 0 0 | 8 0
3822: 9 0 10 | 11 0 0 | 12 0
3823: -------------------------------------
3824: 13 0 14 | 15 16 17 | 0 0
3825: Proc1 0 18 0 | 19 20 21 | 0 0
3826: 0 0 0 | 22 23 0 | 24 0
3827: -------------------------------------
3828: Proc2 25 26 27 | 0 0 28 | 29 0
3829: 30 0 0 | 31 32 33 | 0 34
3830: .ve
3832: This can be represented as a collection of submatrices as:
3834: .vb
3835: A B C
3836: D E F
3837: G H I
3838: .ve
3840: Where the submatrices A,B,C are owned by proc0, D,E,F are
3841: owned by proc1, G,H,I are owned by proc2.
3843: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845: The 'M','N' parameters are 8,8, and have the same values on all procs.
3847: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3849: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3851:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3852:    matrix, and [DF] as another SeqAIJ matrix.
3854: When d_nz, o_nz parameters are specified, d_nz storage elements are
3855: allocated for every row of the local diagonal submatrix, and o_nz
3856:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3857:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3858:    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3859: In this case, the values of d_nz,o_nz are:
3860: .vb
3861: proc0 : dnz = 2, o_nz = 2
3862: proc1 : dnz = 3, o_nz = 2
3863: proc2 : dnz = 1, o_nz = 4
3864: .ve
3865: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3868: 34 values.
3870: When d_nnz, o_nnz parameters are specified, the storage is specified
3871:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3872: In the above case the values for d_nnz,o_nnz are:
3873: .vb
3874: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3877: .ve
3878:    Here the space allocated is the sum of all the above values, i.e. 34, and
3879:    hence the preallocation is perfect.
3881: Level: intermediate
3883: .keywords: matrix, aij, compressed row, sparse, parallel
3885: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886: MPIAIJ, MatGetInfo()
3887: @*/
3888: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889: {
3893: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3894: return(0);
3895: }
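/*
   Sketch (added for illustration) of the per-row preallocation described in the man
   page above, using the proc0 values of the 8x8 example (d_nnz = [2,2,2],
   o_nnz = [2,2,2]); comm is assumed to be a 3-process communicator and the other
   processes make the analogous calls with their own local sizes and arrays.

      Mat      A;
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

      MatCreate(comm,&A);
      MatSetSizes(A,3,3,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
*/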
3899: /*@
3900:    MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
3901:    in standard CSR format.
3903: Collective on MPI_Comm
3905: Input Parameters:
3906: + comm - MPI communicator
3907: . m - number of local rows (Cannot be PETSC_DECIDE)
3908: . n - This value should be the same as the local size used in creating the
3909: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3910: calculated if N is given) For square matrices n is almost always m.
3911: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3912: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3913: . i - row indices
3914: . j - column indices
3915: - a - matrix values
3917: Output Parameter:
3918: . mat - the matrix
3920: Level: intermediate
3922: Notes:
3923: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3924: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3925: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3927: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3929:    The format used for the sparse matrix input is equivalent to a
3930:    row-major ordering, i.e., for the following matrix the input data expected is
3931:    as shown:
3933: 1 0 0
3934: 2 0 3 P0
3935: -------
3936: 4 5 6 P1
3938: Process0 [P0]: rows_owned=[0,1]
3939: i = {0,1,3} [size = nrow+1 = 2+1]
3940:         j = {0,0,2} [size = nz = 3]
3941:         v = {1,2,3} [size = nz = 3]
3943: Process1 [P1]: rows_owned=[2]
3944: i = {0,3} [size = nrow+1 = 1+1]
3945:         j = {0,1,2} [size = nz = 3]
3946:         v = {4,5,6} [size = nz = 3]
3948: .keywords: matrix, aij, compressed row, sparse, parallel
3950: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3951: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3952: @*/
3953: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3954: {
3958: if (i[0]) {
3959: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3960: }
3961: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3962: MatCreate(comm,mat);
3963: MatSetSizes(*mat,m,n,M,N);
3964: MatSetType(*mat,MATMPIAIJ);
3965: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
3966: return(0);
3967: }
3971: /*@C
3972: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3973: (the default parallel PETSc format). For good matrix assembly performance
3974: the user should preallocate the matrix storage by setting the parameters
3975: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3976: performance can be increased by more than a factor of 50.
3978: Collective on MPI_Comm
3980: Input Parameters:
3981: + comm - MPI communicator
3982: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3983: This value should be the same as the local size used in creating the
3984: y vector for the matrix-vector product y = Ax.
3985: . n - This value should be the same as the local size used in creating the
3986: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3987: calculated if N is given) For square matrices n is almost always m.
3988: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3989: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3990: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3991: (same value is used for all local rows)
3992: . d_nnz - array containing the number of nonzeros in the various rows of the
3993: DIAGONAL portion of the local submatrix (possibly different for each row)
3994: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3995: The size of this array is equal to the number of local rows, i.e 'm'.
3996: You must leave room for the diagonal entry even if it is zero.
3997: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3998: submatrix (same value is used for all local rows).
3999: - o_nnz - array containing the number of nonzeros in the various rows of the
4000: OFF-DIAGONAL portion of the local submatrix (possibly different for
4001: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4002: structure. The size of this array is equal to the number
4003: of local rows, i.e 'm'.
4005: Output Parameter:
4006: . A - the matrix
4008: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4009:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4010: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4012: Notes:
4013: If the *_nnz parameter is given then the *_nz parameter is ignored
4015: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4016: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4017: storage requirements for this matrix.
4019: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4020:    processor then it must be used on all processors that share the object for
4021: that argument.
4023: The user MUST specify either the local or global matrix dimensions
4024: (possibly both).
4026: The parallel matrix is partitioned across processors such that the
4027: first m0 rows belong to process 0, the next m1 rows belong to
4028: process 1, the next m2 rows belong to process 2 etc.. where
4029:    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4030:    values corresponding to an [m x N] submatrix.
4032: The columns are logically partitioned with the n0 columns belonging
4033: to 0th partition, the next n1 columns belonging to the next
4034:    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4036: The DIAGONAL portion of the local submatrix on any given processor
4037: is the submatrix corresponding to the rows and columns m,n
4038:    corresponding to the given processor, i.e., the diagonal matrix on
4039:    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4040:    etc. The remaining portion of the local submatrix [m x (N-n)]
4041:    constitutes the OFF-DIAGONAL portion. The example below better
4042: illustrates this concept.
4044: For a square global matrix we define each processor's diagonal portion
4045: to be its local rows and the corresponding columns (a square submatrix);
4046: each processor's off-diagonal portion encompasses the remainder of the
4047: local matrix (a rectangular submatrix).
4049: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4051: When calling this routine with a single process communicator, a matrix of
4052: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4053: type of communicator, use the construction mechanism:
4054: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4055:
4056: By default, this format uses inodes (identical nodes) when possible.
4057: We search for consecutive rows with the same nonzero structure, thereby
4058: reusing matrix information to achieve increased efficiency.
4060: Options Database Keys:
4061: + -mat_no_inode - Do not use inodes
4062: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4063: - -mat_aij_oneindex - Internally use indexing starting at 1
4064: rather than 0. Note that when calling MatSetValues(),
4065: the user still MUST index entries starting at 0!
4068: Example usage:
4069:
4070:    Consider the following 8x8 matrix with 34 non-zero values that is
4071:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4072: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4073: as follows:
4075: .vb
4076: 1 2 0 | 0 3 0 | 0 4
4077: Proc0 0 5 6 | 7 0 0 | 8 0
4078: 9 0 10 | 11 0 0 | 12 0
4079: -------------------------------------
4080: 13 0 14 | 15 16 17 | 0 0
4081: Proc1 0 18 0 | 19 20 21 | 0 0
4082: 0 0 0 | 22 23 0 | 24 0
4083: -------------------------------------
4084: Proc2 25 26 27 | 0 0 28 | 29 0
4085: 30 0 0 | 31 32 33 | 0 34
4086: .ve
4088: This can be represented as a collection of submatrices as:
4090: .vb
4091: A B C
4092: D E F
4093: G H I
4094: .ve
4096: Where the submatrices A,B,C are owned by proc0, D,E,F are
4097: owned by proc1, G,H,I are owned by proc2.
4099: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4100: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4101: The 'M','N' parameters are 8,8, and have the same values on all procs.
4103: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4104: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4105: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4106:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4107:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4108:    matrix, and [DF] as another SeqAIJ matrix.
4110: When d_nz, o_nz parameters are specified, d_nz storage elements are
4111: allocated for every row of the local diagonal submatrix, and o_nz
4112:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4113:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4114:    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4115: In this case, the values of d_nz,o_nz are:
4116: .vb
4117: proc0 : dnz = 2, o_nz = 2
4118: proc1 : dnz = 3, o_nz = 2
4119: proc2 : dnz = 1, o_nz = 4
4120: .ve
4121: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4122: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4123:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4124: 34 values.
4126: When d_nnz, o_nnz parameters are specified, the storage is specified
4127:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4128: In the above case the values for d_nnz,o_nnz are:
4129: .vb
4130: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4131: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4132: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4133: .ve
4134:    Here the space allocated is the sum of all the above values, i.e. 34, and
4135:    hence the preallocation is perfect.
4137: Level: intermediate
4139: .keywords: matrix, aij, compressed row, sparse, parallel
4141: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4142: MPIAIJ, MatCreateMPIAIJWithArrays()
4143: @*/
4144: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4145: {
4147: PetscMPIInt size;
4150: MatCreate(comm,A);
4151: MatSetSizes(*A,m,n,M,N);
4152: MPI_Comm_size(comm,&size);
4153: if (size > 1) {
4154: MatSetType(*A,MATMPIAIJ);
4155: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4156: } else {
4157: MatSetType(*A,MATSEQAIJ);
4158: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4159: }
4160: return(0);
4161: }
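/*
   The construction mechanism recommended in the man page above, written out as a
   sketch (added for illustration); unlike MatCreateMPIAIJ(), it yields a matrix of
   type MPIAIJ even on a single-process communicator.  The sizes and preallocation
   arguments are assumed to be defined by the caller.

      MatCreate(comm,&A);
      MatSetType(A,MATMPIAIJ);
      MatSetSizes(A,m,n,M,N);
      MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
*/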
4165: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4166: {
4167: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4170: *Ad = a->A;
4171: *Ao = a->B;
4172: *colmap = a->garray;
4173: return(0);
4174: }
4178: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4179: {
4181: PetscInt i;
4182: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4185: if (coloring->ctype == IS_COLORING_GLOBAL) {
4186: ISColoringValue *allcolors,*colors;
4187: ISColoring ocoloring;
4189: /* set coloring for diagonal portion */
4190: MatSetColoring_SeqAIJ(a->A,coloring);
4192: /* set coloring for off-diagonal portion */
4193: ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
4194: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4195: for (i=0; i<a->B->cmap->n; i++) {
4196: colors[i] = allcolors[a->garray[i]];
4197: }
4198: PetscFree(allcolors);
4199: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4200: MatSetColoring_SeqAIJ(a->B,ocoloring);
4201: ISColoringDestroy(&ocoloring);
4202: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4203: ISColoringValue *colors;
4204: PetscInt *larray;
4205: ISColoring ocoloring;
4207: /* set coloring for diagonal portion */
4208: PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);
4209: for (i=0; i<a->A->cmap->n; i++) {
4210: larray[i] = i + A->cmap->rstart;
4211: }
4212: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);
4213: PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);
4214: for (i=0; i<a->A->cmap->n; i++) {
4215: colors[i] = coloring->colors[larray[i]];
4216: }
4217: PetscFree(larray);
4218: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4219: MatSetColoring_SeqAIJ(a->A,ocoloring);
4220: ISColoringDestroy(&ocoloring);
4222: /* set coloring for off-diagonal portion */
4223: PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);
4224: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);
4225: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4226: for (i=0; i<a->B->cmap->n; i++) {
4227: colors[i] = coloring->colors[larray[i]];
4228: }
4229: PetscFree(larray);
4230: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4231: MatSetColoring_SeqAIJ(a->B,ocoloring);
4232: ISColoringDestroy(&ocoloring);
4233: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4235: return(0);
4236: }
4238: #if defined(PETSC_HAVE_ADIC)
4241: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4242: {
4243: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4247: MatSetValuesAdic_SeqAIJ(a->A,advalues);
4248: MatSetValuesAdic_SeqAIJ(a->B,advalues);
4249: return(0);
4250: }
4251: #endif
4255: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4256: {
4257: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4261: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4262: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4263: return(0);
4264: }
4268: /*@
4269:    MatMerge - Creates a single large PETSc matrix by concatenating sequential
4270: matrices from each processor
4272: Collective on MPI_Comm
4274: Input Parameters:
4275: +    comm - the communicator the parallel matrix will live on
4276: .    inmat - the input sequential matrix (one per process)
4277: . n - number of local columns (or PETSC_DECIDE)
4278: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4280: Output Parameter:
4281: . outmat - the parallel matrix generated
4283: Level: advanced
4285: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4287: @*/
4288: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4289: {
4291: PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz;
4292: PetscInt *indx;
4293: PetscScalar *values;
4296: MatGetSize(inmat,&m,&N);
4297: if (scall == MAT_INITIAL_MATRIX){
4298: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4299: if (n == PETSC_DECIDE){
4300: PetscSplitOwnership(comm,&n,&N);
4301: }
4302: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4303: rstart -= m;
4305: MatPreallocateInitialize(comm,m,n,dnz,onz);
4306: for (i=0;i<m;i++) {
4307: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4308: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4309: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4310: }
4311: /* This routine will ONLY return MPIAIJ type matrix */
4312: MatCreate(comm,outmat);
4313: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4314: MatSetType(*outmat,MATMPIAIJ);
4315: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4316: MatPreallocateFinalize(dnz,onz);
4317:
4318: } else if (scall == MAT_REUSE_MATRIX){
4319: MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
4320: } else {
4321: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4322: }
4324: for (i=0;i<m;i++) {
4325: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4326: Ii = i + rstart;
4327: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4328: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4329: }
4330: MatDestroy(&inmat);
4331: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
4332: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
4334: return(0);
4335: }
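/*
   Usage sketch (added for illustration) for MatMerge() above: every process builds a
   sequential matrix with the same number of columns N, and MatMerge() stacks the
   local rows into one parallel MPIAIJ matrix; note that MatMerge() destroys the input
   matrix.  mlocal, N, nz and comm are assumed to be defined by the caller.

      MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,nz,PETSC_NULL,&local);
      ... fill local with MatSetValues() and assemble it ...
      MatMerge(comm,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);
*/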
4339: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4340: {
4341: PetscErrorCode ierr;
4342: PetscMPIInt rank;
4343: PetscInt m,N,i,rstart,nnz;
4344: size_t len;
4345: const PetscInt *indx;
4346: PetscViewer out;
4347: char *name;
4348: Mat B;
4349: const PetscScalar *values;
4352: MatGetLocalSize(A,&m,0);
4353: MatGetSize(A,0,&N);
4354: /* Should this be the type of the diagonal block of A? */
4355: MatCreate(PETSC_COMM_SELF,&B);
4356: MatSetSizes(B,m,N,m,N);
4357: MatSetType(B,MATSEQAIJ);
4358: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
4359: MatGetOwnershipRange(A,&rstart,0);
4360: for (i=0;i<m;i++) {
4361: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4362: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4363: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4364: }
4365: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4366: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4368: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
4369: PetscStrlen(outfile,&len);
4370: PetscMalloc((len+5)*sizeof(char),&name);
4371: sprintf(name,"%s.%d",outfile,rank);
4372: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4373: PetscFree(name);
4374: MatView(B,out);
4375: PetscViewerDestroy(&out);
4376: MatDestroy(&B);
4377: return(0);
4378: }
4383: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4384: {
4385: PetscErrorCode ierr;
4386: Mat_Merge_SeqsToMPI *merge;
4387: PetscContainer container;
4390: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
4391: if (container) {
4392: PetscContainerGetPointer(container,(void **)&merge);
4393: PetscFree(merge->id_r);
4394: PetscFree(merge->len_s);
4395: PetscFree(merge->len_r);
4396: PetscFree(merge->bi);
4397: PetscFree(merge->bj);
4398: PetscFree(merge->buf_ri[0]);
4399: PetscFree(merge->buf_ri);
4400: PetscFree(merge->buf_rj[0]);
4401: PetscFree(merge->buf_rj);
4402: PetscFree(merge->coi);
4403: PetscFree(merge->coj);
4404: PetscFree(merge->owners_co);
4405: PetscLayoutDestroy(&merge->rowmap);
4406: PetscFree(merge);
4407: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4408: }
4409: MatDestroy_MPIAIJ(A);
4410: return(0);
4411: }
4413: #include <../src/mat/utils/freespace.h>
4414: #include <petscbt.h>
4418: /*@C
4419:    MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4420: matrices from each processor
4422: Collective on MPI_Comm
4424: Input Parameters:
4425: +    comm - the communicator the parallel matrix will live on
4426: .    seqmat - the input sequential matrix (one per process)
4427: . m - number of local rows (or PETSC_DECIDE)
4428: . n - number of local columns (or PETSC_DECIDE)
4429: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4431: Output Parameter:
4432: . mpimat - the parallel matrix generated
4434: Level: advanced
4436: Notes:
4437: The dimensions of the sequential matrix in each processor MUST be the same.
4438: The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4439: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4440: @*/
4441: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4442: {
4443: PetscErrorCode ierr;
4444: MPI_Comm comm=((PetscObject)mpimat)->comm;
4445: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4446: PetscMPIInt size,rank,taga,*len_s;
4447: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4448: PetscInt proc,m;
4449: PetscInt **buf_ri,**buf_rj;
4450: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4451: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4452: MPI_Request *s_waits,*r_waits;
4453: MPI_Status *status;
4454: MatScalar *aa=a->a;
4455: MatScalar **abuf_r,*ba_i;
4456: Mat_Merge_SeqsToMPI *merge;
4457: PetscContainer container;
4460: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4462: MPI_Comm_size(comm,&size);
4463: MPI_Comm_rank(comm,&rank);
4465: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
4466: PetscContainerGetPointer(container,(void **)&merge);
4468: bi = merge->bi;
4469: bj = merge->bj;
4470: buf_ri = merge->buf_ri;
4471: buf_rj = merge->buf_rj;
4473: PetscMalloc(size*sizeof(MPI_Status),&status);
4474: owners = merge->rowmap->range;
4475: len_s = merge->len_s;
4477: /* send and recv matrix values */
4478: /*-----------------------------*/
4479: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4480: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4482: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
4483: for (proc=0,k=0; proc<size; proc++){
4484: if (!len_s[proc]) continue;
4485: i = owners[proc];
4486: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4487: k++;
4488: }
4490: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4491: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4492: PetscFree(status);
4494: PetscFree(s_waits);
4495: PetscFree(r_waits);
4497: /* insert mat values of mpimat */
4498: /*----------------------------*/
4499: PetscMalloc(N*sizeof(PetscScalar),&ba_i);
4500: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4502: for (k=0; k<merge->nrecv; k++){
4503: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4504: nrows = *(buf_ri_k[k]);
4505: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4506:     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4507: }
4509: /* set values of ba */
4510: m = merge->rowmap->n;
4511: for (i=0; i<m; i++) {
4512: arow = owners[rank] + i;
4513: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4514: bnzi = bi[i+1] - bi[i];
4515: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4517: /* add local non-zero vals of this proc's seqmat into ba */
4518: anzi = ai[arow+1] - ai[arow];
4519: aj = a->j + ai[arow];
4520: aa = a->a + ai[arow];
4521: nextaj = 0;
4522: for (j=0; nextaj<anzi; j++){
4523: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4524: ba_i[j] += aa[nextaj++];
4525: }
4526: }
4528: /* add received vals into ba */
4529: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4530: /* i-th row */
4531: if (i == *nextrow[k]) {
4532: anzi = *(nextai[k]+1) - *nextai[k];
4533: aj = buf_rj[k] + *(nextai[k]);
4534: aa = abuf_r[k] + *(nextai[k]);
4535: nextaj = 0;
4536: for (j=0; nextaj<anzi; j++){
4537: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4538: ba_i[j] += aa[nextaj++];
4539: }
4540: }
4541: nextrow[k]++; nextai[k]++;
4542: }
4543: }
4544: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4545: }
4546: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4547: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4549: PetscFree(abuf_r[0]);
4550: PetscFree(abuf_r);
4551: PetscFree(ba_i);
4552: PetscFree3(buf_ri_k,nextrow,nextai);
4553: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4554: return(0);
4555: }
4561: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4562: {
4563: PetscErrorCode ierr;
4564: Mat B_mpi;
4565: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4566: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4567: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4568: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4569: PetscInt len,proc,*dnz,*onz;
4570: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4571: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4572: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4573: MPI_Status *status;
4574: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4575: PetscBT lnkbt;
4576: Mat_Merge_SeqsToMPI *merge;
4577: PetscContainer container;
4580: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4582: /* make sure it is a PETSc comm */
4583: PetscCommDuplicate(comm,&comm,PETSC_NULL);
4584: MPI_Comm_size(comm,&size);
4585: MPI_Comm_rank(comm,&rank);
4587: PetscNew(Mat_Merge_SeqsToMPI,&merge);
4588: PetscMalloc(size*sizeof(MPI_Status),&status);
4590: /* determine row ownership */
4591: /*---------------------------------------------------------*/
4592: PetscLayoutCreate(comm,&merge->rowmap);
4593: PetscLayoutSetLocalSize(merge->rowmap,m);
4594: PetscLayoutSetSize(merge->rowmap,M);
4595: PetscLayoutSetBlockSize(merge->rowmap,1);
4596: PetscLayoutSetUp(merge->rowmap);
4597: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
4598: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
4600: m = merge->rowmap->n;
4601: M = merge->rowmap->N;
4602: owners = merge->rowmap->range;
4604: /* determine the number of messages to send, their lengths */
4605: /*---------------------------------------------------------*/
4606: len_s = merge->len_s;
4608: len = 0; /* length of buf_si[] */
4609: merge->nsend = 0;
4610: for (proc=0; proc<size; proc++){
4611: len_si[proc] = 0;
4612: if (proc == rank){
4613: len_s[proc] = 0;
4614: } else {
4615: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4616: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4617: }
4618: if (len_s[proc]) {
4619: merge->nsend++;
4620: nrows = 0;
4621: for (i=owners[proc]; i<owners[proc+1]; i++){
4622: if (ai[i+1] > ai[i]) nrows++;
4623: }
4624: len_si[proc] = 2*(nrows+1);
4625: len += len_si[proc];
4626: }
4627: }
4629: /* determine the number and length of messages to receive for ij-structure */
4630: /*-------------------------------------------------------------------------*/
4631: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
4632: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4634: /* post the Irecv of j-structure */
4635: /*-------------------------------*/
4636: PetscCommGetNewTag(comm,&tagj);
4637: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4639: /* post the Isend of j-structure */
4640: /*--------------------------------*/
4641: PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);
4643: for (proc=0, k=0; proc<size; proc++){
4644: if (!len_s[proc]) continue;
4645: i = owners[proc];
4646: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4647: k++;
4648: }
4650: /* receives and sends of j-structure are complete */
4651: /*------------------------------------------------*/
4652: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4653: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4655: /* send and recv i-structure */
4656: /*---------------------------*/
4657: PetscCommGetNewTag(comm,&tagi);
4658: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4660: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
4661: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4662: for (proc=0,k=0; proc<size; proc++){
4663: if (!len_s[proc]) continue;
4664: /* form outgoing message for i-structure:
4665: buf_si[0]: nrows to be sent
4666: [1:nrows]: row index (global)
4667: [nrows+1:2*nrows+1]: i-structure index
4668: */
4669: /*-------------------------------------------*/
4670: nrows = len_si[proc]/2 - 1;
4671: buf_si_i = buf_si + nrows+1;
4672: buf_si[0] = nrows;
4673: buf_si_i[0] = 0;
4674: nrows = 0;
4675: for (i=owners[proc]; i<owners[proc+1]; i++){
4676: anzi = ai[i+1] - ai[i];
4677: if (anzi) {
4678: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4679: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4680: nrows++;
4681: }
4682: }
4683: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4684: k++;
4685: buf_si += len_si[proc];
4686: }
4688: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4689: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4691: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4692: for (i=0; i<merge->nrecv; i++){
4693: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4694: }
4696: PetscFree(len_si);
4697: PetscFree(len_ri);
4698: PetscFree(rj_waits);
4699: PetscFree2(si_waits,sj_waits);
4700: PetscFree(ri_waits);
4701: PetscFree(buf_s);
4702: PetscFree(status);
4704: /* compute a local seq matrix in each processor */
4705: /*----------------------------------------------*/
4706: /* allocate bi array and free space for accumulating nonzero column info */
4707: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
4708: bi[0] = 0;
4710: /* create and initialize a linked list */
4711: nlnk = N+1;
4712: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4714: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4715: len = 0;
4716: len = ai[owners[rank+1]] - ai[owners[rank]];
4717: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4718: current_space = free_space;
4720: /* determine symbolic info for each local row */
4721: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4723: for (k=0; k<merge->nrecv; k++){
4724: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4725: nrows = *buf_ri_k[k];
4726: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4727:     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4728: }
4730: MatPreallocateInitialize(comm,m,n,dnz,onz);
4731: len = 0;
4732: for (i=0;i<m;i++) {
4733: bnzi = 0;
4734: /* add local non-zero cols of this proc's seqmat into lnk */
4735: arow = owners[rank] + i;
4736: anzi = ai[arow+1] - ai[arow];
4737: aj = a->j + ai[arow];
4738: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
4739: bnzi += nlnk;
4740: /* add received col data into lnk */
4741: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4742: if (i == *nextrow[k]) { /* i-th row */
4743: anzi = *(nextai[k]+1) - *nextai[k];
4744: aj = buf_rj[k] + *nextai[k];
4745: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
4746: bnzi += nlnk;
4747: nextrow[k]++; nextai[k]++;
4748: }
4749: }
4750: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4752: /* if free space is not available, make more free space */
4753: if (current_space->local_remaining<bnzi) {
4754:       PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);
4755: nspacedouble++;
4756: }
4757: /* copy data into free space, then initialize lnk */
4758: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4759: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4761: current_space->array += bnzi;
4762: current_space->local_used += bnzi;
4763: current_space->local_remaining -= bnzi;
4765: bi[i+1] = bi[i] + bnzi;
4766: }
4768: PetscFree3(buf_ri_k,nextrow,nextai);
4770: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
4771: PetscFreeSpaceContiguous(&free_space,bj);
4772: PetscLLDestroy(lnk,lnkbt);
4774: /* create symbolic parallel matrix B_mpi */
4775: /*---------------------------------------*/
4776: MatCreate(comm,&B_mpi);
4777: if (n==PETSC_DECIDE) {
4778: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4779: } else {
4780: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4781: }
4782: MatSetType(B_mpi,MATMPIAIJ);
4783: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4784: MatPreallocateFinalize(dnz,onz);
4786: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4787: B_mpi->assembled = PETSC_FALSE;
4788: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4789: merge->bi = bi;
4790: merge->bj = bj;
4791: merge->buf_ri = buf_ri;
4792: merge->buf_rj = buf_rj;
4793: merge->coi = PETSC_NULL;
4794: merge->coj = PETSC_NULL;
4795: merge->owners_co = PETSC_NULL;
4797: PetscCommDestroy(&comm);
4799: /* attach the supporting struct to B_mpi for reuse */
4800: PetscContainerCreate(PETSC_COMM_SELF,&container);
4801: PetscContainerSetPointer(container,merge);
4802: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4803: PetscContainerDestroy(&container);
4804: *mpimat = B_mpi;
4806: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4807: return(0);
4808: }
4812: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4813: {
4814: PetscErrorCode ierr;
4817: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4818: if (scall == MAT_INITIAL_MATRIX){
4819: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
4820: }
4821: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
4822: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4823: return(0);
4824: }
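/*
   Usage sketch (added for illustration) of the symbolic/numeric split above: the
   symbolic phase runs only on the first call with MAT_INITIAL_MATRIX, while the
   numeric phase runs on every call, so the merge can be repeated cheaply when only
   the values (not the nonzero pattern) of seqmat change.

      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
      ... update the values of seqmat ...
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);
*/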
4828: /*@
4829:     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4830:     mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4831:     with MatGetSize()
4833: Not Collective
4835: Input Parameters:
4836: + A - the matrix
4837: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4839: Output Parameter:
4840: . A_loc - the local sequential matrix generated
4842: Level: developer
4844: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4846: @*/
4847: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4848: {
4849: PetscErrorCode ierr;
4850: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4851: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4852: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4853: MatScalar *aa=a->a,*ba=b->a,*cam;
4854: PetscScalar *ca;
4855: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4856: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4857: PetscBool match;
4860: PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4861: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4862: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4863: if (scall == MAT_INITIAL_MATRIX){
4864: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
4865: ci[0] = 0;
4866: for (i=0; i<am; i++){
4867: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4868: }
4869: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
4870: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
4871: k = 0;
4872: for (i=0; i<am; i++) {
4873: ncols_o = bi[i+1] - bi[i];
4874: ncols_d = ai[i+1] - ai[i];
4875: /* off-diagonal portion of A */
4876: for (jo=0; jo<ncols_o; jo++) {
4877: col = cmap[*bj];
4878: if (col >= cstart) break;
4879: cj[k] = col; bj++;
4880: ca[k++] = *ba++;
4881: }
4882: /* diagonal portion of A */
4883: for (j=0; j<ncols_d; j++) {
4884: cj[k] = cstart + *aj++;
4885: ca[k++] = *aa++;
4886: }
4887: /* off-diagonal portion of A */
4888: for (j=jo; j<ncols_o; j++) {
4889: cj[k] = cmap[*bj++];
4890: ca[k++] = *ba++;
4891: }
4892: }
4893: /* put together the new matrix */
4894: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4895: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4896: /* Since these are PETSc arrays, change flags to free them as necessary. */
4897: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4898: mat->free_a = PETSC_TRUE;
4899: mat->free_ij = PETSC_TRUE;
4900: mat->nonew = 0;
4901: } else if (scall == MAT_REUSE_MATRIX){
4902: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4903: ci = mat->i; cj = mat->j; cam = mat->a;
4904: for (i=0; i<am; i++) {
4905: /* off-diagonal portion of A */
4906: ncols_o = bi[i+1] - bi[i];
4907: for (jo=0; jo<ncols_o; jo++) {
4908: col = cmap[*bj];
4909: if (col >= cstart) break;
4910: *cam++ = *ba++; bj++;
4911: }
4912: /* diagonal portion of A */
4913: ncols_d = ai[i+1] - ai[i];
4914: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4915: /* off-diagonal portion of A */
4916: for (j=jo; j<ncols_o; j++) {
4917: *cam++ = *ba++; bj++;
4918: }
4919: }
4920: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4921: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
4922: return(0);
4923: }
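/*
   Usage sketch (added for illustration): extract the local rows of an MPIAIJ matrix
   A as a sequential matrix, refill it after the values of A change, and destroy it
   when no longer needed.

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... update the entries of A, keeping the same nonzero pattern ...
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(&A_loc);
*/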
4927: /*@C
4928: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4930: Not Collective
4932: Input Parameters:
4933: + A - the matrix
4934: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4935: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
4937: Output Parameter:
4938: . A_loc - the local sequential matrix generated
4940: Level: developer
4942: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4944: @*/
4945: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4946: {
4947: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4948: PetscErrorCode ierr;
4949: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4950: IS isrowa,iscola;
4951: Mat *aloc;
4952: PetscBool match;
4955: PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4956: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4957: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
4958: if (!row){
4959: start = A->rmap->rstart; end = A->rmap->rend;
4960: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
4961: } else {
4962: isrowa = *row;
4963: }
4964: if (!col){
4965: start = A->cmap->rstart;
4966: cmap = a->garray;
4967: nzA = a->A->cmap->n;
4968: nzB = a->B->cmap->n;
4969: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4970: ncols = 0;
4971: for (i=0; i<nzB; i++) {
4972: if (cmap[i] < start) idx[ncols++] = cmap[i];
4973: else break;
4974: }
4975: imark = i;
4976: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4977: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4978: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
4979: } else {
4980: iscola = *col;
4981: }
4982: if (scall != MAT_INITIAL_MATRIX){
4983: PetscMalloc(sizeof(Mat),&aloc);
4984: aloc[0] = *A_loc;
4985: }
4986: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
4987: *A_loc = aloc[0];
4988: PetscFree(aloc);
4989: if (!row){
4990: ISDestroy(&isrowa);
4991: }
4992: if (!col){
4993: ISDestroy(&iscola);
4994: }
4995: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
4996: return(0);
4997: }
5001: /*@C
5002:     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5004: Collective on Mat
5006: Input Parameters:
5007: + A,B - the matrices in mpiaij format
5008: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5009: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5011: Output Parameter:
5012: + rowb, colb - index sets of rows and columns of B to extract
5013: . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
5014: - B_seq - the sequential matrix generated
5016: Level: developer
5018: @*/
5019: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5020: {
5021: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5022: PetscErrorCode ierr;
5023: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5024: IS isrowb,iscolb;
5025: Mat *bseq;
5026:
5028: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5029: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5030: }
5031: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5032:
5033: if (scall == MAT_INITIAL_MATRIX){
5034: start = A->cmap->rstart;
5035: cmap = a->garray;
5036: nzA = a->A->cmap->n;
5037: nzB = a->B->cmap->n;
5038: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5039: ncols = 0;
5040: for (i=0; i<nzB; i++) { /* row < local row index */
5041: if (cmap[i] < start) idx[ncols++] = cmap[i];
5042: else break;
5043: }
5044: imark = i;
5045: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5046: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5047: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5048: *brstart = imark;
5049: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5050: } else {
5051: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5052: isrowb = *rowb; iscolb = *colb;
5053: PetscMalloc(sizeof(Mat),&bseq);
5054: bseq[0] = *B_seq;
5055: }
5056: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5057: *B_seq = bseq[0];
5058: PetscFree(bseq);
5059: if (!rowb){
5060: ISDestroy(&isrowb);
5061: } else {
5062: *rowb = isrowb;
5063: }
5064: if (!colb){
5065: ISDestroy(&iscolb);
5066: } else {
5067: *colb = iscolb;
5068: }
5069: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5070: return(0);
5071: }
5075: /*@C
5076:     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5077: of the OFF-DIAGONAL portion of local A
5079: Collective on Mat
5081: Input Parameters:
5082: + A,B - the matrices in mpiaij format
5083: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5084: . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5085: . startsj_r - similar to startsj for receives
5086: - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5088: Output Parameter:
5089: + B_oth - the sequential matrix generated
5091: Level: developer
5093: @*/
5094: PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5095: {
5096: VecScatter_MPI_General *gen_to,*gen_from;
5097: PetscErrorCode ierr;
5098: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5099: Mat_SeqAIJ *b_oth;
5100: VecScatter ctx=a->Mvctx;
5101: MPI_Comm comm=((PetscObject)ctx)->comm;
5102: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5103: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5104: PetscScalar *rvalues,*svalues;
5105: MatScalar *b_otha,*bufa,*bufA;
5106: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5107: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5108: MPI_Status *sstatus,rstatus;
5109: PetscMPIInt jj;
5110: PetscInt *cols,sbs,rbs;
5111: PetscScalar *vals;
5114: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5115: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5116: }
5117: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5118: MPI_Comm_rank(comm,&rank);
5120: gen_to = (VecScatter_MPI_General*)ctx->todata;
5121: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5122: rvalues = gen_from->values; /* holds the lengths of the rows to be received */
5123: svalues = gen_to->values; /* holds the lengths of the rows to be sent */
5124: nrecvs = gen_from->n;
5125: nsends = gen_to->n;
5127: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
5128: srow = gen_to->indices; /* local row index to be sent */
5129: sstarts = gen_to->starts;
5130: sprocs = gen_to->procs;
5131: sstatus = gen_to->sstatus;
5132: sbs = gen_to->bs;
5133: rstarts = gen_from->starts;
5134: rprocs = gen_from->procs;
5135: rbs = gen_from->bs;
5137: if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5138: if (scall == MAT_INITIAL_MATRIX){
5139: /* i-array */
5140: /*---------*/
5141: /* post receives */
5142: for (i=0; i<nrecvs; i++){
5143: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5144: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5145: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5146: }
5148: /* pack the outgoing message */
5149: PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);
5150: sstartsj[0] = 0; rstartsj[0] = 0;
5151: len = 0; /* total length of j or a array to be sent */
5152: k = 0;
5153: for (i=0; i<nsends; i++){
5154: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5155: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5156: for (j=0; j<nrows; j++) {
5157: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5158: for (l=0; l<sbs; l++){
5159: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
5160: rowlen[j*sbs+l] = ncols;
5161: len += ncols;
5162: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
5163: }
5164: k++;
5165: }
5166: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5167: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5168: }
5169: /* wait until recvs and sends of i-array are completed */
5170: i = nrecvs;
5171: while (i--) {
5172: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5173: }
5174: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5176: /* allocate buffers for sending j and a arrays */
5177: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
5178: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
5180: /* create i-array of B_oth */
5181: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
5182: b_othi[0] = 0;
5183: len = 0; /* total length of j or a array to be received */
5184: k = 0;
5185: for (i=0; i<nrecvs; i++){
5186: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5187: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5188: for (j=0; j<nrows; j++) {
5189: b_othi[k+1] = b_othi[k] + rowlen[j];
5190: len += rowlen[j]; k++;
5191: }
5192: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5193: }
5195: /* allocate space for j and a arrays of B_oth */
5196: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
5197: PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);
5199: /* j-array */
5200: /*---------*/
5201: /* post receives of j-array */
5202: for (i=0; i<nrecvs; i++){
5203: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5204: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5205: }
5207: /* pack the outgoing message j-array */
5208: k = 0;
5209: for (i=0; i<nsends; i++){
5210: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5211: bufJ = bufj+sstartsj[i];
5212: for (j=0; j<nrows; j++) {
5213: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5214: for (ll=0; ll<sbs; ll++){
5215: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5216: for (l=0; l<ncols; l++){
5217: *bufJ++ = cols[l];
5218: }
5219: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5220: }
5221: }
5222: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5223: }
5225: /* wait until recvs and sends of j-array are completed */
5226: i = nrecvs;
5227: while (i--) {
5228: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5229: }
5230: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5231: } else if (scall == MAT_REUSE_MATRIX){
5232: sstartsj = *startsj;
5233: rstartsj = *startsj_r;
5234: bufa = *bufa_ptr;
5235: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5236: b_otha = b_oth->a;
5237: } else {
5238: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5239: }
5241: /* a-array */
5242: /*---------*/
5243: /* post receives of a-array */
5244: for (i=0; i<nrecvs; i++){
5245: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5246: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5247: }
5249: /* pack the outgoing message a-array */
5250: k = 0;
5251: for (i=0; i<nsends; i++){
5252: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5253: bufA = bufa+sstartsj[i];
5254: for (j=0; j<nrows; j++) {
5255: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5256: for (ll=0; ll<sbs; ll++){
5257: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5258: for (l=0; l<ncols; l++){
5259: *bufA++ = vals[l];
5260: }
5261: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5262: }
5263: }
5264: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5265: }
5266: /* wait until recvs and sends of a-array are completed */
5267: i = nrecvs;
5268: while (i--) {
5269: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5270: }
5271: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5272: PetscFree2(rwaits,swaits);
5274: if (scall == MAT_INITIAL_MATRIX){
5275: /* put together the new matrix */
5276: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5278: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5279: /* Since these are PETSc arrays, change flags to free them as necessary. */
5280: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
5281: b_oth->free_a = PETSC_TRUE;
5282: b_oth->free_ij = PETSC_TRUE;
5283: b_oth->nonew = 0;
5285: PetscFree(bufj);
5286: if (!startsj || !bufa_ptr){
5287: PetscFree2(sstartsj,rstartsj);
5288: PetscFree(bufa);
5289: } else {
5290: *startsj = sstartsj;
5291: *startsj_r = rstartsj;
5292: *bufa_ptr = bufa;
5293: }
5294: }
5295: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5296: return(0);
5297: }
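
/*
   Example (illustrative sketch, not part of the library): the intended calling
   pattern for MatGetBrowsOfAoCols() is to create B_oth once with
   MAT_INITIAL_MATRIX, keep the startsj/startsj_r/bufa buffers, and then refresh
   only the numerical values with MAT_REUSE_MATRIX whenever B changes its values
   but keeps its nonzero pattern.  The caller is assumed to initialize *B_oth to
   PETSC_NULL before the first call; error checking is omitted to match the rest
   of this listing.
*/
static PetscErrorCode ExampleRefreshBoth(Mat A,Mat B,Mat *B_oth,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa)
{
  if (!*B_oth) { /* first call: builds the sequential matrix and the reusable buffers */
    MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,startsj,startsj_r,bufa,B_oth);
  } else {       /* later calls: only the a-array of B_oth is communicated again */
    MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,startsj,startsj_r,bufa,B_oth);
  }
  return(0);
}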
5301: /*@C
5302: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5304: Not Collective
5306: Input Parameter:
5307: . A - The matrix in mpiaij format
5309: Output Parameters:
5310: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5311: . colmap - A map from global column index to local index into lvec
5312: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5314: Level: developer
5316: @*/
5317: #if defined (PETSC_USE_CTABLE)
5318: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5319: #else
5320: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5321: #endif
5322: {
5323: Mat_MPIAIJ *a;
5330: a = (Mat_MPIAIJ *) A->data;
5331: if (lvec) *lvec = a->lvec;
5332: if (colmap) *colmap = a->colmap;
5333: if (multScatter) *multScatter = a->Mvctx;
5334: return(0);
5335: }
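
/*
   Example (illustrative sketch): the structures returned by
   MatGetCommunicationStructs() can be used to repeat the halo exchange that
   MatMult() performs internally, scattering the required off-process entries of
   x into lvec.  x is assumed to share the parallel layout of the matrix columns;
   error checking is omitted to match the rest of this listing.
*/
static PetscErrorCode ExampleGatherOffProcess(Mat A,Vec x)
{
  Vec        lvec;
  VecScatter Mvctx;

  MatGetCommunicationStructs(A,&lvec,PETSC_NULL,&Mvctx);
  VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
  /* lvec now holds the off-process values of x referenced by the local part of A */
  return(0);
}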
5345: /*
5346:     Computes C = A*B as (B'*A')', i.e. forms B^T * A^T = (A*B)^T and then transposes the
5348:     result, since computing A*B directly is untenable.
5349:                 n           p                p
5350:               (   )       (   )            (   )
5351:            m  ( A )  * n  ( B )    =    m  ( C )
5353: */
5354: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5355: {
5356: PetscErrorCode ierr;
5357: Mat At,Bt,Ct;
5360: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5361: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5362: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5363: MatDestroy(&At);
5364: MatDestroy(&Bt);
5365: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5366: MatDestroy(&Ct);
5367: return(0);
5368: }
5372: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5373: {
5375: PetscInt m=A->rmap->n,n=B->cmap->n;
5376: Mat Cmat;
5379: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5380: MatCreate(((PetscObject)A)->comm,&Cmat);
5381: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5382: MatSetType(Cmat,MATMPIDENSE);
5383: MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);
5384: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5385: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5386: *C = Cmat;
5387: return(0);
5388: }
5390: /* ----------------------------------------------------------------*/
5393: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5394: {
5398: if (scall == MAT_INITIAL_MATRIX){
5399: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5400: }
5401: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5402: return(0);
5403: }
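
/*
   Example (illustrative sketch): the user-level entry point that reaches the two
   routines above.  With a MATMPIDENSE A and a MATMPIAIJ B, MatMatMult() dispatches
   through the "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ()
   below to MatMatMult_MPIDense_MPIAIJ().  PETSC_DEFAULT lets PETSc choose the fill
   estimate; error checking is omitted to match the rest of this listing.
*/
static PetscErrorCode ExampleDenseTimesAIJ(Mat A_dense,Mat B_aij,Mat *C)
{
  MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C); /* symbolic + numeric product */
  MatMatMult(A_dense,B_aij,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);   /* numeric only, reusing the existing C */
  return(0);
}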
5406: #if defined(PETSC_HAVE_MUMPS)
5408: #endif
5409: #if defined(PETSC_HAVE_PASTIX)
5411: #endif
5412: #if defined(PETSC_HAVE_SUPERLU_DIST)
5414: #endif
5415: #if defined(PETSC_HAVE_SPOOLES)
5417: #endif
5420: /*MC
5421: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5423: Options Database Keys:
5424: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5426: Level: beginner
5428: .seealso: MatCreateMPIAIJ()
5429: M*/
5434: PetscErrorCode MatCreate_MPIAIJ(Mat B)
5435: {
5436: Mat_MPIAIJ *b;
5438: PetscMPIInt size;
5441: MPI_Comm_size(((PetscObject)B)->comm,&size);
5443: PetscNewLog(B,Mat_MPIAIJ,&b);
5444: B->data = (void*)b;
5445: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5446: B->rmap->bs = 1;
5447: B->assembled = PETSC_FALSE;
5449: B->insertmode = NOT_SET_VALUES;
5450: b->size = size;
5451: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
5453: /* build cache for off-processor entries formed during MatSetValues() */
5454: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
5455: b->donotstash = PETSC_FALSE;
5456: b->colmap = 0;
5457: b->garray = 0;
5458: b->roworiented = PETSC_TRUE;
5460: /* stuff used for matrix vector multiply */
5461: b->lvec = PETSC_NULL;
5462: b->Mvctx = PETSC_NULL;
5464: /* stuff for MatGetRow() */
5465: b->rowindices = 0;
5466: b->rowvalues = 0;
5467: b->getrowactive = PETSC_FALSE;
5469: #if defined(PETSC_HAVE_SPOOLES)
5470: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5471: "MatGetFactor_mpiaij_spooles",
5472: MatGetFactor_mpiaij_spooles);
5473: #endif
5474: #if defined(PETSC_HAVE_MUMPS)
5475: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5476: "MatGetFactor_aij_mumps",
5477: MatGetFactor_aij_mumps);
5478: #endif
5479: #if defined(PETSC_HAVE_PASTIX)
5480: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5481: "MatGetFactor_mpiaij_pastix",
5482: MatGetFactor_mpiaij_pastix);
5483: #endif
5484: #if defined(PETSC_HAVE_SUPERLU_DIST)
5485: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5486: "MatGetFactor_mpiaij_superlu_dist",
5487: MatGetFactor_mpiaij_superlu_dist);
5488: #endif
5489: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5490: "MatStoreValues_MPIAIJ",
5491: MatStoreValues_MPIAIJ);
5492: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5493: "MatRetrieveValues_MPIAIJ",
5494: MatRetrieveValues_MPIAIJ);
5495: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5496: "MatGetDiagonalBlock_MPIAIJ",
5497: MatGetDiagonalBlock_MPIAIJ);
5498: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5499: "MatIsTranspose_MPIAIJ",
5500: MatIsTranspose_MPIAIJ);
5501: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5502: "MatMPIAIJSetPreallocation_MPIAIJ",
5503: MatMPIAIJSetPreallocation_MPIAIJ);
5504: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5505: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5506: MatMPIAIJSetPreallocationCSR_MPIAIJ);
5507: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5508: "MatDiagonalScaleLocal_MPIAIJ",
5509: MatDiagonalScaleLocal_MPIAIJ);
5510: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5511: "MatConvert_MPIAIJ_MPIAIJPERM",
5512: MatConvert_MPIAIJ_MPIAIJPERM);
5513: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5514: "MatConvert_MPIAIJ_MPIAIJCRL",
5515: MatConvert_MPIAIJ_MPIAIJCRL);
5516: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5517: "MatConvert_MPIAIJ_MPISBAIJ",
5518: MatConvert_MPIAIJ_MPISBAIJ);
5519: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5520: "MatMatMult_MPIDense_MPIAIJ",
5521: MatMatMult_MPIDense_MPIAIJ);
5522: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5523: "MatMatMultSymbolic_MPIDense_MPIAIJ",
5524: MatMatMultSymbolic_MPIDense_MPIAIJ);
5525: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5526: "MatMatMultNumeric_MPIDense_MPIAIJ",
5527: MatMatMultNumeric_MPIDense_MPIAIJ);
5528: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5529: return(0);
5530: }
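
/*
   Example (illustrative sketch): the usual way application code ends up in
   MatCreate_MPIAIJ() above is MatSetType() with MATMPIAIJ (or -mat_type mpiaij
   together with MatSetFromOptions()), followed by preallocation.  The counts of
   5 diagonal and 2 off-diagonal nonzeros per row are placeholders; error checking
   is omitted to match the rest of this listing.
*/
static PetscErrorCode ExampleCreateMPIAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);
  return(0);
}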
5535: /*@
5536: MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5537: and "off-diagonal" part of the matrix in CSR format.
5539: Collective on MPI_Comm
5541: Input Parameters:
5542: + comm - MPI communicator
5543: . m - number of local rows (Cannot be PETSC_DECIDE)
5544: . n - This value should be the same as the local size used in creating the
5545:       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5546:       calculated if N is given). For square matrices n is almost always m.
5547: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5548: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5549: . i - row indices for the "diagonal" portion of the matrix
5550: . j - column indices for the "diagonal" portion of the matrix
5551: . a - matrix values for the "diagonal" portion of the matrix
5552: . oi - row indices for the "off-diagonal" portion of the matrix
5553: . oj - column indices for the "off-diagonal" portion of the matrix
5554: - oa - matrix values for the "off-diagonal" portion of the matrix
5556: Output Parameter:
5557: . mat - the matrix
5559: Level: advanced
5561: Notes:
5562: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5563: must free the arrays once the matrix has been destroyed and not before.
5565: The i, j, oi, and oj indices are zero based
5566:
5567: See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5569: This sets local rows and cannot be used to set off-processor values.
5571: You cannot later use MatSetValues() to change values in this matrix.
5573: .keywords: matrix, aij, compressed row, sparse, parallel
5575: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5576: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5577: @*/
5578: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5579: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5580: {
5582: Mat_MPIAIJ *maij;
5585: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5586: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5587: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5588: MatCreate(comm,mat);
5589: MatSetSizes(*mat,m,n,M,N);
5590: MatSetType(*mat,MATMPIAIJ);
5591: maij = (Mat_MPIAIJ*) (*mat)->data;
5592: maij->donotstash = PETSC_TRUE;
5593: (*mat)->preallocated = PETSC_TRUE;
5595: PetscLayoutSetBlockSize((*mat)->rmap,1);
5596: PetscLayoutSetBlockSize((*mat)->cmap,1);
5597: PetscLayoutSetUp((*mat)->rmap);
5598: PetscLayoutSetUp((*mat)->cmap);
5600: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5601: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5603: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5604: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5605: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5606: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5608: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5609: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5610: return(0);
5611: }
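
/*
   Example (illustrative sketch) of the calling sequence for
   MatCreateMPIAIJWithSplitArrays().  The i/j/a arrays describe the local
   "diagonal" block in CSR form with column indices local to that block, while
   oi/oj/oa describe the local "off-diagonal" block; judging from the
   MatCreateSeqAIJWithArrays() call above, which creates that block with the full
   global column width, the oj indices are global column indices.  All six arrays
   are caller owned and must outlive the matrix; error checking is omitted to
   match the rest of this listing.
*/
static PetscErrorCode ExampleWrapSplitCSR(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *A)
{
  MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,A);
  /* the matrix is returned fully assembled; MatSetValues() cannot be used on it */
  return(0);
}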
5613: /*
5614: Special version for direct calls from Fortran
5615: */
5616: #include <private/fortranimpl.h>
5618: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5619: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5620: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5621: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5622: #endif
5624: /* Change these macros so they can be used in a void function */
5625: #undef CHKERRQ
5626: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5627: #undef SETERRQ2
5628: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5629: #undef SETERRQ
5630: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5635: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5636: {
5637: Mat mat = *mmat;
5638: PetscInt m = *mm, n = *mn;
5639: InsertMode addv = *maddv;
5640: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5641: PetscScalar value;
5642: PetscErrorCode ierr;
5644: MatPreallocated(mat);
5645: if (mat->insertmode == NOT_SET_VALUES) {
5646: mat->insertmode = addv;
5647: }
5648: #if defined(PETSC_USE_DEBUG)
5649: else if (mat->insertmode != addv) {
5650: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5651: }
5652: #endif
5653: {
5654: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5655: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5656: PetscBool roworiented = aij->roworiented;
5658: /* Some variables required by the macros used below */
5659: Mat A = aij->A;
5660: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5661: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5662: MatScalar *aa = a->a;
5663: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5664: Mat B = aij->B;
5665: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5666: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5667: MatScalar *ba = b->a;
5669: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5670: PetscInt nonew = a->nonew;
5671: MatScalar *ap1,*ap2;
5674: for (i=0; i<m; i++) {
5675: if (im[i] < 0) continue;
5676: #if defined(PETSC_USE_DEBUG)
5677: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5678: #endif
5679: if (im[i] >= rstart && im[i] < rend) {
5680: row = im[i] - rstart;
5681: lastcol1 = -1;
5682: rp1 = aj + ai[row];
5683: ap1 = aa + ai[row];
5684: rmax1 = aimax[row];
5685: nrow1 = ailen[row];
5686: low1 = 0;
5687: high1 = nrow1;
5688: lastcol2 = -1;
5689: rp2 = bj + bi[row];
5690: ap2 = ba + bi[row];
5691: rmax2 = bimax[row];
5692: nrow2 = bilen[row];
5693: low2 = 0;
5694: high2 = nrow2;
5696: for (j=0; j<n; j++) {
5697: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5698: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5699: if (in[j] >= cstart && in[j] < cend){
5700: col = in[j] - cstart;
5701: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5702: } else if (in[j] < 0) continue;
5703: #if defined(PETSC_USE_DEBUG)
5704: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5705: #endif
5706: else {
5707: if (mat->was_assembled) {
5708: if (!aij->colmap) {
5709: CreateColmap_MPIAIJ_Private(mat);
5710: }
5711: #if defined (PETSC_USE_CTABLE)
5712: PetscTableFind(aij->colmap,in[j]+1,&col);
5713: col--;
5714: #else
5715: col = aij->colmap[in[j]] - 1;
5716: #endif
5717: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5718: DisAssemble_MPIAIJ(mat);
5719: col = in[j];
5720: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5721: B = aij->B;
5722: b = (Mat_SeqAIJ*)B->data;
5723: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5724: rp2 = bj + bi[row];
5725: ap2 = ba + bi[row];
5726: rmax2 = bimax[row];
5727: nrow2 = bilen[row];
5728: low2 = 0;
5729: high2 = nrow2;
5730: bm = aij->B->rmap->n;
5731: ba = b->a;
5732: }
5733: } else col = in[j];
5734: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5735: }
5736: }
5737: } else {
5738: if (!aij->donotstash) {
5739: if (roworiented) {
5740: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5741: } else {
5742: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5743: }
5744: }
5745: }
5746: }}
5747: PetscFunctionReturnVoid();
5748: }