Actual source code: mpiov.c

  2: /*
  3:    Routines to compute overlapping regions of a parallel MPI matrix
  4:   and to find submatrices that are shared across processors.
  5: */
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <petscbt.h>

  9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS *);
 10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char **,PetscInt*,PetscInt**);
 11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt **,PetscInt**,PetscInt*);

 17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
 18: {
 20:   PetscInt       i;

 23:   if (ov < 0) SETERRQ(((PetscObject)C)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 24:   for (i=0; i<ov; ++i) {
 25:     MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
 26:   }
 27:   return(0);
 28: }
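
/*
   Illustrative usage sketch (variable names are hypothetical): a typical caller grows
   an index set of locally owned rows by a couple of levels of overlap, e.g. for an
   additive Schwarz method, before extracting submatrices.

      Mat      C;                 (an assembled MPIAIJ matrix)
      IS       is;
      PetscInt rstart,rend;

      MatGetOwnershipRange(C,&rstart,&rend);
      ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&is);
      MatIncreaseOverlap(C,1,&is,2);        (expand is by an overlap of 2)
      ISDestroy(&is);
*/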

 30: /*
 31:   Sample message format:
 32:   If a processor A wants processor B to process some elements corresponding
 33:   to index sets is[1],is[5]
 34:   mesg [0] = 2   (no of index sets in the mesg)
 35:   -----------  
 36:   mesg [1] = 1 => is[1]
 37:   mesg [2] = sizeof(is[1]);
 38:   -----------  
 39:   mesg [3] = 5  => is[5]
 40:   mesg [4] = sizeof(is[5]);
 41:   -----------
 42:   mesg [5]
 43:   mesg [n]    <- mesg[5] .. mesg[n] hold the data of is[1]
 44:   -----------
 45:   mesg [n+1]
 46:   mesg [m]    <- mesg[n+1] .. mesg[m] hold the data of is[5]
 47:   -----------
 48:   
 49:   Notes:
 50:   nrqs - no of requests sent (or to be sent out)
  51:   nrqr - no of requests received (which have to be, or have been, processed)
 52: */
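
/*
  Worked example (illustrative): if processor A asks processor B to handle rows {3,7}
  on behalf of is[1] and row {12} on behalf of is[5], the outgoing message is

     mesg = { 2,   1, 2,   5, 1,   3, 7,   12 }

  i.e. the header {2, (1,2), (5,1)} followed by the data of is[1] and then of is[5].
*/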
 55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
 56: {
 57:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
 58:   PetscMPIInt    *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
 59:   const PetscInt **idx,*idx_i;
 60:   PetscInt       *n,**data,len;
 62:   PetscMPIInt    size,rank,tag1,tag2;
 63:   PetscInt       M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
 64:   PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
 65:   PetscBT        *table;
 66:   MPI_Comm       comm;
 67:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
 68:   MPI_Status     *s_status,*recv_status;
 69:   char           *t_p;

 72:   comm   = ((PetscObject)C)->comm;
 73:   size   = c->size;
 74:   rank   = c->rank;
 75:   M      = C->rmap->N;

 77:   PetscObjectGetNewTag((PetscObject)C,&tag1);
 78:   PetscObjectGetNewTag((PetscObject)C,&tag2);
 79: 
 80:   PetscMalloc2(imax,PetscInt*,&idx,imax,PetscInt,&n);

 82:   for (i=0; i<imax; i++) {
 83:     ISGetIndices(is[i],&idx[i]);
 84:     ISGetLocalSize(is[i],&n[i]);
 85:   }

 87:   /* evaluate communication - mesg to whom, length of mesg, and buffer space
 88:      required. Based on this, buffers are allocated, and data copied into them */
 89:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4);
 90:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 91:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 92:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 93:   for (i=0; i<imax; i++) {
 94:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 95:     idx_i = idx[i];
 96:     len   = n[i];
 97:     for (j=0; j<len; j++) {
 98:       row  = idx_i[j];
 99:       if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100:       PetscLayoutFindOwner(C->rmap,row,&proc);
101:       w4[proc]++;
102:     }
103:     for (j=0; j<size; j++){
104:       if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105:     }
106:   }

108:   nrqs     = 0;              /* no of outgoing messages */
109:   msz      = 0;              /* total mesg length (for all procs) */
110:   w1[rank] = 0;              /* no mesg sent to self */
111:   w3[rank] = 0;
112:   for (i=0; i<size; i++) {
113:     if (w1[i])  {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114:   }
115:   /* pa - is list of processors to communicate with */
116:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);
117:   for (i=0,j=0; i<size; i++) {
118:     if (w1[i]) {pa[j] = i; j++;}
119:   }

121:   /* Each message would have a header = 1 + 2*(no of IS) + data */
122:   for (i=0; i<nrqs; i++) {
123:     j      = pa[i];
124:     w1[j] += w2[j] + 2*w3[j];
125:     msz   += w1[j];
126:   }

128:   /* Determine the number of messages to expect, their lengths, and their from-ids */
129:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

132:   /* Now post the Irecvs corresponding to these messages */
133:   PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);

135:   /* Allocate Memory for outgoing messages */
136:   PetscMalloc4(size,PetscInt*,&outdat,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);
137:   PetscMemzero(outdat,size*sizeof(PetscInt*));
138:   PetscMemzero(ptr,size*sizeof(PetscInt*));

140:   {
141:     PetscInt *iptr = tmp,ict  = 0;
142:     for (i=0; i<nrqs; i++) {
143:       j         = pa[i];
144:       iptr     +=  ict;
145:       outdat[j] = iptr;
146:       ict       = w1[j];
147:     }
148:   }

150:   /* Form the outgoing messages */
151:   /*plug in the headers*/
152:   for (i=0; i<nrqs; i++) {
153:     j            = pa[i];
154:     outdat[j][0] = 0;
155:     PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156:     ptr[j]       = outdat[j] + 2*w3[j] + 1;
157:   }
158: 
159:   /* Memory for doing local proc's work*/
160:   {
161:     PetscMalloc5(imax,PetscBT,&table, imax,PetscInt*,&data, imax,PetscInt,&isz,
162:                         M*imax,PetscInt,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,char,&t_p);
163:     PetscMemzero(table,imax*sizeof(PetscBT));
164:     PetscMemzero(data,imax*sizeof(PetscInt*));
165:     PetscMemzero(isz,imax*sizeof(PetscInt));
166:     PetscMemzero(d_p,M*imax*sizeof(PetscInt));
167:     PetscMemzero(t_p,(M/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));

169:     for (i=0; i<imax; i++) {
170:       table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
171:       data[i]  = d_p + M*i;
172:     }
173:   }

175:   /* Parse the IS and update local tables and the outgoing buf with the data*/
176:   {
177:     PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
178:     PetscBT  table_i;

180:     for (i=0; i<imax; i++) {
181:       PetscMemzero(ctr,size*sizeof(PetscInt));
182:       n_i     = n[i];
183:       table_i = table[i];
184:       idx_i   = idx[i];
185:       data_i  = data[i];
186:       isz_i   = isz[i];
187:       for (j=0;  j<n_i; j++) {  /* parse the indices of each IS */
188:         row  = idx_i[j];
189:         PetscLayoutFindOwner(C->rmap,row,&proc);
190:         if (proc != rank) { /* copy to the outgoing buffer */
191:           ctr[proc]++;
192:           *ptr[proc] = row;
193:           ptr[proc]++;
194:         } else { /* Update the local table */
195:           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
196:         }
197:       }
198:       /* Update the headers for the current IS */
199:       for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
200:         if ((ctr_j = ctr[j])) {
201:           outdat_j        = outdat[j];
202:           k               = ++outdat_j[0];
203:           outdat_j[2*k]   = ctr_j;
204:           outdat_j[2*k-1] = i;
205:         }
206:       }
207:       isz[i] = isz_i;
208:     }
209:   }

211:   /*  Now  post the sends */
212:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
213:   for (i=0; i<nrqs; ++i) {
214:     j    = pa[i];
215:     MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
216:   }
217: 
218:   /* No longer need the original indices*/
219:   for (i=0; i<imax; ++i) {
220:     ISRestoreIndices(is[i],idx+i);
221:   }
222:   PetscFree2(idx,n);

224:   for (i=0; i<imax; ++i) {
225:     ISDestroy(&is[i]);
226:   }
227: 
228:   /* Do Local work*/
229:   MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);

231:   /* Receive messages*/
232:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);
233:   if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}
234: 
235:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
236:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

238:   /* Phase 1 sends are complete - deallocate buffers */
239:   PetscFree4(outdat,ptr,tmp,ctr);
240:   PetscFree4(w1,w2,w3,w4);

242:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);
243:   PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);
244:   MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
245:   PetscFree(rbuf[0]);
246:   PetscFree(rbuf);

248: 
249:  /* Send the data back*/
250:   /* Do a global reduction to know the buffer space req for incoming messages*/
251:   {
252:     PetscMPIInt *rw1;
253: 
254:     PetscMalloc(size*sizeof(PetscMPIInt),&rw1);
255:     PetscMemzero(rw1,size*sizeof(PetscMPIInt));

257:     for (i=0; i<nrqr; ++i) {
258:       proc      = recv_status[i].MPI_SOURCE;
259:       if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
260:       rw1[proc] = isz1[i];
261:     }
262:     PetscFree(onodes1);
263:     PetscFree(olengths1);

265:     /* Determine the number of messages to expect, their lengths, and their from-ids */
266:     PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
267:     PetscFree(rw1);
268:   }
269:   /* Now post the Irecvs corresponding to these messages */
270:   PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);

272:   /*  Now  post the sends */
273:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
274:   for (i=0; i<nrqr; ++i) {
275:     j    = recv_status[i].MPI_SOURCE;
276:     MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
277:   }

279:   /* receive work done on other processors*/
280:   {
281:     PetscInt    is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
282:     PetscMPIInt idex;
283:     PetscBT     table_i;
284:     MPI_Status  *status2;
285: 
286:     PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
287:     for (i=0; i<nrqs; ++i) {
288:       MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
289:       /* Process the message*/
290:       rbuf2_i = rbuf2[idex];
291:       ct1     = 2*rbuf2_i[0]+1;
292:       jmax    = rbuf2[idex][0];
293:       for (j=1; j<=jmax; j++) {
294:         max     = rbuf2_i[2*j];
295:         is_no   = rbuf2_i[2*j-1];
296:         isz_i   = isz[is_no];
297:         data_i  = data[is_no];
298:         table_i = table[is_no];
299:         for (k=0; k<max; k++,ct1++) {
300:           row = rbuf2_i[ct1];
301:           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
302:         }
303:         isz[is_no] = isz_i;
304:       }
305:     }

307:     if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
308:     PetscFree(status2);
309:   }
310: 
311:   for (i=0; i<imax; ++i) {
312:     ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
313:   }
314: 
315:   PetscFree(onodes2);
316:   PetscFree(olengths2);

318:   PetscFree(pa);
319:   PetscFree(rbuf2[0]);
320:   PetscFree(rbuf2);
321:   PetscFree(s_waits1);
322:   PetscFree(r_waits1);
323:   PetscFree(s_waits2);
324:   PetscFree(r_waits2);
325:   PetscFree5(table,data,isz,d_p,t_p);
326:   PetscFree(s_status);
327:   PetscFree(recv_status);
328:   PetscFree(xdata[0]);
329:   PetscFree(xdata);
330:   PetscFree(isz1);
331:   return(0);
332: }

336: /*
337:    MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap, to do
338:        the work on the local processor.

340:      Inputs:
341:       C      - the MPIAIJ matrix;
342:       imax   - total no of index sets processed at a time;
343:       table  - an array of bit tables (PetscBT), one per index set, each M bits long (M = global number of rows).

345:      Output:
346:       isz    - array containing the count of the solution elements corresponding
347:                to each index set;
348:       data   - array of arrays holding the solution (global row) indices for each index set
349: */
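
/*
   A note on the bit tables (illustrative sketch): the PetscBTLookupSet() test-and-set
   idiom used below guarantees that each global index is appended to data[] at most
   once, e.g.

      PetscBT  bt;
      PetscInt vals[5] = {4,7,4,2,7},uniq[5],nuniq = 0,i;
      PetscBTCreate(16,bt);
      for (i=0; i<5; i++) {
        if (!PetscBTLookupSet(bt,vals[i])) uniq[nuniq++] = vals[i];
      }
      PetscBTDestroy(bt);

   leaves uniq = {4,7,2} and nuniq = 3.
*/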
350: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
351: {
352:   Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
353:   Mat        A = c->A,B = c->B;
354:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
355:   PetscInt   start,end,val,max,rstart,cstart,*ai,*aj;
356:   PetscInt   *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
357:   PetscBT    table_i;

360:   rstart = C->rmap->rstart;
361:   cstart = C->cmap->rstart;
362:   ai     = a->i;
363:   aj     = a->j;
364:   bi     = b->i;
365:   bj     = b->j;
366:   garray = c->garray;

368: 
369:   for (i=0; i<imax; i++) {
370:     data_i  = data[i];
371:     table_i = table[i];
372:     isz_i   = isz[i];
373:     for (j=0,max=isz[i]; j<max; j++) {
374:       row   = data_i[j] - rstart;
375:       start = ai[row];
376:       end   = ai[row+1];
377:       for (k=start; k<end; k++) { /* Amat */
378:         val = aj[k] + cstart;
379:         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
380:       }
381:       start = bi[row];
382:       end   = bi[row+1];
383:       for (k=start; k<end; k++) { /* Bmat */
384:         val = garray[bj[k]];
385:         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
386:       }
387:     }
388:     isz[i] = isz_i;
389:   }
390:   return(0);
391: }

395: /*     
396:       MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages,
397:          and return the output

399:          Input:
400:            C    - the matrix
401:            nrqr - no of messages being processed.
402:            rbuf - an array of pointers to the received requests
403:            
404:          Output:
405:            xdata - array of messages to be sent back
406:            isz1  - size of each message

408:   For better efficiency perhaps we should malloc each xdata[i] separately;
409: then if a realloc is required we need only copy the data for that one row
410: rather than all previous rows, as is done now where a single large chunk of
411: memory is used.

413: */
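
/*
  Sizing note (illustrative numbers): the reply buffer xdata[0] is initially sized as
     3*(max(total_sz, ct*(a->nz+b->nz)/C->rmap->n) + 1)  PetscInts.
  For instance, with ct = 2 requested rows, 50 local nonzeros (diag + off-diag) spread
  over 10 local rows, and total_sz <= 10, that is 3*(10+1) = 33 entries; the buffer is
  then grown by a factor of 1.5 whenever this estimate is exceeded.
*/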
414: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
415: {
416:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
417:   Mat            A = c->A,B = c->B;
418:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
420:   PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
421:   PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
422:   PetscInt       val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
423:   PetscInt       *rbuf_i,kmax,rbuf_0;
424:   PetscBT        xtable;

427:   m      = C->rmap->N;
428:   rstart = C->rmap->rstart;
429:   cstart = C->cmap->rstart;
430:   ai     = a->i;
431:   aj     = a->j;
432:   bi     = b->i;
433:   bj     = b->j;
434:   garray = c->garray;
435: 
436: 
437:   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
438:     rbuf_i  =  rbuf[i];
439:     rbuf_0  =  rbuf_i[0];
440:     ct     += rbuf_0;
441:     for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
442:   }
443: 
444:   if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
445:   else      max1 = 1;
446:   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
447:   PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);
448:   ++no_malloc;
449:   PetscBTCreate(m,xtable);
450:   PetscMemzero(isz1,nrqr*sizeof(PetscInt));
451: 
452:   ct3 = 0;
453:   for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
454:     rbuf_i =  rbuf[i];
455:     rbuf_0 =  rbuf_i[0];
456:     ct1    =  2*rbuf_0+1;
457:     ct2    =  ct1;
458:     ct3    += ct1;
459:     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
460:       PetscBTMemzero(m,xtable);
461:       oct2 = ct2;
462:       kmax = rbuf_i[2*j];
463:       for (k=0; k<kmax; k++,ct1++) {
464:         row = rbuf_i[ct1];
465:         if (!PetscBTLookupSet(xtable,row)) {
466:           if (!(ct3 < mem_estimate)) {
467:             new_estimate = (PetscInt)(1.5*mem_estimate)+1;
468:             PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
469:             PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
470:             PetscFree(xdata[0]);
471:             xdata[0]     = tmp;
472:             mem_estimate = new_estimate; ++no_malloc;
473:             for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
474:           }
475:           xdata[i][ct2++] = row;
476:           ct3++;
477:         }
478:       }
479:       for (k=oct2,max2=ct2; k<max2; k++) {
480:         row   = xdata[i][k] - rstart;
481:         start = ai[row];
482:         end   = ai[row+1];
483:         for (l=start; l<end; l++) {
484:           val = aj[l] + cstart;
485:           if (!PetscBTLookupSet(xtable,val)) {
486:             if (!(ct3 < mem_estimate)) {
487:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
488:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
489:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
490:               PetscFree(xdata[0]);
491:               xdata[0]     = tmp;
492:               mem_estimate = new_estimate; ++no_malloc;
493:               for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
494:             }
495:             xdata[i][ct2++] = val;
496:             ct3++;
497:           }
498:         }
499:         start = bi[row];
500:         end   = bi[row+1];
501:         for (l=start; l<end; l++) {
502:           val = garray[bj[l]];
503:           if (!PetscBTLookupSet(xtable,val)) {
504:             if (!(ct3 < mem_estimate)) {
505:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
506:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
507:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
508:               PetscFree(xdata[0]);
509:               xdata[0]     = tmp;
510:               mem_estimate = new_estimate; ++no_malloc;
511:               for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
512:             }
513:             xdata[i][ct2++] = val;
514:             ct3++;
515:           }
516:         }
517:       }
518:       /* Update the header*/
519:       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
520:       xdata[i][2*j-1] = rbuf_i[2*j-1];
521:     }
522:     xdata[i][0] = rbuf_0;
523:     xdata[i+1]  = xdata[i] + ct2;
524:     isz1[i]     = ct2; /* size of each message */
525:   }
526:   PetscBTDestroy(xtable);
527:   PetscInfo3(C,"Allocated %D bytes, required %D bytes, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);
528:   return(0);
529: }
530: /* -------------------------------------------------------------------------*/
533: /*
534:     Every processor gets the entire matrix
535: */
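/*
   Sketch of a call that reaches this routine through the fast path in
   MatGetSubMatrices_MPIAIJ() below (assumes a square, assembled MPIAIJ matrix C;
   the variable names are illustrative only):

      IS       isall;
      Mat      *Clocal;
      PetscInt N;

      MatGetSize(C,&N,PETSC_NULL);
      ISCreateStride(PETSC_COMM_SELF,N,0,1,&isall);
      MatGetSubMatrices(C,1,&isall,&isall,MAT_INITIAL_MATRIX,&Clocal);
      ISDestroy(&isall);

   Each process then owns Clocal[0], a sequential copy of all of C.
*/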
538: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
539: {
540:   Mat            B;
541:   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
542:   Mat_SeqAIJ     *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
544:   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
545:   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
546:   PetscInt       m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
547:   MatScalar      *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;

550:   MPI_Comm_size(((PetscObject)A)->comm,&size);
551:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);

553:   if (scall == MAT_INITIAL_MATRIX) {
554:     /* ----------------------------------------------------------------
555:          Tell every processor the number of nonzeros per row
556:     */
557:     PetscMalloc(A->rmap->N*sizeof(PetscInt),&lens);
558:     for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
559:       lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
560:     }
561:     sendcount = A->rmap->rend - A->rmap->rstart;
562:     PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
563:     for (i=0; i<size; i++) {
564:       recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
565:       displs[i]     = A->rmap->range[i];
566:     }
567: #if defined(PETSC_HAVE_MPI_IN_PLACE)
568:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
569: #else
570:     MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
571: #endif
572:     /* ---------------------------------------------------------------
573:          Create the sequential matrix of the same type as the local block diagonal
574:     */
575:     MatCreate(PETSC_COMM_SELF,&B);
576:     MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
577:     MatSetType(B,((PetscObject)a->A)->type_name);
578:     MatSeqAIJSetPreallocation(B,0,lens);
579:     PetscMalloc(sizeof(Mat),Bin);
580:     **Bin = B;
581:     b = (Mat_SeqAIJ *)B->data;

583:     /*--------------------------------------------------------------------
584:        Copy my part of matrix column indices over
585:     */
586:     sendcount  = ad->nz + bd->nz;
587:     jsendbuf   = b->j + b->i[rstarts[rank]];
588:     a_jsendbuf = ad->j;
589:     b_jsendbuf = bd->j;
590:     n          = A->rmap->rend - A->rmap->rstart;
591:     cnt        = 0;
592:     for (i=0; i<n; i++) {

594:       /* put in lower diagonal portion */
595:       m = bd->i[i+1] - bd->i[i];
596:       while (m > 0) {
597:         /* is it above diagonal (in bd (compressed) numbering) */
598:         if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
599:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
600:         m--;
601:       }

603:       /* put in diagonal portion */
604:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
605:         jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
606:       }

608:       /* put in upper diagonal portion */
609:       while (m-- > 0) {
610:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
611:       }
612:     }
613:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

615:     /*--------------------------------------------------------------------
616:        Gather all column indices to all processors
617:     */
618:     for (i=0; i<size; i++) {
619:       recvcounts[i] = 0;
620:       for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
621:         recvcounts[i] += lens[j];
622:       }
623:     }
624:     displs[0]  = 0;
625:     for (i=1; i<size; i++) {
626:       displs[i] = displs[i-1] + recvcounts[i-1];
627:     }
628: #if defined(PETSC_HAVE_MPI_IN_PLACE)
629:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
630: #else
631:     MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
632: #endif
633:     /*--------------------------------------------------------------------
634:         Assemble the matrix into usable form (note numerical values not yet set)
635:     */
636:     /* set the b->ilen (length of each row) values */
637:     PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
638:     /* set the b->i indices */
639:     b->i[0] = 0;
640:     for (i=1; i<=A->rmap->N; i++) {
641:       b->i[i] = b->i[i-1] + lens[i-1];
642:     }
643:     PetscFree(lens);
644:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
645:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

647:   } else {
648:     B  = **Bin;
649:     b = (Mat_SeqAIJ *)B->data;
650:   }

652:   /*--------------------------------------------------------------------
653:        Copy my part of matrix numerical values into the values location 
654:   */
655:   if (flag == MAT_GET_VALUES){
656:     sendcount = ad->nz + bd->nz;
657:     sendbuf   = b->a + b->i[rstarts[rank]];
658:     a_sendbuf = ad->a;
659:     b_sendbuf = bd->a;
660:     b_sendj   = bd->j;
661:     n         = A->rmap->rend - A->rmap->rstart;
662:     cnt       = 0;
663:     for (i=0; i<n; i++) {

665:       /* put in lower diagonal portion */
666:       m = bd->i[i+1] - bd->i[i];
667:       while (m > 0) {
668:         /* is it above diagonal (in bd (compressed) numbering) */
669:         if (garray[*b_sendj] > A->rmap->rstart + i) break;
670:         sendbuf[cnt++] = *b_sendbuf++;
671:         m--;
672:         b_sendj++;
673:       }

675:       /* put in diagonal portion */
676:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
677:         sendbuf[cnt++] = *a_sendbuf++;
678:       }

680:       /* put in upper diagonal portion */
681:       while (m-- > 0) {
682:         sendbuf[cnt++] = *b_sendbuf++;
683:         b_sendj++;
684:       }
685:     }
686:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
687: 
688:     /* ----------------------------------------------------------------- 
689:        Gather all numerical values to all processors 
690:     */
691:     if (!recvcounts) {
692:       PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
693:     }
694:     for (i=0; i<size; i++) {
695:       recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
696:     }
697:     displs[0]  = 0;
698:     for (i=1; i<size; i++) {
699:       displs[i] = displs[i-1] + recvcounts[i-1];
700:     }
701:     recvbuf   = b->a;
702: #if defined(PETSC_HAVE_MPI_IN_PLACE)
703:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
704: #else
705:     MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,((PetscObject)A)->comm);
706: #endif
707:   }  /* endof (flag == MAT_GET_VALUES) */
708:   PetscFree2(recvcounts,displs);

710:   if (A->symmetric){
711:     MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
712:   } else if (A->hermitian) {
713:     MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
714:   } else if (A->structurally_symmetric) {
715:     MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
716:   }
717:   return(0);
718: }



724: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
725: {
727:   PetscInt       nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
728:   PetscBool      rowflag,colflag,wantallmatrix = PETSC_FALSE,twantallmatrix;

731:   /*
732:        Check for the special case where each processor gets the entire matrix
733:   */
734:   if (ismax == 1 && C->rmap->N == C->cmap->N) {
735:     ISIdentity(*isrow,&rowflag);
736:     ISIdentity(*iscol,&colflag);
737:     ISGetLocalSize(*isrow,&nrow);
738:     ISGetLocalSize(*iscol,&ncol);
739:     if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
740:       wantallmatrix = PETSC_TRUE;
741:       PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,PETSC_NULL);
742:     }
743:   }
744:   MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPI_INT,MPI_MIN,((PetscObject)C)->comm);
745:   if (twantallmatrix) {
746:     MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
747:     return(0);
748:   }

750:   /* Allocate memory to hold all the submatrices */
751:   if (scall != MAT_REUSE_MATRIX) {
752:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
753:   }
754:   /* Determine the number of stages through which submatrices are done */
755:   nmax          = 20*1000000 / (C->cmap->N * sizeof(PetscInt));
756:   /*
757:      Each stage will extract at most nmax submatrices.
758:      nmax is determined by the matrix column dimension, so that each stage uses roughly
759:      20 MB of scratch space; a very wide matrix (e.g. 20M columns) is limited to one submatrix per stage.
760:   */
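  /* For example (assuming 4-byte PetscInt): with C->cmap->N = 1,000,000 columns,
     nmax = 20e6/(1e6*4) = 5, so five submatrices are extracted per stage. */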
761:   if (!nmax) nmax = 1;
762:   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

764:   /* Make sure every processor loops through the nstages */
765:   MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);

767:   for (i=0,pos=0; i<nstages; i++) {
768:     if (pos+nmax <= ismax) max_no = nmax;
769:     else if (pos == ismax) max_no = 0;
770:     else                   max_no = ismax-pos;
771:     MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
772:     pos += max_no;
773:   }
774:   return(0);
775: }

777: /* -------------------------------------------------------------------------*/
780: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
781: {
782:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
783:   Mat            A = c->A;
784:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
785:   const PetscInt **icol,**irow;
786:   PetscInt       *nrow,*ncol,start;
788:   PetscMPIInt    rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
789:   PetscInt       **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
790:   PetscInt       nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
791:   PetscInt       **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
792:   PetscInt       **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
793: #if defined (PETSC_USE_CTABLE)
794:   PetscTable     *cmap,cmap_i,*rmap,rmap_i;
795: #else
796:   PetscInt       **cmap,*cmap_i,**rmap,*rmap_i;
797: #endif
798:   const PetscInt *irow_i;
799:   PetscInt       ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
800:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
801:   MPI_Request    *r_waits4,*s_waits3,*s_waits4;
802:   MPI_Status     *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
803:   MPI_Status     *r_status3,*r_status4,*s_status4;
804:   MPI_Comm       comm;
805:   PetscScalar    **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
806:   PetscMPIInt    *onodes1,*olengths1;
807:   PetscMPIInt    idex,idex2,end;

810:   comm   = ((PetscObject)C)->comm;
811:   tag0   = ((PetscObject)C)->tag;
812:   size   = c->size;
813:   rank   = c->rank;
814: 
815:   /* Get some new tags to keep the communication clean */
816:   PetscObjectGetNewTag((PetscObject)C,&tag1);
817:   PetscObjectGetNewTag((PetscObject)C,&tag2);
818:   PetscObjectGetNewTag((PetscObject)C,&tag3);

820:   PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);

822:   for (i=0; i<ismax; i++) {
823:     ISGetIndices(isrow[i],&irow[i]);
824:     ISGetIndices(iscol[i],&icol[i]);
825:     ISGetLocalSize(isrow[i],&nrow[i]);
826:     ISGetLocalSize(iscol[i],&ncol[i]);
827:   }

829:   /* evaluate communication - mesg to whom, length of mesg, and buffer space
830:      required. Based on this, buffers are allocated, and data copied into them */
831:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4); /* mesg size */
832:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialize work vector*/
833:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialize work vector*/
834:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialize work vector*/
835:   for (i=0; i<ismax; i++) {
836:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
837:     jmax   = nrow[i];
838:     irow_i = irow[i];
839:     for (j=0; j<jmax; j++) {
840:       l = 0;
841:       row  = irow_i[j];
842:       while (row >= C->rmap->range[l+1]) l++;
843:       proc = l;
844:       w4[proc]++;
845:     }
846:     for (j=0; j<size; j++) {
847:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
848:     }
849:   }
850: 
851:   nrqs     = 0;              /* no of outgoing messages */
852:   msz      = 0;              /* total mesg length (for all procs) */
853:   w1[rank] = 0;              /* no mesg sent to self */
854:   w3[rank] = 0;
855:   for (i=0; i<size; i++) {
856:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
857:   }
858:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa); /*(proc -array)*/
859:   for (i=0,j=0; i<size; i++) {
860:     if (w1[i]) { pa[j] = i; j++; }
861:   }

863:   /* Each message would have a header = 1 + 2*(no of IS) + data */
864:   for (i=0; i<nrqs; i++) {
865:     j     = pa[i];
866:     w1[j] += w2[j] + 2* w3[j];
867:     msz   += w1[j];
868:   }
869:   PetscInfo2(C,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);

871:   /* Determine the number of messages to expect, their lengths, and their from-ids */
872:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
873:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

875:   /* Now post the Irecvs corresponding to these messages */
876:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
877: 
878:   PetscFree(onodes1);
879:   PetscFree(olengths1);
880: 
881:   /* Allocate Memory for outgoing messages */
882:   PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);
883:   PetscMemzero(sbuf1,size*sizeof(PetscInt*));
884:   PetscMemzero(ptr,size*sizeof(PetscInt*));

886:   {
887:     PetscInt *iptr = tmp,ict = 0;
888:     for (i=0; i<nrqs; i++) {
889:       j         = pa[i];
890:       iptr     += ict;
891:       sbuf1[j]  = iptr;
892:       ict       = w1[j];
893:     }
894:   }

896:   /* Form the outgoing messages */
897:   /* Initialize the header space */
898:   for (i=0; i<nrqs; i++) {
899:     j           = pa[i];
900:     sbuf1[j][0] = 0;
901:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
902:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
903:   }
904: 
905:   /* Parse the isrow and copy data into outbuf */
906:   for (i=0; i<ismax; i++) {
907:     PetscMemzero(ctr,size*sizeof(PetscInt));
908:     irow_i = irow[i];
909:     jmax   = nrow[i];
910:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
911:       l = 0;
912:       row  = irow_i[j];
913:       while (row >= C->rmap->range[l+1]) l++;
914:       proc = l;
915:       if (proc != rank) { /* copy to the outgoing buf*/
916:         ctr[proc]++;
917:         *ptr[proc] = row;
918:         ptr[proc]++;
919:       }
920:     }
921:     /* Update the headers for the current IS */
922:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
923:       if ((ctr_j = ctr[j])) {
924:         sbuf1_j        = sbuf1[j];
925:         k              = ++sbuf1_j[0];
926:         sbuf1_j[2*k]   = ctr_j;
927:         sbuf1_j[2*k-1] = i;
928:       }
929:     }
930:   }

932:   /*  Now  post the sends */
933:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
934:   for (i=0; i<nrqs; ++i) {
935:     j    = pa[i];
936:     MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
937:   }

939:   /* Post Receives to capture the buffer size */
940:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
941:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);
942:   rbuf2[0] = tmp + msz;
943:   for (i=1; i<nrqs; ++i) {
944:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
945:   }
946:   for (i=0; i<nrqs; ++i) {
947:     j    = pa[i];
948:     MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
949:   }

951:   /* Send to other procs the buf size they should allocate */
952: 

954:   /* Receive messages*/
955:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
956:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
957:   PetscMalloc3(nrqr,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);
958:   {
959:     Mat_SeqAIJ  *sA = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
960:     PetscInt    *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
961:     PetscInt    *sbuf2_i;

963:     for (i=0; i<nrqr; ++i) {
964:       MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);
965:       req_size[idex] = 0;
966:       rbuf1_i         = rbuf1[idex];
967:       start           = 2*rbuf1_i[0] + 1;
968:       MPI_Get_count(r_status1+i,MPIU_INT,&end);
969:       PetscMalloc((end+1)*sizeof(PetscInt),&sbuf2[idex]);
970:       sbuf2_i         = sbuf2[idex];
971:       for (j=start; j<end; j++) {
972:         id               = rbuf1_i[j] - rstart;
973:         ncols            = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
974:         sbuf2_i[j]       = ncols;
975:         req_size[idex] += ncols;
976:       }
977:       req_source[idex] = r_status1[i].MPI_SOURCE;
978:       /* form the header */
979:       sbuf2_i[0]   = req_size[idex];
980:       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
981:       MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
982:     }
983:   }
984:   PetscFree(r_status1);
985:   PetscFree(r_waits1);

987:   /*  recv buffer sizes */
988:   /* Receive messages*/
989: 
990:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);
991:   PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf4);
992:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
993:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
994:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

996:   for (i=0; i<nrqs; ++i) {
997:     MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
998:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscInt),&rbuf3[idex]);
999:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscScalar),&rbuf4[idex]);
1000:     MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1001:     MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1002:   }
1003:   PetscFree(r_status2);
1004:   PetscFree(r_waits2);
1005: 
1006:   /* Wait on sends1 and sends2 */
1007:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
1008:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

1010:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1011:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1012:   PetscFree(s_status1);
1013:   PetscFree(s_status2);
1014:   PetscFree(s_waits1);
1015:   PetscFree(s_waits2);

1017:   /* Now allocate buffers for a->j, and send them off */
1018:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);
1019:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1020:   PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);
1021:   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
1022: 
1023:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
1024:   {
1025:     PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1026:     PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1027:     PetscInt cend = C->cmap->rend;
1028:     PetscInt *a_j = a->j,*b_j = b->j,ctmp;

1030:     for (i=0; i<nrqr; i++) {
1031:       rbuf1_i   = rbuf1[i];
1032:       sbuf_aj_i = sbuf_aj[i];
1033:       ct1       = 2*rbuf1_i[0] + 1;
1034:       ct2       = 0;
1035:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1036:         kmax = rbuf1[i][2*j];
1037:         for (k=0; k<kmax; k++,ct1++) {
1038:           row    = rbuf1_i[ct1] - rstart;
1039:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1040:           ncols  = nzA + nzB;
1041:           cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];

1043:           /* load the column indices for this row into cols*/
1044:           cols  = sbuf_aj_i + ct2;
1045: 
1046:           lwrite = 0;
1047:           for (l=0; l<nzB; l++) {
1048:             if ((ctmp = bmap[cworkB[l]]) < cstart)  cols[lwrite++] = ctmp;
1049:           }
1050:           for (l=0; l<nzA; l++)   cols[lwrite++] = cstart + cworkA[l];
1051:           for (l=0; l<nzB; l++) {
1052:             if ((ctmp = bmap[cworkB[l]]) >= cend)  cols[lwrite++] = ctmp;
1053:           }

1055:           ct2 += ncols;
1056:         }
1057:       }
1058:       MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1059:     }
1060:   }
1061:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
1062:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

1064:   /* Allocate buffers for a->a, and send them off */
1065:   PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf_aa);
1066:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1067:   PetscMalloc((j+1)*sizeof(PetscScalar),&sbuf_aa[0]);
1068:   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
1069: 
1070:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
1071:   {
1072:     PetscInt    nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1073:     PetscInt    cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1074:     PetscInt    cend = C->cmap->rend;
1075:     PetscInt    *b_j = b->j;
1076:     PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;
1077: 
1078:     for (i=0; i<nrqr; i++) {
1079:       rbuf1_i   = rbuf1[i];
1080:       sbuf_aa_i = sbuf_aa[i];
1081:       ct1       = 2*rbuf1_i[0]+1;
1082:       ct2       = 0;
1083:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1084:         kmax = rbuf1_i[2*j];
1085:         for (k=0; k<kmax; k++,ct1++) {
1086:           row    = rbuf1_i[ct1] - rstart;
1087:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1088:           ncols  = nzA + nzB;
1089:           cworkB = b_j + b_i[row];
1090:           vworkA = a_a + a_i[row];
1091:           vworkB = b_a + b_i[row];

1093:           /* load the column values for this row into vals*/
1094:           vals  = sbuf_aa_i+ct2;
1095: 
1096:           lwrite = 0;
1097:           for (l=0; l<nzB; l++) {
1098:             if ((bmap[cworkB[l]]) < cstart)  vals[lwrite++] = vworkB[l];
1099:           }
1100:           for (l=0; l<nzA; l++)   vals[lwrite++] = vworkA[l];
1101:           for (l=0; l<nzB; l++) {
1102:             if ((bmap[cworkB[l]]) >= cend)  vals[lwrite++] = vworkB[l];
1103:           }
1104: 
1105:           ct2 += ncols;
1106:         }
1107:       }
1108:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1109:     }
1110:   }
1111:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
1112:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
1113:   PetscFree(rbuf1[0]);
1114:   PetscFree(rbuf1);

1116:   /* Form the matrix */
1117:   /* create col map: global col of C -> local col of submatrices */
1118:   {
1119:     const PetscInt *icol_i;
1120: #if defined (PETSC_USE_CTABLE)
1121:     PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);
1122:     for (i=0; i<ismax; i++) {
1123:       PetscTableCreate(ncol[i]+1,&cmap[i]);
1124:       jmax   = ncol[i];
1125:       icol_i = icol[i];
1126:       cmap_i = cmap[i];
1127:       for (j=0; j<jmax; j++) {
1128:         PetscTableAdd(cmap[i],icol_i[j]+1,j+1);
1129:       }
1130:     }
1131: #else
1132:     PetscMalloc(ismax*sizeof(PetscInt*),&cmap);
1133:     PetscMalloc(ismax*C->cmap->N*sizeof(PetscInt),&cmap[0]);
1134:     PetscMemzero(cmap[0],ismax*C->cmap->N*sizeof(PetscInt));
1135:     for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->cmap->N; }
1136:     for (i=0; i<ismax; i++) {
1137:       jmax   = ncol[i];
1138:       icol_i = icol[i];
1139:       cmap_i = cmap[i];
1140:       for (j=0; j<jmax; j++) {
1141:         cmap_i[icol_i[j]] = j+1;
1142:       }
1143:     }
1144: #endif
1145:   }

1147:   /* Create lens which is required for MatCreate... */
1148:   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
1149:   PetscMalloc(ismax*sizeof(PetscInt*),&lens);
1150:   PetscMalloc(j*sizeof(PetscInt),&lens[0]);
1151:   PetscMemzero(lens[0],j*sizeof(PetscInt));
1152:   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
1153: 
1154:   /* Update lens from local data */
1155:   for (i=0; i<ismax; i++) {
1156:     jmax   = nrow[i];
1157:     cmap_i = cmap[i];
1158:     irow_i = irow[i];
1159:     lens_i = lens[i];
1160:     for (j=0; j<jmax; j++) {
1161:       l = 0;
1162:       row  = irow_i[j];
1163:       while (row >= C->rmap->range[l+1]) l++;
1164:       proc = l;
1165:       if (proc == rank) {
1166:         MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1167:         for (k=0; k<ncols; k++) {
1168: #if defined (PETSC_USE_CTABLE)
1169:           PetscTableFind(cmap_i,cols[k]+1,&tcol);
1170: #else
1171:           tcol = cmap_i[cols[k]];
1172: #endif
1173:           if (tcol) { lens_i[j]++;}
1174:         }
1175:         MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1176:       }
1177:     }
1178:   }
1179: 
1180:   /* Create row map: global row of C -> local row of submatrices */
1181: #if defined (PETSC_USE_CTABLE)
1182:   PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);
1183:     for (i=0; i<ismax; i++) {
1184:       PetscTableCreate(nrow[i]+1,&rmap[i]);
1185:       rmap_i = rmap[i];
1186:       irow_i = irow[i];
1187:       jmax   = nrow[i];
1188:       for (j=0; j<jmax; j++) {
1189:         PetscTableAdd(rmap[i],irow_i[j]+1,j+1);
1190:       }
1191:     }
1192: #else
1193:   PetscMalloc(ismax*sizeof(PetscInt*),&rmap);
1194:   PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);
1195:   PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1196:   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
1197:   for (i=0; i<ismax; i++) {
1198:     rmap_i = rmap[i];
1199:     irow_i = irow[i];
1200:     jmax   = nrow[i];
1201:     for (j=0; j<jmax; j++) {
1202:       rmap_i[irow_i[j]] = j;
1203:     }
1204:   }
1205: #endif
1206: 
1207:   /* Update lens from offproc data */
1208:   {
1209:     PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;

1211:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1212:       MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1213:       idex   = pa[idex2];
1214:       sbuf1_i = sbuf1[idex];
1215:       jmax    = sbuf1_i[0];
1216:       ct1     = 2*jmax+1;
1217:       ct2     = 0;
1218:       rbuf2_i = rbuf2[idex2];
1219:       rbuf3_i = rbuf3[idex2];
1220:       for (j=1; j<=jmax; j++) {
1221:         is_no   = sbuf1_i[2*j-1];
1222:         max1    = sbuf1_i[2*j];
1223:         lens_i  = lens[is_no];
1224:         cmap_i  = cmap[is_no];
1225:         rmap_i  = rmap[is_no];
1226:         for (k=0; k<max1; k++,ct1++) {
1227: #if defined (PETSC_USE_CTABLE)
1228:           PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1229:           row--;
1230:           if (row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); }
1231: #else
1232:           row  = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1233: #endif
1234:           max2 = rbuf2_i[ct1];
1235:           for (l=0; l<max2; l++,ct2++) {
1236: #if defined (PETSC_USE_CTABLE)
1237:             PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1238: #else
1239:             tcol = cmap_i[rbuf3_i[ct2]];
1240: #endif
1241:             if (tcol) {
1242:               lens_i[row]++;
1243:             }
1244:           }
1245:         }
1246:       }
1247:     }
1248:   }
1249:   PetscFree(r_status3);
1250:   PetscFree(r_waits3);
1251:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1252:   PetscFree(s_status3);
1253:   PetscFree(s_waits3);

1255:   /* Create the submatrices */
1256:   if (scall == MAT_REUSE_MATRIX) {
1257:     PetscBool  flag;

1259:     /*
1260:         Assumes new rows are the same length as the old rows; hence this is a bug!
1261:     */
1262:     for (i=0; i<ismax; i++) {
1263:       mat = (Mat_SeqAIJ *)(submats[i]->data);
1264:       if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1265:       PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1266:       if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
1267:       /* Initial matrix as if empty */
1268:       PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));
1269:       submats[i]->factortype = C->factortype;
1270:     }
1271:   } else {
1272:     for (i=0; i<ismax; i++) {
1273:       MatCreate(PETSC_COMM_SELF,submats+i);
1274:       MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);
1275:       MatSetType(submats[i],((PetscObject)A)->type_name);
1276:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1277:     }
1278:   }

1280:   /* Assemble the matrices */
1281:   /* First assemble the local rows */
1282:   {
1283:     PetscInt    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1284:     PetscScalar *imat_a;
1285: 
1286:     for (i=0; i<ismax; i++) {
1287:       mat       = (Mat_SeqAIJ*)submats[i]->data;
1288:       imat_ilen = mat->ilen;
1289:       imat_j    = mat->j;
1290:       imat_i    = mat->i;
1291:       imat_a    = mat->a;
1292:       cmap_i    = cmap[i];
1293:       rmap_i    = rmap[i];
1294:       irow_i    = irow[i];
1295:       jmax      = nrow[i];
1296:       for (j=0; j<jmax; j++) {
1297:         l = 0;
1298:         row      = irow_i[j];
1299:         while (row >= C->rmap->range[l+1]) l++;
1300:         proc = l;
1301:         if (proc == rank) {
1302:           old_row  = row;
1303: #if defined (PETSC_USE_CTABLE)
1304:           PetscTableFind(rmap_i,row+1,&row);
1305:           row--;
1306: #else
1307:           row      = rmap_i[row];
1308: #endif
1309:           ilen_row = imat_ilen[row];
1310:           MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1311:           mat_i    = imat_i[row] ;
1312:           mat_a    = imat_a + mat_i;
1313:           mat_j    = imat_j + mat_i;
1314:           for (k=0; k<ncols; k++) {
1315: #if defined (PETSC_USE_CTABLE)
1316:             PetscTableFind(cmap_i,cols[k]+1,&tcol);
1317: #else
1318:             tcol = cmap_i[cols[k]];
1319: #endif
1320:             if (tcol){
1321:               *mat_j++ = tcol - 1;
1322:               *mat_a++ = vals[k];
1323:               ilen_row++;
1324:             }
1325:           }
1326:           MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1327:           imat_ilen[row] = ilen_row;
1328:         }
1329:       }
1330:     }
1331:   }

1333:   /*   Now assemble the off proc rows*/
1334:   {
1335:     PetscInt    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1336:     PetscInt    *imat_j,*imat_i;
1337:     PetscScalar *imat_a,*rbuf4_i;

1339:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1340:       MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1341:       idex   = pa[idex2];
1342:       sbuf1_i = sbuf1[idex];
1343:       jmax    = sbuf1_i[0];
1344:       ct1     = 2*jmax + 1;
1345:       ct2     = 0;
1346:       rbuf2_i = rbuf2[idex2];
1347:       rbuf3_i = rbuf3[idex2];
1348:       rbuf4_i = rbuf4[idex2];
1349:       for (j=1; j<=jmax; j++) {
1350:         is_no     = sbuf1_i[2*j-1];
1351:         rmap_i    = rmap[is_no];
1352:         cmap_i    = cmap[is_no];
1353:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
1354:         imat_ilen = mat->ilen;
1355:         imat_j    = mat->j;
1356:         imat_i    = mat->i;
1357:         imat_a    = mat->a;
1358:         max1      = sbuf1_i[2*j];
1359:         for (k=0; k<max1; k++,ct1++) {
1360:           row   = sbuf1_i[ct1];
1361: #if defined (PETSC_USE_CTABLE)
1362:           PetscTableFind(rmap_i,row+1,&row);
1363:           row--;
1364: #else
1365:           row   = rmap_i[row];
1366: #endif
1367:           ilen  = imat_ilen[row];
1368:           mat_i = imat_i[row] ;
1369:           mat_a = imat_a + mat_i;
1370:           mat_j = imat_j + mat_i;
1371:           max2 = rbuf2_i[ct1];
1372:           for (l=0; l<max2; l++,ct2++) {
1373: 
1374: #if defined (PETSC_USE_CTABLE)
1375:             PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1376: #else
1377:             tcol = cmap_i[rbuf3_i[ct2]];
1378: #endif
1379:             if (tcol) {
1380:               *mat_j++ = tcol - 1;
1381:               *mat_a++ = rbuf4_i[ct2];
1382:               ilen++;
1383:             }
1384:           }
1385:           imat_ilen[row] = ilen;
1386:         }
1387:       }
1388:     }
1389:   }
1390:   PetscFree(r_status4);
1391:   PetscFree(r_waits4);
1392:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1393:   PetscFree(s_waits4);
1394:   PetscFree(s_status4);

1396:   /* Restore the indices */
1397:   for (i=0; i<ismax; i++) {
1398:     ISRestoreIndices(isrow[i],irow+i);
1399:     ISRestoreIndices(iscol[i],icol+i);
1400:   }

1402:   /* Destroy allocated memory */
1403:   PetscFree4(irow,icol,nrow,ncol);
1404:   PetscFree4(w1,w2,w3,w4);
1405:   PetscFree(pa);

1407:   PetscFree4(sbuf1,ptr,tmp,ctr);
1408:   PetscFree(rbuf2);
1409:   for (i=0; i<nrqr; ++i) {
1410:     PetscFree(sbuf2[i]);
1411:   }
1412:   for (i=0; i<nrqs; ++i) {
1413:     PetscFree(rbuf3[i]);
1414:     PetscFree(rbuf4[i]);
1415:   }

1417:   PetscFree3(sbuf2,req_size,req_source);
1418:   PetscFree(rbuf3);
1419:   PetscFree(rbuf4);
1420:   PetscFree(sbuf_aj[0]);
1421:   PetscFree(sbuf_aj);
1422:   PetscFree(sbuf_aa[0]);
1423:   PetscFree(sbuf_aa);
1424: #if defined (PETSC_USE_CTABLE)
1425:   for (i=0; i<ismax; i++) {
1426:     PetscTableDestroy((PetscTable*)&cmap[i]);
1427:     PetscTableDestroy((PetscTable*)&rmap[i]);
1428:   }
1429: #else
1430:   PetscFree(cmap[0]);
1431:   PetscFree(rmap[0]);
1432: #endif
1433:   PetscFree(cmap);
1434:   PetscFree(rmap);
1435:   PetscFree(lens[0]);
1436:   PetscFree(lens);

1438:   for (i=0; i<ismax; i++) {
1439:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1440:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1441:   }
1442:   return(0);
1443: }

1445: /* 
1446:  Observe that the Seq matrices used to construct this MPI matrix are not increfed.
1447:  Be careful not to destroy them elsewhere.  
1448:  */
1451: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1452: {
1453:   /* If making this function public, change the error returned in this function away from _PLIB. */
1455:   Mat_MPIAIJ *aij;
1456:   PetscBool seqaij;

1459:   /* Check to make sure the component matrices are compatible with C. */
1460:   PetscTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1461:   if(!seqaij) {
1462:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1463:   }
1464:   PetscTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1465:   if(!seqaij) {
1466:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1467:   }
1468:   if(A->rmap->n != B->rmap->n || A->rmap->bs != B->rmap->bs || A->cmap->bs != B->cmap->bs) {
1469:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1470:   }
1471:   MatCreate(comm, C);
1472:   MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1473:   PetscLayoutSetBlockSize((*C)->rmap,A->rmap->bs);
1474:   PetscLayoutSetBlockSize((*C)->cmap,A->cmap->bs);
1475:   PetscLayoutSetUp((*C)->rmap);
1476:   PetscLayoutSetUp((*C)->cmap);
1477:   if((*C)->cmap->N != A->cmap->n + B->cmap->n) {
1478:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1479:   }
1480:   MatSetType(*C, MATMPIAIJ);
1481:   aij = (Mat_MPIAIJ*)((*C)->data);
1482:   aij->A = A;
1483:   aij->B = B;
1484:   PetscLogObjectParent(*C,A);
1485:   PetscLogObjectParent(*C,B);
1486:   (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1487:   (*C)->assembled    = (PetscBool)(A->assembled && B->assembled);
1488:   return(0);
1489: }

1493: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1494: {
1495:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);
1499:   *A = aij->A;
1500:   *B = aij->B;
1501:   /* Note that we don't incref *A and *B, so be careful! */
1502:   return(0);
1503: }


1508: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1509:                                                  PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1510:                                                  PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1511:                                                  PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1512: {
1514:   PetscMPIInt size, flag;
1515:   PetscInt    i,ii;
1516:   PetscInt    ismax_c;
1518:   if(!ismax) {
1519:     return(0);
1520:   }
1521:   for(i = 0, ismax_c = 0; i < ismax; ++i) {
1522:     MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1523:     if(flag != MPI_IDENT) {
1524:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1525:     }
1526:     MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1527:     if(size > 1) {
1528:       ++ismax_c;
1529:     }
1530:   }
1531:   if(!ismax_c) { /* Sequential ISs only, so can call the sequential matrix extraction subroutine. */
1532:     (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1533:   }
1534:   else { /* if(ismax_c) */
1535:     Mat *A,*B;
1536:     IS  *isrow_c, *iscol_c;
1537:     PetscMPIInt size;
1538:     /* 
1539:      Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate 
1540:      array of sequential matrices underlying the resulting parallel matrices.  
1541:      Which arrays to allocate is based on the value of MatReuse scall.
1542:      There are as many diag matrices as there are original index sets.
1543:      There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.

1545:      Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists, 
1546:      we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which 
1547:      getsubmats_seq will deposit the extracted diag and off-diag parts. 
1548:      Thus, if reuse IS taking place, we have to allocate the seq matrix arrays here;
1549:      if reuse is NOT taking place, then the seq matrix arrays are allocated by getsubmats_seq.
1550:    */
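    /*
     For example (illustrative): with ismax = 3 index sets whose communicators have sizes
     {1,4,1}, ismax_c = 1; A[] then holds three sequential matrices (full submatrices for
     the two sequential index sets, the diag piece for the parallel one), while B[] holds
     the single off-diag piece of the parallel index set.
    */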
1551: 
1552:     /* Parallel matrix array is allocated only if no reuse is taking place. */
1553:     if (scall != MAT_REUSE_MATRIX) {
1554:       PetscMalloc((ismax)*sizeof(Mat),submat);
1555:     }
1556:     else {
1557:       PetscMalloc(ismax*sizeof(Mat), &A);
1558:       PetscMalloc(ismax_c*sizeof(Mat), &B);
1559:       /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1560:       for(i = 0, ii = 0; i < ismax; ++i) {
1561:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1562:         if(size > 1) {
1563:           (*extractseq)((*submat)[i],A+i,B+ii);
1564:           ++ii;
1565:         }
1566:         else {
1567:           A[i] = (*submat)[i];
1568:         }
1569:       }
1570:     }
1571:     /* 
1572:      Construct the complements of the iscol ISs for parallel ISs only.
1573:      These are used to extract the off-diag portion of the resulting parallel matrix.
1574:      The row IS for the off-diag portion is the same as for the diag portion,
1575:      so we merely alias the row IS, while skipping those that are sequential.
1576:     */
1577:     PetscMalloc2(ismax_c,IS,&isrow_c, ismax_c, IS, &iscol_c);
1578:     for(i = 0, ii = 0; i < ismax; ++i) {
1579:       MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1580:       if(size > 1) {
1581:         isrow_c[ii] = isrow[i];
1582:         ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1583:         ++ii;
1584:       }
1585:     }
1586:     /* Now obtain the sequential A and B submatrices separately. */
1587:     (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1588:     (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1589:     for(ii = 0; ii < ismax_c; ++ii) {
1590:       ISDestroy(&iscol_c[ii]);
1591:     }
1592:     PetscFree2(isrow_c, iscol_c);
1593:     /* 
1594:      If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1595:      have been extracted directly into the parallel matrices containing them, or,
1596:      when size == 1, A itself is the sequential submatrix already stored in submat.
1597:      Otherwise, make sure that parallel matrices are constructed from A & B, or that
1598:      A is put into the correct submat slot (if size == 1).
1599:      */
1600:     if(scall != MAT_REUSE_MATRIX) {
1601:       for(i = 0, ii = 0; i < ismax; ++i) {
1602:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1603:         if(size > 1) {
1604:           /* 
1605:            For each parallel isrow[i], create parallel matrices from the extracted sequential matrices. 
1606:            */
1607:           /* Construct submat[i] from the Seq pieces A and B. */
1608:           (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);
1609: 
1610:           ++ii;
1611:         }
1612:         else {
1613:           (*submat)[i] = A[i];
1614:         }
1615:       }
1616:     }
1617:     PetscFree(A);
1618:     PetscFree(B);
1619:   }
1620:   return(0);
1621: }/* MatGetSubMatricesParallel_MPIXAIJ() */



1627: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1628: {
1631:   MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,
1632:                                            MatCreateMPIAIJFromSeqMatrices_Private,
1633:                                            MatMPIAIJExtractSeqMatrices_Private);
1634: 
1635:   return(0);
1636: }