Actual source code: createProlongation.c

  1: /* 
  2:  GAMG geometric-algebric multiogrid PC - Mark Adams 2011
  3:  */

  5: #include <../src/ksp/pc/impls/gamg/gamg.h>

  7: #define REAL PetscReal
  8: #include <triangle.h>

 10: #include <assert.h>
 11: #include <petscblaslapack.h>

 13: typedef enum { NOT_DONE=-2, DELETED=-1, REMOVED=-3 } NState;
 14: #define  IS_SELECTED(s) (s!=DELETED && s!=NOT_DONE && s!=REMOVED)

 16: /* -------------------------------------------------------------------------- */
 17: /*
 18:    getDataWithGhosts - hacks into Mat MPIAIJ so this must have > 1 pe

 20:    Input Parameter:
 21:    . a_Gmat - MPIAIJ matrix for scattters
 22:    . a_data_sz - number of data terms per node (# cols in output)
 23:    . a_data_in[a_nloc*a_data_sz] - column oriented data
 24:    Output Parameter:
 25:    . a_stride - numbrt of rows of output
 26:    . a_data_out[a_stride*a_data_sz] - output data with ghosts
 27: */
 30: PetscErrorCode getDataWithGhosts( const Mat a_Gmat,
 31:                                   const PetscInt a_data_sz,
 32:                                   const PetscReal a_data_in[],
 33:                                   PetscInt *a_stride,
 34:                                   PetscReal **a_data_out
 35:                                   )
 36: {
 37:   PetscMPIInt    ierr,mype,npe;
 38:   MPI_Comm       wcomm = ((PetscObject)a_Gmat)->comm;
 39:   Vec            tmp_crds;
 40:   Mat_MPIAIJ    *mpimat = (Mat_MPIAIJ*)a_Gmat->data;
 41:   PetscInt       nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
 42:   PetscScalar   *data_arr;
 43:   PetscReal     *datas;

 46:   MPI_Comm_rank(wcomm,&mype);
 47:   MPI_Comm_size(wcomm,&npe);                      assert(npe>1);
 48:   MatGetOwnershipRange( a_Gmat, &my0, &Iend );
 49:   nloc = Iend - my0;
 50:   VecGetLocalSize( mpimat->lvec, &num_ghosts );
 51:   nnodes = num_ghosts + nloc;
 52:   *a_stride = nnodes;
 53:   MatGetVecs( a_Gmat, &tmp_crds, 0 );

 55:   PetscMalloc( a_data_sz*nnodes*sizeof(PetscReal), &datas);
 56:   for(dir=0; dir<a_data_sz; dir++) {
 57:     /* set local, and global */
 58:     for(kk=0; kk<nloc; kk++) {
 59:       PetscInt gid = my0 + kk;
 60:       PetscScalar crd = a_data_in[dir*nloc + kk]; /* col oriented */
 61:       datas[dir*nnodes + kk] = crd;
 62:       VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES );
 63:     }
 64:     VecAssemblyBegin( tmp_crds );
 65:     VecAssemblyEnd( tmp_crds );
 66:     /* get ghost datas */
 67:     VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
 68: 
 69:     VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
 70: 
 71:     VecGetArray( mpimat->lvec, &data_arr );
 72:     for(kk=nloc,jj=0;jj<num_ghosts;kk++,jj++){
 73:       datas[dir*nnodes + kk] = data_arr[jj];
 74:     }
 75:     VecRestoreArray( mpimat->lvec, &data_arr );
 76:   }
 77:   VecDestroy(&tmp_crds);

 79:   *a_data_out = datas;

 81:   return(0);
 82: }
 83:  /* -------------------------------------------------------------------------- */
 84: /*
 85:    smoothAggs - 

 87:    Input Parameter:
 88:    . a_Gmat - glabal matrix of graph (data not defined)
 89:    Input/Output Parameter:

 91:    . a_locals_llist - linked list of local nodes rooted at selected node (size is nloc + nghosts)
 92: */
 95: PetscErrorCode smoothAggs( const Mat a_Gmat_2, /* base (squared) graph */
 96:                            const Mat a_Gmat_1, /* base graph */
 97:                            const PetscScalar a_lid_state[], /* [nloc], states */
 98:                            PetscInt a_id_llist[], /* [nloc+nghost], aggragate list */
 99:                            PetscScalar a_deleted_parent_gid[] /* [nloc], which pe owns my deleted */

101:                            )
102: {
104:   PetscBool      isMPI;
105:   Mat_SeqAIJ    *matA_2, *matB_2, *matA_1, *matB_1=0;
106:   MPI_Comm       wcomm = ((PetscObject)a_Gmat_2)->comm;
107:   PetscMPIInt    mype;
108:   PetscInt       nghosts_2,nghosts_1,lid,*ii,n,*idx,j,ix,Iend,my0;
109:   Mat_MPIAIJ    *mpimat_2 = 0, *mpimat_1=0;
110:   const PetscInt nloc = a_Gmat_2->rmap->n;
111:   PetscScalar   *cpcol_1_state;

114:   MPI_Comm_rank( wcomm, &mype );
115:   MatGetOwnershipRange(a_Gmat_1,&my0,&Iend);
116: 
117:   /* get submatrices */
118:   PetscTypeCompare( (PetscObject)a_Gmat_2, MATMPIAIJ, &isMPI );
119:   if (isMPI) {
120:     PetscInt    *gids, gid;
121:     PetscScalar *real_gids;
122:     Vec          tempVec;

124:     PetscMalloc( nloc*sizeof(PetscInt), &gids );
125:     PetscMalloc( nloc*sizeof(PetscScalar), &real_gids );

127:     for(lid=0,gid=my0;lid<nloc;lid++,gid++){
128:       gids[lid] = gid;
129:       real_gids[lid] = (PetscScalar)gid;
130:     }
131:     /* grab matrix objects */
132:     mpimat_2 = (Mat_MPIAIJ*)a_Gmat_2->data;
133:     matA_2 = (Mat_SeqAIJ*)mpimat_2->A->data;
134:     matB_2 = (Mat_SeqAIJ*)mpimat_2->B->data;
135:     mpimat_1 = (Mat_MPIAIJ*)a_Gmat_1->data;
136:     matA_1 = (Mat_SeqAIJ*)mpimat_1->A->data;
137:     matB_1 = (Mat_SeqAIJ*)mpimat_1->B->data;
138:     /* get ghost sizes */
139:     VecGetLocalSize( mpimat_1->lvec, &nghosts_1 );
140:     VecGetLocalSize( mpimat_2->lvec, &nghosts_2 );
141:     /* get 'cpcol_1_state' */
142:     MatGetVecs( a_Gmat_1, &tempVec, 0 );
143:     VecSetValues( tempVec, nloc, gids, a_lid_state, INSERT_VALUES );
144:     VecAssemblyBegin( tempVec );
145:     VecAssemblyEnd( tempVec );
146:     VecScatterBegin(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
147:       VecScatterEnd(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
148:     VecGetArray( mpimat_1->lvec, &cpcol_1_state );
149:     VecDestroy( &tempVec );

151:     PetscFree( gids );
152:     PetscFree( real_gids );
153:   } else {
154:     matA_2 = (Mat_SeqAIJ*)a_Gmat_2->data;
155:     matA_1 = (Mat_SeqAIJ*)a_Gmat_1->data;
156:     nghosts_2 = nghosts_1 = 0;
157:   }
158:   assert( matA_1 && !matA_1->compressedrow.use );
159:   assert( matB_1==0 || matB_1->compressedrow.use );

161:   {
162:     PetscInt *lid_cprowID_1;
163:     PetscInt *lid_selectedid;

165:     PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID_1 );
166:     PetscMalloc( nloc*sizeof(PetscInt), &lid_selectedid );

168:     /*reverse map to selelected node */
169:     for(lid=0;lid<nloc;lid++) lid_cprowID_1[lid] = lid_selectedid[lid] = -1;
170:     for(lid=0;lid<nloc;lid++){
171:       NState state = (NState)a_lid_state[lid];
172:       if( IS_SELECTED(state) ){
173:         PetscInt flid = lid;
174:         do{
175:           lid_selectedid[flid] = lid;
176:         } while( (flid=a_id_llist[flid]) != -1 );
177:       }
178:     }

180:     /* set index into cmpressed row 'lid_cprowID' */
181:     if( matB_1 ) {
182:       ii = matB_1->compressedrow.i;
183:       for (ix=0; ix<matB_1->compressedrow.nrows; ix++) {
184:         PetscInt lid = matB_1->compressedrow.rindex[ix];
185:         lid_cprowID_1[lid] = ix;
186:       }
187:     }
188: 
189:     for(lid=0;lid<nloc;lid++){
190:       NState state = (NState)a_lid_state[lid];
191:       if( IS_SELECTED(state) ) {
192:         /* steal locals */
193:         ii = matA_1->i; n = ii[lid+1] - ii[lid];
194:         idx = matA_1->j + ii[lid];
195:         for (j=0; j<n; j++) {
196:           PetscInt flid, lidj = idx[j];
197:           NState statej = (NState)a_lid_state[lidj];
198:           if( statej==DELETED && lid_selectedid[lidj] != lid && lid_selectedid[lidj] != -1){ /* steal it */
199:             PetscInt hav=0, flid2 = lid_selectedid[lidj], lastid; assert(flid2!=-1);
200:             /* lid_selectedid[lidj] = lid; don't really need to do this ... */
201:             /* I'm stealing this local */
202:             for( lastid=flid2, flid=a_id_llist[flid2] ; flid!=-1 ; flid=a_id_llist[flid] ) {
203:               if( flid == lidj ) {
204:                 a_id_llist[lastid] = a_id_llist[lidj];                    /* remove lidj from list */
205:                 a_id_llist[lidj] = a_id_llist[lid]; a_id_llist[lid] = lidj; /* insert 'lidj' into head of llist */
206:                 hav=1;
207:                 /* break; */
208:               }
209:               lastid = flid;
210:             }
211:             assert(hav==1);
212:           }
213:         }
214:         /* ghosts are done by 'DELETED' branch */
215:       }
216:       else if( state == DELETED ) {
217:         /* see if I have a selected ghost neighbors */
218:         if( (ix=lid_cprowID_1[lid]) != -1 ) {
219:           PetscInt hav = 0, old_sel_lid = lid_selectedid[lid], lastid; assert(old_sel_lid<nloc);
220:           ii = matB_1->compressedrow.i; n = ii[ix+1] - ii[ix];
221:           idx = matB_1->j + ii[ix];
222:           for( j=0 ; j<n ; j++ ) {
223:             PetscInt cpid = idx[j];
224:             NState statej = (NState)cpcol_1_state[cpid];
225:             if( IS_SELECTED(statej) ) {
226:               PetscInt new_sel_gid = (PetscInt)statej, hv=0, flid;
227:               hav++;
228:               /* lid_selectedid[lid] = cpid; */
229:               /* remove from list */
230:               if(old_sel_lid != -1){
231:                 assert(a_deleted_parent_gid[lid]==-1.0);
232:                 for( lastid=old_sel_lid, flid=a_id_llist[old_sel_lid] ; flid != -1 ; flid=a_id_llist[flid] ) {
233:                   if( flid == lid ) {
234:                     a_id_llist[lastid] = a_id_llist[lid];   /* remove lid from 'old_sel_lid' list */
235:                     hv++;
236:                     /* break; */
237:                   }
238:                   lastid = flid;
239:                 }
240:                 assert(hv==1);
241:               }
242:               else {
243:                 assert(a_deleted_parent_gid[lid]!=-1.0);
244:               }
245: 
246:               a_deleted_parent_gid[lid] = (PetscScalar)new_sel_gid; /* this will get other proc to add this */
247:             }
248:           }
249:           assert(hav <= 1);
250:         }
251:       }
252:     }
253: 
254:     PetscFree( lid_cprowID_1 );
255:     PetscFree( lid_selectedid );
256:   }
257: 
258:   if(isMPI) {
259:     VecRestoreArray( mpimat_1->lvec, &cpcol_1_state );
260:   }

262:   return(0);
263: }

265: static const PetscMPIInt target = -2;
266: /* -------------------------------------------------------------------------- */
267: /*
268:    maxIndSetAgg - parallel maximal independent set (MIS) with data locality info.

270:    Input Parameter:
271:    . a_perm - serial permutation of rows of local to process in MIS
272:    . a_rank - serial array of rank (priority) for MIS, lower can not be deleted by higher
273:    . a_Gmat - glabal matrix of graph (data not defined)
274:    . a_Auxmat - non-squared matrix
275:    . a_strict_aggs - flag for whether to keep strict (non overlapping) aggregates in 'llist';
276:    Output Parameter:
277:    . a_selected - IS of selected vertices, includes 'ghost' nodes at end with natural local indices
278:    . a_locals_llist - linked list of local nodes rooted at selected node (size is nloc + nghosts)
279: */
282: PetscErrorCode maxIndSetAgg( const IS a_perm,
283:                              const IS a_ranks,
284:                              const Mat a_Gmat,
285:                              const Mat a_Auxmat,
286:                              const PetscBool a_strict_aggs,
287:                              IS *a_selected,
288:                              IS *a_locals_llist
289:                              )
290: {
292:   PetscBool      isMPI;
293:   Mat_SeqAIJ    *matA, *matB = 0;
294:   MPI_Comm       wcomm = ((PetscObject)a_Gmat)->comm;
295:   Vec            locState, ghostState;
296:   PetscInt       num_fine_ghosts,kk,n,ix,j,*idx,*ii,iter,Iend,my0;
297:   Mat_MPIAIJ    *mpimat = 0;
298:   PetscScalar   *cpcol_gid,*cpcol_state;
299:   PetscMPIInt    mype;
300:   const PetscInt *perm_ix;
301:   PetscInt nDone = 0, nselected = 0;
302:   const PetscInt nloc = a_Gmat->rmap->n;

305:   MPI_Comm_rank( wcomm, &mype );
306:   /* get submatrices */
307:   PetscTypeCompare( (PetscObject)a_Gmat, MATMPIAIJ, &isMPI );
308:   if (isMPI) {
309:     mpimat = (Mat_MPIAIJ*)a_Gmat->data;
310:     matA = (Mat_SeqAIJ*)mpimat->A->data;
311:     matB = (Mat_SeqAIJ*)mpimat->B->data;
312:   } else {
313:     matA = (Mat_SeqAIJ*)a_Gmat->data;
314:   }
315:   assert( matA && !matA->compressedrow.use );
316:   assert( matB==0 || matB->compressedrow.use );
317:   /* get vector */
318:   MatGetVecs( a_Gmat, &locState, 0 );

320:   MatGetOwnershipRange(a_Gmat,&my0,&Iend);

322: /* PetscReal *fiddata,fid_glid_loc[nloc]; */
323: /* for(kk=0;kk<nloc;kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk); */
324: /* getDataWithGhosts( a_Gmat, 1, fid_glid_loc, &n, &fiddata );  */
325: /* PetscInt flid_fgid[n]; */
326: /* for(kk=0;kk<n;kk++) flid_fgid[kk] = (PetscInt)fiddata[kk]; */
327: /* PetscFree( fiddata );  */

329:   if( mpimat ) {
330:     PetscInt gid;
331:     for(kk=0,gid=my0;kk<nloc;kk++,gid++) {
332:       PetscScalar v = (PetscScalar)(gid);
333:       VecSetValues( locState, 1, &gid, &v, INSERT_VALUES );   /* set with GID */
334:     }
335:     VecAssemblyBegin( locState );
336:     VecAssemblyEnd( locState );
337:     VecScatterBegin(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
338:       VecScatterEnd(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
339:     VecGetArray( mpimat->lvec, &cpcol_gid );  /* get proc ID in 'cpcol_gid' */
340:     VecDuplicate( mpimat->lvec, &ghostState );  /* need 2nd compressed col. of off proc data */
341:     VecGetLocalSize( mpimat->lvec, &num_fine_ghosts );
342:     VecSet( ghostState, (PetscScalar)(NOT_DONE) );   /* set with UNKNOWN state */
343:   }
344:   else num_fine_ghosts = 0;

346:   {  /* need an inverse map - locals */
347:     PetscInt *lid_cprowID, *lid_gid;
348:     PetscScalar *deleted_parent_gid; /* only used for strict aggs */
349:     PetscInt *id_llist; /* linked list with locality info - output */
350:     PetscScalar *lid_state;

352:     PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID );
353:     PetscMalloc( nloc*sizeof(PetscInt), &lid_gid );
354:     PetscMalloc( nloc*sizeof(PetscScalar), &deleted_parent_gid );
355:     PetscMalloc( (nloc+num_fine_ghosts)*sizeof(PetscInt), &id_llist );
356:     PetscMalloc( nloc*sizeof(PetscScalar), &lid_state );

358:     for(kk=0;kk<nloc;kk++) {
359:       id_llist[kk] = -1; /* terminates linked lists */
360:       lid_cprowID[kk] = -1;
361:       deleted_parent_gid[kk] = -1.0;
362:       lid_gid[kk] = kk + my0;
363:       lid_state[kk] =  (PetscScalar)(NOT_DONE);
364:     }
365:     for(ix=0;kk<nloc+num_fine_ghosts;kk++,ix++) {
366:       id_llist[kk] = -1; /* terminates linked lists */
367:     }
368:     /* set index into cmpressed row 'lid_cprowID' */
369:     if( matB ) {
370:       ii = matB->compressedrow.i;
371:       for (ix=0; ix<matB->compressedrow.nrows; ix++) {
372:         PetscInt lid = matB->compressedrow.rindex[ix];
373:         lid_cprowID[lid] = ix;
374:       }
375:     }
376:     /* MIS */
377:     ISGetIndices( a_perm, &perm_ix );
378:     iter = 0;
379:     while ( nDone < nloc || PETSC_TRUE ) { /* asyncronous not implemented */
380:       iter++;
381:       if( mpimat ) {
382:         VecGetArray( ghostState, &cpcol_state );
383:       }
384:       /* check all vertices */
385:       for(kk=0;kk<nloc;kk++){
386:         PetscInt lid = perm_ix[kk];
387:         NState state = (NState)lid_state[lid];
388: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t[%d]%s %d) try gid %d in state %s\n",mype,__FUNCT__,iter,lid+my0, (state==NOT_DONE) ? "not done" : (state!=DELETED) ? (state==REMOVED?"removed":"selected") : "deleted");
389:         if( state == NOT_DONE ) {
390:           /* parallel test, delete if selected ghost */
391:           PetscBool isOK = PETSC_TRUE;
392:           if( (ix=lid_cprowID[lid]) != -1 ) { /* if I have any ghost neighbors */
393:             ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
394:             idx = matB->j + ii[ix];
395:             for( j=0 ; j<n ; j++ ) {
396:               PetscInt cpid = idx[j]; /* compressed row ID in B mat */
397:               PetscInt gid = (PetscInt)cpcol_gid[cpid];
398:               NState statej = (NState)cpcol_state[cpid];
399:               /* if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t\t[%d]%s %d) check (local id) ghost %d on pe %d, for local gid %d \n",mype,__FUNCT__,iter,cpid,pe,lid+my0); */
400:               if( statej == NOT_DONE && gid >= Iend ) { /* should be (pe>mype), use gid as pe proxy */
401:                 isOK = PETSC_FALSE; /* can not delete */
402: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t\t\t[%d]%s %d) skip gid %d \n",mype,__FUNCT__,iter,lid+my0);
403:               }
404:               else if( IS_SELECTED(statej) ) { /* lid is now deleted, do it */
405:                 assert(0);
406:               }
407:             }
408:           } /* parallel test */
409:           if( isOK ){ /* select or remove this vertex */
410:             nDone++;
411:             /* check for singleton */
412:             ii = matA->i; n = ii[lid+1] - ii[lid];
413:             if( n < 2 ) {
414:               /* if I have any ghost adj then not a sing */
415:               ix = lid_cprowID[lid];
416:               if( ix==-1 || (matB->compressedrow.i[ix+1]-matB->compressedrow.i[ix])==0 ){
417: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t\t[%d]%s removing gid %d (n loc adj=%d, ghost ix = %d\n",mype,__FUNCT__,lid+my0,n,ix);
418:                 lid_state[lid] =  (PetscScalar)(REMOVED);
419:                 continue; /* one local adj (me) and no ghost - singleton - flag and continue */
420:               }
421:             }
422:             /* SELECTED state encoded with global index */
423:             lid_state[lid] =  (PetscScalar)(lid+my0);
424:             nselected++;
425: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t\t[%d]%s select gid <%d>\n",mype,__FUNCT__,lid+my0);
426:             /* delete local adj */
427:             idx = matA->j + ii[lid];
428:             for (j=0; j<n; j++) {
429:               PetscInt lidj = idx[j];
430:               NState statej = (NState)lid_state[lidj];
431:               if( statej == NOT_DONE ){
432: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t\t\t[%d]%s delete local <%d> with %d \n",mype,__FUNCT__,lidj+my0,lid+my0);
433:                 nDone++;
434:                 id_llist[lidj] = id_llist[lid]; id_llist[lid] = lidj; /* insert 'lidj' into head of llist */
435:                 lid_state[lidj] = (PetscScalar)DELETED;  /* delete this */
436:               }
437:             }

439:             /* delete ghost adj - deleted ghost done later for aggregation */
440:             if( !a_strict_aggs ) {
441:               if( (ix=lid_cprowID[lid]) != -1 ) { /* if I have any ghost neighbors */
442:                 ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
443:                 idx = matB->j + ii[ix];
444:                 for( j=0 ; j<n ; j++ ) {
445:                   PetscInt cpid = idx[j]; /* compressed row ID in B mat */
446:                   NState statej = (NState)cpcol_state[cpid]; assert( !IS_SELECTED(statej) );
447: 
448:                   if( statej == NOT_DONE ) {
449:                     PetscInt lidj = nloc + cpid;
450:                     if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t\t\t[%d]%s %d) selected %d deletes (local id) ghost <%d>\n",mype,__FUNCT__,iter,lid+my0,cpid);
451:                     /* cpcol_state[cpid] = (PetscScalar)DELETED; this should happen later ... */
452:                     id_llist[lidj] = id_llist[lid]; id_llist[lid] = lidj; /* insert 'lidj' into head of llist */
453:                   }
454:                 }
455:               }
456:             }

458:           } /* selected */
459:         } /* not done vertex */
460:       } /* vertex loop */

462:       /* update ghost states and count todos */
463:       if( mpimat ) {
464:         PetscInt t1, t2;
465:         VecRestoreArray( ghostState, &cpcol_state );
466:         /* put lid state in 'locState' */
467:         VecSetValues( locState, nloc, lid_gid, lid_state, INSERT_VALUES );
468:         VecAssemblyBegin( locState );
469:         VecAssemblyEnd( locState );
470:         /* scatter states, check for done */
471:         VecScatterBegin(mpimat->Mvctx,locState,ghostState,INSERT_VALUES,SCATTER_FORWARD);
472: 
473:           VecScatterEnd(mpimat->Mvctx,locState,ghostState,INSERT_VALUES,SCATTER_FORWARD);
474: 
475:         /* delete locals from selected ghosts */
476:         VecGetArray( ghostState, &cpcol_state );
477:         ii = matB->compressedrow.i;
478:         for (ix=0; ix<matB->compressedrow.nrows; ix++) {
479:           PetscInt lid = matB->compressedrow.rindex[ix];
480:           NState state = (NState)lid_state[lid];
481:           if( state == NOT_DONE ) {
482:             /* look at ghosts */
483:             n = ii[ix+1] - ii[ix];
484:             idx = matB->j + ii[ix];
485:             for( j=0 ; j<n ; j++ ) {
486:               PetscInt cpid = idx[j]; /* compressed row ID in B mat */
487:               NState statej = (NState)cpcol_state[cpid];
488:               if( IS_SELECTED(statej) ) { /* lid is now deleted, do it */
489:                 PetscInt lidj = nloc + cpid;
490:                 nDone++;
491:                 lid_state[lid] = (PetscScalar)DELETED; /* delete this */
492:                 if( !a_strict_aggs ) {
493:                   id_llist[lid] = id_llist[lidj]; id_llist[lidj] = lid; /* insert 'lid' into head of ghost llist */
494: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t[%d]%s end %d loop: deleted local <%d> with selected ghost %d \n",mype,__FUNCT__,iter,lid+my0,(PetscInt)cpcol_state[cpid]);
495:                 }
496:                 else {
497:                   PetscInt gid = (PetscInt)cpcol_gid[cpid];
498:                   deleted_parent_gid[lid] = (PetscScalar)gid; /* keep track of proc that I belong to */
499: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t[%d]%s end %d loop: deleted local %d with selected ghost %d \n",mype,__FUNCT__,iter,lid+my0,gid);
500:                 }
501:                 break;
502:               }
503:             }
504:           }
505:         }
506:         VecRestoreArray( ghostState, &cpcol_state );

508:         /* all done? */
509:         t1 = nloc - nDone; assert(t1>=0);
510:         MPI_Allreduce ( &t1, &t2, 1, MPI_INT, MPI_SUM, wcomm ); /* synchronous version */
511:         if( t2 == 0 ) break;
512: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"[%d]%s %d) finished MIS loop %d left to do\n",mype,__FUNCT__,iter,t1);
513:       }
514:       else break; /* all done */
515:     } /* outer parallel MIS loop */
516:     ISRestoreIndices(a_perm,&perm_ix);

518:     if( mpimat ){ /* free this buffer up (not really needed here) */
519:       VecRestoreArray( mpimat->lvec, &cpcol_gid );
520:     }
521: 
522:     /* adjust aggregates */
523:     if( a_strict_aggs ) {
524:       smoothAggs(a_Gmat, a_Auxmat, lid_state, id_llist, deleted_parent_gid);
525: 
526:     }

528:     /* tell adj who my deleted vertices belong to */
529:     if( a_strict_aggs && matB ) {
530:       PetscScalar *cpcol_sel_gid;
531:       PetscInt cpid;
532:       /* get proc of deleted ghost */
533:       VecSetValues(locState, nloc, lid_gid, deleted_parent_gid, INSERT_VALUES);
534:       VecAssemblyBegin(locState);
535:       VecAssemblyEnd(locState);
536:       VecScatterBegin(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
537:         VecScatterEnd(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
538:       VecGetArray( mpimat->lvec, &cpcol_sel_gid );  /* has pe that owns ghost */
539:       for(cpid=0; cpid<num_fine_ghosts; cpid++) {
540:         PetscInt gid = (PetscInt)cpcol_sel_gid[cpid];
541:         if( gid >= my0 && gid < Iend ) { /* I own this deleted */
542:           PetscInt lidj = nloc + cpid;
543:           PetscInt lid = gid - my0;
544: if(mype==target||target==-1)PetscPrintf(PETSC_COMM_SELF,"\t\t\t[%d]%s post process: add (local id) ghost <%d> to selected node %d \n",mype,__FUNCT__,cpid,lid+my0);
545:           id_llist[lidj] = id_llist[lid]; id_llist[lid] = lidj; /* insert 'lidj' into head of llist */
546:           assert( IS_SELECTED((NState)lid_state[lid]) );
547:         }
548:       }
549:       VecRestoreArray( mpimat->lvec, &cpcol_sel_gid );
550:     }

552:     /* create output IS of aggregates in linked list */
553:     ISCreateGeneral(PETSC_COMM_SELF,nloc+num_fine_ghosts,id_llist,PETSC_COPY_VALUES,a_locals_llist);
554: 

556:     /* make 'a_selected' - output */
557:     if( mpimat ) {
558:       VecGetArray( ghostState, &cpcol_state );
559:     }
560:     for (j=0; j<num_fine_ghosts; j++) {
561:       if( IS_SELECTED((NState)cpcol_state[j]) ) nselected++;
562:     }
563:     {
564:       PetscInt *selected_set;
565:       PetscMalloc( nselected*sizeof(PetscInt), &selected_set );
566:       for(kk=0,j=0;kk<nloc;kk++){
567:         NState state = (NState)lid_state[kk];
568:         if( IS_SELECTED(state) ) {
569:           selected_set[j++] = kk;
570:         }
571:       }
572:       for (kk=0; kk<num_fine_ghosts; kk++) {
573:         if( IS_SELECTED((NState)cpcol_state[kk]) ) {
574:           selected_set[j++] = nloc + kk;
575:         }
576:       }
577:       assert(j==nselected);
578:       ISCreateGeneral(PETSC_COMM_SELF, nselected, selected_set, PETSC_COPY_VALUES, a_selected );
579: 
580:       PetscFree( selected_set );
581:     }
582:     if( mpimat ) {
583:       VecRestoreArray( ghostState, &cpcol_state );
584:     }

586:     PetscFree( lid_cprowID );
587:     PetscFree( lid_gid );
588:     PetscFree( deleted_parent_gid );
589:     PetscFree( id_llist );
590:     PetscFree( lid_state );
591:   } /* scoping */

593:   if(mpimat){
594:     VecDestroy( &ghostState );
595:   }

597:   VecDestroy( &locState );

599:   return(0);
600: }

602: /* -------------------------------------------------------------------------- */
603: /*
604:  formProl0

606:    Input Parameter:
607:    . a_selected - list of selected local ID, includes selected ghosts
608:    . a_locals_llist - linked list with aggregates
609:    . a_bs - block size
610:    . a_nSAvec - num columns of new P
611:    . a_my0crs - global index of locals
612:    . a_data_stride - a_bs*(nloc nodes + ghost nodes)
613:    . a_data_in[a_data_stride*a_nSAvec] - local data on fine grid
614:    . a_flid_fgid[a_data_stride/a_bs] - make local to global IDs, includes ghosts in 'a_locals_llist'
615:   Output Parameter:
616:    . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
617:    . a_Prol - prolongation operator
618: */
621: PetscErrorCode formProl0(IS a_selected, /* list of selected local ID, includes selected ghosts */
622:                          IS a_locals_llist, /* linked list from selected vertices of aggregate unselected vertices */
623:                          const PetscInt a_bs,
624:                          const PetscInt a_nSAvec,
625:                          const PetscInt a_my0crs,
626:                          const PetscInt a_data_stride,
627:                          PetscReal a_data_in[],
628:                          const PetscInt a_flid_fgid[],
629:                          PetscReal **a_data_out,
630:                          Mat a_Prol /* prolongation operator (output)*/
631:                          )
632: {
634:   PetscInt  Istart,Iend,nFineLoc,myFine0,clid,flid,aggID,kk,jj,ii,nLocalSelected,ndone,nSelected;
635:   MPI_Comm       wcomm = ((PetscObject)a_Prol)->comm;
636:   PetscMPIInt    mype, npe;
637:   const PetscInt *selected_idx,*llist_idx;
638:   PetscReal      *out_data;

641:   MPI_Comm_rank(wcomm,&mype);
642:   MPI_Comm_size(wcomm,&npe);
643:   MatGetOwnershipRange( a_Prol, &Istart, &Iend );
644:   nFineLoc = (Iend-Istart)/a_bs; myFine0 = Istart/a_bs; assert((Iend-Istart)%a_bs==0);
645: 
646:   ISGetLocalSize( a_selected, &nSelected );
647:   ISGetIndices( a_selected, &selected_idx );
648:   for(kk=0,nLocalSelected=0;kk<nSelected;kk++){
649:     PetscInt lid = selected_idx[kk];
650:     if(lid<nFineLoc) nLocalSelected++;
651:   }

653:   /* aloc space for coarse point data (output) */
654: #define DATA_OUT_STRIDE (nLocalSelected*a_nSAvec)
655:   PetscMalloc( (DATA_OUT_STRIDE*a_nSAvec+1)*sizeof(PetscReal), &out_data );
656:   for(ii=0;ii<DATA_OUT_STRIDE*a_nSAvec+1;ii++) out_data[ii]=1.e300;
657:   *a_data_out = out_data; /* output - stride nLocalSelected*a_nSAvec */

659:   /* find points and set prolongation */
660:   ndone = 0;
661:   ISGetIndices( a_locals_llist, &llist_idx );
662:   for( clid = 0 ; clid < nLocalSelected ; clid++ ){
663:     PetscInt cgid = a_my0crs + clid, cids[100];

665:     /* count agg */
666:     aggID = 0;
667:     flid = selected_idx[clid]; assert(flid != -1);
668:     do{
669:       aggID++;
670:     } while( (flid=llist_idx[flid]) != -1 );

672:     /* get block */
673:     {
674:       PetscInt       asz=aggID,M=asz*a_bs,N=a_nSAvec;
675:       PetscInt       Mdata=M+((N-M>0)?N-M:0),LDA=Mdata,LWORK=N*a_bs;
676:       PetscScalar    *qqc,*qqr,*TAU,*WORK;
677:       PetscInt       *fids,INFO;
678: 
679:       PetscMalloc( (Mdata*N)*sizeof(PetscScalar), &qqc );
680:       PetscMalloc( (M*N)*sizeof(PetscScalar), &qqr );
681:       PetscMalloc( N*sizeof(PetscScalar), &TAU );
682:       PetscMalloc( LWORK*sizeof(PetscScalar), &WORK );
683:       PetscMalloc( M*sizeof(PetscInt), &fids );

685:       flid = selected_idx[clid];
686:       aggID = 0;
687:       do{
688:         /* copy in B_i matrix - column oriented */
689:         PetscReal *data = &a_data_in[flid*a_bs];
690:         for( kk = ii = 0; ii < a_bs ; ii++ ) {
691:           for( jj = 0; jj < N ; jj++ ) {
692:             qqc[jj*Mdata + aggID*a_bs + ii] = data[jj*a_data_stride + ii];
693:           }
694:         }

696:         /* set fine IDs */
697:         for(kk=0;kk<a_bs;kk++) fids[aggID*a_bs + kk] = a_flid_fgid[flid]*a_bs + kk;
698: 
699:         aggID++;
700:       }while( (flid=llist_idx[flid]) != -1 );

702:       /* pad with zeros */
703:       for( ii = asz*a_bs; ii < Mdata ; ii++ ) {
704:         for( jj = 0; jj < N ; jj++, kk++ ) {
705:           qqc[jj*Mdata + ii] = .0;
706:         }
707:       }

709:       ndone += aggID;
710:       /* QR */
711:       LAPACKgeqrf_( &Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
712:       if( INFO != 0 ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRS error");
713:       /* get R - column oriented - output B_{i+1} */
714:       {
715:         PetscReal *data = &out_data[clid*a_nSAvec];
716:         for( jj = 0; jj < a_nSAvec ; jj++ ) {
717:           for( ii = 0; ii < a_nSAvec ; ii++ ) {
718:             assert(data[jj*DATA_OUT_STRIDE + ii] == 1.e300);
719:             if( ii <= jj ) data[jj*DATA_OUT_STRIDE + ii] = qqc[jj*Mdata + ii];
720:             else data[jj*DATA_OUT_STRIDE + ii] = 0.;
721:           }
722:         }
723:       }

725:       /* get Q - row oriented */
726:       LAPACKungqr_( &Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
727:       if( INFO != 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR error arg %d",-INFO);

729:       for( ii = 0 ; ii < M ; ii++ ){
730:         for( jj = 0 ; jj < N ; jj++ ) {
731:           qqr[N*ii + jj] = qqc[jj*Mdata + ii];
732:         }
733:       }

735:       /* add diagonal block of P0 */
736:       for(kk=0;kk<N;kk++) cids[kk] = N*cgid + kk; /* global col IDs in P0 */
737:       MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES);

739:       PetscFree( qqc );
740:       PetscFree( qqr );
741:       PetscFree( TAU );
742:       PetscFree( WORK );
743:       PetscFree( fids );
744:     } /* scoping */
745:   } /* for all coarse nodes */
746:   assert(out_data[a_nSAvec*DATA_OUT_STRIDE]==1.e300);

748: /* MPI_Allreduce( &ndone, &ii, 1, MPI_INT, MPI_SUM, wcomm ); /\* synchronous version *\/ */
749: /* MatGetSize( a_Prol, &kk, &jj );  */
750: /* PetscPrintf(PETSC_COMM_WORLD," **** [%d]%s %d total done, N=%d (%d local done)\n",mype,__FUNCT__,ii,kk/a_bs,ndone); */

752:   ISRestoreIndices( a_selected, &selected_idx );
753:   ISRestoreIndices( a_locals_llist, &llist_idx );
754:   MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);
755:   MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);

757:   return(0);
758: }

760: /* -------------------------------------------------------------------------- */
761: /*
762:  triangulateAndFormProl

764:    Input Parameter:
765:    . a_selected_2 - list of selected local ID, includes selected ghosts
766:    . a_nnodes -
767:    . a_coords[2*a_nnodes] - column vector of local coordinates w/ ghosts
768:    . a_selected_1 - selected IDs that go with base (1) graph
769:    . a_locals_llist - linked list with (some) locality info of base graph
770:    . a_crsGID[a_selected.size()] - global index for prolongation operator
771:    . a_bs - block size
772:   Output Parameter:
773:    . a_Prol - prolongation operator
774:    . a_worst_best - measure of worst missed fine vertex, 0 is no misses
775: */
778: PetscErrorCode triangulateAndFormProl( IS  a_selected_2, /* list of selected local ID, includes selected ghosts */
779:                                        const PetscInt a_nnodes,
780:                                        const PetscReal a_coords[], /* column vector of local coordinates w/ ghosts */
781:                                        IS  a_selected_1, /* list of selected local ID, includes selected ghosts */
782:                                        IS  a_locals_llist, /* linked list from selected vertices of aggregate unselected vertices */
783:                                        const PetscInt a_crsGID[],
784:                                        const PetscInt a_bs,
785:                                        Mat a_Prol, /* prolongation operator (output) */
786:                                        PetscReal *a_worst_best /* measure of worst missed fine vertex, 0 is no misses */
787:                                        )
788: {
790:   PetscInt       kk,jj,tid,tt,sid,idx,nselected_1,nselected_2,nPlotPts;
791:   struct triangulateio in,mid;
792:   const PetscInt *selected_idx_1,*selected_idx_2,*llist_idx;
793:   PetscMPIInt    mype,npe;
794:   PetscInt Istart,Iend,nFineLoc,myFine0;

797:   *a_worst_best = 0.0;
798:   MPI_Comm_rank(((PetscObject)a_Prol)->comm,&mype);
799:   MPI_Comm_size(((PetscObject)a_Prol)->comm,&npe);
800:   ISGetLocalSize( a_selected_1, &nselected_1 );
801:   ISGetLocalSize( a_selected_2, &nselected_2 );
802:   if(nselected_2 == 1 || nselected_2 == 2 ){ /* 0 happens on idle processors */
803:     /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Not enough points - error in stopping logic",nselected_2); */
804:     *a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */
805:     PetscPrintf(PETSC_COMM_SELF,"[%d]%s %d selected point - bailing out\n",mype,__FUNCT__,nselected_2);
806:     return(0);
807:   }
808:   MatGetOwnershipRange( a_Prol, &Istart, &Iend );
809:   nFineLoc = (Iend-Istart)/a_bs; myFine0 = Istart/a_bs;
810:   nPlotPts = nFineLoc; /* locals */
811:   /* traingle */
812:   /* Define input points - in*/
813:   in.numberofpoints = nselected_2;
814:   in.numberofpointattributes = 0;
815:   /* get nselected points */
816:   PetscMalloc( 2*(nselected_2)*sizeof(REAL), &in.pointlist );
817:   ISGetIndices( a_selected_2, &selected_idx_2 );

819:   for(kk=0,sid=0;kk<nselected_2;kk++,sid += 2){
820:     PetscInt lid = selected_idx_2[kk];
821:     in.pointlist[sid] = a_coords[lid];
822:     in.pointlist[sid+1] = a_coords[a_nnodes + lid];
823:     if(lid>=nFineLoc) nPlotPts++;
824:   }
825:   assert(sid==2*nselected_2);

827:   in.numberofsegments = 0;
828:   in.numberofedges = 0;
829:   in.numberofholes = 0;
830:   in.numberofregions = 0;
831:   in.trianglelist = 0;
832:   in.segmentmarkerlist = 0;
833:   in.pointattributelist = 0;
834:   in.pointmarkerlist = 0;
835:   in.triangleattributelist = 0;
836:   in.trianglearealist = 0;
837:   in.segmentlist = 0;
838:   in.holelist = 0;
839:   in.regionlist = 0;
840:   in.edgelist = 0;
841:   in.edgemarkerlist = 0;
842:   in.normlist = 0;
843:   /* triangulate */
844:   mid.pointlist = 0;            /* Not needed if -N switch used. */
845:   /* Not needed if -N switch used or number of point attributes is zero: */
846:   mid.pointattributelist = 0;
847:   mid.pointmarkerlist = 0; /* Not needed if -N or -B switch used. */
848:   mid.trianglelist = 0;          /* Not needed if -E switch used. */
849:   /* Not needed if -E switch used or number of triangle attributes is zero: */
850:   mid.triangleattributelist = 0;
851:   mid.neighborlist = 0;         /* Needed only if -n switch used. */
852:   /* Needed only if segments are output (-p or -c) and -P not used: */
853:   mid.segmentlist = 0;
854:   /* Needed only if segments are output (-p or -c) and -P and -B not used: */
855:   mid.segmentmarkerlist = 0;
856:   mid.edgelist = 0;             /* Needed only if -e switch used. */
857:   mid.edgemarkerlist = 0;   /* Needed if -e used and -B not used. */
858:   mid.numberoftriangles = 0;

860:   /* Triangulate the points.  Switches are chosen to read and write a  */
861:   /*   PSLG (p), preserve the convex hull (c), number everything from  */
862:   /*   zero (z), assign a regional attribute to each element (A), and  */
863:   /*   produce an edge list (e), a Voronoi diagram (v), and a triangle */
864:   /*   neighbor list (n).                                            */
865:   if(nselected_2 != 0){ /* inactive processor */
866:     char args[] = "npczQ"; /* c is needed ? */
867:     triangulate(args, &in, &mid, (struct triangulateio *) NULL );
868:     /* output .poly files for 'showme' */
869:     if( !PETSC_TRUE ) {
870:       static int level = 1;
871:       FILE *file; char fname[32];

873:       sprintf(fname,"C%d_%d.poly",level,mype); file = fopen(fname, "w");
874:       /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
875:       fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0);
876:       /*Following lines: <vertex #> <x> <y> */
877:       for(kk=0,sid=0;kk<in.numberofpoints;kk++,sid += 2){
878:         fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
879:       }
880:       /*One line: <# of segments> <# of boundary markers (0 or 1)> */
881:       fprintf(file, "%d  %d\n",0,0);
882:       /*Following lines: <segment #> <endpoint> <endpoint> [boundary marker] */
883:       /* One line: <# of holes> */
884:       fprintf(file, "%d\n",0);
885:       /* Following lines: <hole #> <x> <y> */
886:       /* Optional line: <# of regional attributes and/or area constraints> */
887:       /* Optional following lines: <region #> <x> <y> <attribute> <maximum area> */
888:       fclose(file);

890:       /* elems */
891:       sprintf(fname,"C%d_%d.ele",level,mype); file = fopen(fname, "w");
892:       /*First line: <# of triangles> <nodes per triangle> <# of attributes> */
893:       fprintf(file, "%d %d %d\n",mid.numberoftriangles,3,0);
894:       /*Remaining lines: <triangle #> <node> <node> <node> ... [attributes]*/
895:       for(kk=0,sid=0;kk<mid.numberoftriangles;kk++,sid += 3){
896:         fprintf(file, "%d %d %d %d\n",kk,mid.trianglelist[sid],mid.trianglelist[sid+1],mid.trianglelist[sid+2]);
897:       }
898:       fclose(file);

900:       sprintf(fname,"C%d_%d.node",level,mype); file = fopen(fname, "w");
901:       /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
902:       /* fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0); */
903:       fprintf(file, "%d  %d  %d  %d\n",nPlotPts,2,0,0);
904:       /*Following lines: <vertex #> <x> <y> */
905:       for(kk=0,sid=0;kk<in.numberofpoints;kk++,sid+=2){
906:         fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
907:       }

909:       sid /= 2;
910:       for(jj=0;jj<nFineLoc;jj++){
911:         PetscBool sel = PETSC_TRUE;
912:         for( kk=0 ; kk<nselected_2 && sel ; kk++ ){
913:           PetscInt lid = selected_idx_2[kk];
914:           if( lid == jj ) sel = PETSC_FALSE;
915:         }
916:         if( sel ) {
917:           fprintf(file, "%d %e %e\n",sid++,a_coords[jj],a_coords[a_nnodes + jj]);
918:         }
919:       }
920:       fclose(file);
921:       assert(sid==nPlotPts);
922:       level++;
923:     }
924:   }
925:   PetscLogEventBegin(gamg_setup_stages[FIND_V],0,0,0,0);
926:   { /* form P - setup some maps */
927:     PetscInt clid_iterator;
928:     PetscInt *nTri, *node_tri;
929: 
930:     PetscMalloc( nselected_2*sizeof(PetscInt), &node_tri );
931:     PetscMalloc( nselected_2*sizeof(PetscInt), &nTri );

933:     /* need list of triangles on node*/
934:     for(kk=0;kk<nselected_2;kk++) nTri[kk] = 0;
935:     for(tid=0,kk=0;tid<mid.numberoftriangles;tid++){
936:       for(jj=0;jj<3;jj++) {
937:         PetscInt cid = mid.trianglelist[kk++];
938:         if( nTri[cid] == 0 ) node_tri[cid] = tid;
939:         nTri[cid]++;
940:       }
941:     }
942: #define EPS 1.e-12
943:     /* find points and set prolongation */
944:     ISGetIndices( a_selected_1, &selected_idx_1 );
945:     ISGetIndices( a_locals_llist, &llist_idx );
946:     for( clid_iterator = 0 ; clid_iterator < nselected_1 ; clid_iterator++ ){
947:       PetscInt flid = selected_idx_1[clid_iterator]; assert(flid != -1);
948:       PetscScalar AA[3][3];
949:       PetscBLASInt N=3,NRHS=1,LDA=3,IPIV[3],LDB=3,INFO;
950:       do{
951:         if( flid < nFineLoc ) {  /*could be a ghost*/
952:           PetscInt bestTID = -1; PetscScalar best_alpha = 1.e10;
953:           const PetscInt fgid = flid + myFine0;
954:           /* compute shape function for gid */
955:           const PetscReal fcoord[3] = { a_coords[flid], a_coords[a_nnodes + flid], 1.0 };
956:           PetscBool haveit = PETSC_FALSE; PetscScalar alpha[3]; PetscInt clids[3];
957:           /* look for it */
958:           for( tid = node_tri[clid_iterator], jj=0;
959:                jj < 5 && !haveit && tid != -1;
960:                jj++ ){
961:             for(tt=0;tt<3;tt++){
962:               PetscInt cid2 = mid.trianglelist[3*tid + tt];
963:               PetscInt lid2 = selected_idx_2[cid2];
964:               AA[tt][0] = a_coords[lid2]; AA[tt][1] = a_coords[a_nnodes + lid2]; AA[tt][2] = 1.0;
965:               clids[tt] = cid2; /* store for interp */
966:             }

968:             for(tt=0;tt<3;tt++) alpha[tt] = fcoord[tt];

970:             /* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
971:             LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO);
972:             {
973:               PetscBool have=PETSC_TRUE;  PetscScalar lowest=1.e10;
974:               for( tt = 0, idx = 0 ; tt < 3 ; tt++ ) {
975:                 if( alpha[tt] > 1.0+EPS || alpha[tt] < -EPS ) have = PETSC_FALSE;
976:                 if( alpha[tt] < lowest ){
977:                   lowest = alpha[tt];
978:                   idx = tt;
979:                 }
980:               }
981:               haveit = have;
982:             }
983:             tid = mid.neighborlist[3*tid + idx];
984:           }

986:           if( !haveit ) {
987:             /* brute force */
988:             for(tid=0 ; tid<mid.numberoftriangles && !haveit ; tid++ ){
989:               for(tt=0;tt<3;tt++){
990:                 PetscInt cid2 = mid.trianglelist[3*tid + tt];
991:                 PetscInt lid2 = selected_idx_2[cid2];
992:                 AA[tt][0] = a_coords[lid2]; AA[tt][1] = a_coords[a_nnodes + lid2]; AA[tt][2] = 1.0;
993:                 clids[tt] = cid2; /* store for interp */
994:               }
995:               for(tt=0;tt<3;tt++) alpha[tt] = fcoord[tt];
996:               /* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
997:               LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO);
998:               {
999:                 PetscBool have=PETSC_TRUE;  PetscScalar worst=0.0, v;
1000:                 for(tt=0; tt<3 && have ;tt++) {
1001:                   if(alpha[tt] > 1.0+EPS || alpha[tt] < -EPS ) have=PETSC_FALSE;
1002:                   if( (v=PetscAbs(alpha[tt]-0.5)) > worst ) worst = v;
1003:                 }
1004:                 if( worst < best_alpha ) {
1005:                   best_alpha = worst; bestTID = tid;
1006:                 }
1007:                 haveit = have;
1008:               }
1009:             }
1010:           }
1011:           if( !haveit ) {
1012:             if( best_alpha > *a_worst_best ) *a_worst_best = best_alpha;
1013:             /* use best one */
1014:             for(tt=0;tt<3;tt++){
1015:               PetscInt cid2 = mid.trianglelist[3*bestTID + tt];
1016:               PetscInt lid2 = selected_idx_2[cid2];
1017:               AA[tt][0] = a_coords[lid2]; AA[tt][1] = a_coords[a_nnodes + lid2]; AA[tt][2] = 1.0;
1018:               clids[tt] = cid2; /* store for interp */
1019:             }
1020:             for(tt=0;tt<3;tt++) alpha[tt] = fcoord[tt];
1021:             /* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
1022:             LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO);
1023:           }

1025:           /* put in row of P */
1026:           for(idx=0;idx<3;idx++){
1027:             PetscReal shp = alpha[idx];
1028:             if( PetscAbs(shp) > 1.e-6 ) {
1029:               PetscInt cgid = a_crsGID[clids[idx]];
1030:               PetscInt jj = cgid*a_bs, ii = fgid*a_bs; /* need to gloalize */
1031:               for(tt=0 ; tt < a_bs ; tt++, ii++, jj++ ){
1032:                 MatSetValues(a_Prol,1,&ii,1,&jj,&shp,INSERT_VALUES);
1033:               }
1034:             }
1035:           }
1036:         } /* local vertex test */
1037:       } while( (flid=llist_idx[flid]) != -1 );
1038:     }
1039:     ISRestoreIndices( a_selected_2, &selected_idx_2 );
1040:     ISRestoreIndices( a_selected_1, &selected_idx_1 );
1041:     ISRestoreIndices( a_locals_llist, &llist_idx );
1042:     MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);
1043:     MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);

1045:     PetscFree( node_tri );
1046:     PetscFree( nTri );
1047:   }
1048:   PetscLogEventEnd(gamg_setup_stages[FIND_V],0,0,0,0);

1050:   free( mid.trianglelist );
1051:   free( mid.neighborlist );
1052:   PetscFree( in.pointlist );

1054:   return(0);
1055: }
1056: /* -------------------------------------------------------------------------- */
1057: /*
1058:    getGIDsOnSquareGraph - square graph, get

1060:    Input Parameter:
1061:    . a_selected_1 - selected local indices (includes ghosts in input a_Gmat_1)
1062:    . a_Gmat1 - graph that goes with 'a_selected_1'
1063:    Output Parameter:
1064:    . a_selected_2 - selected local indices (includes ghosts in output a_Gmat_2)
1065:    . a_Gmat_2 - graph that is squared of 'a_Gmat_1'
1066:    . a_crsGID[a_selected_2.size()] - map of global IDs of coarse grid nodes
1067: */
1070: PetscErrorCode getGIDsOnSquareGraph( const IS a_selected_1,
1071:                                      const Mat a_Gmat1,
1072:                                      IS *a_selected_2,
1073:                                      Mat *a_Gmat_2,
1074:                                      PetscInt **a_crsGID
1075:                                      )
1076: {
1077:   PetscMPIInt    ierr,mype,npe;
1078:   PetscInt       *crsGID, kk,my0,Iend,nloc,nSelected_1;
1079:   const PetscInt *selected_idx;
1080:   MPI_Comm       wcomm = ((PetscObject)a_Gmat1)->comm;

1083:   MPI_Comm_rank(wcomm,&mype);
1084:   MPI_Comm_size(wcomm,&npe);
1085:   MatGetOwnershipRange(a_Gmat1,&my0,&Iend);  /* AIJ */
1086:   nloc = Iend - my0; /* this does not change */
1087:   ISGetLocalSize( a_selected_1, &nSelected_1 );

1089:   if (npe == 1) { /* not much to do in serial */
1090:     PetscMalloc( nSelected_1*sizeof(PetscInt), &crsGID );
1091:     for(kk=0;kk<nSelected_1;kk++) crsGID[kk] = kk;
1092:     *a_Gmat_2 = 0;
1093:     *a_selected_2 = a_selected_1; /* needed? */
1094:   }
1095:   else {
1096:     PetscInt      idx,num_fine_ghosts,num_crs_ghost,nLocalSelected,myCrs0;
1097:     Mat_MPIAIJ   *mpimat2;
1098:     Mat           Gmat2;
1099:     Vec           locState;
1100:     PetscScalar   *cpcol_state;

1102:     /* get 'nLocalSelected' */
1103:     ISGetIndices( a_selected_1, &selected_idx );
1104:     for(kk=0,nLocalSelected=0;kk<nSelected_1;kk++){
1105:       PetscInt lid = selected_idx[kk];
1106:       if(lid<nloc) nLocalSelected++;
1107:     }
1108:     ISRestoreIndices( a_selected_1, &selected_idx );
1109:     /* scan my coarse zero gid, set 'lid_state' with coarse GID */
1110:     MPI_Scan( &nLocalSelected, &myCrs0, 1, MPI_INT, MPI_SUM, wcomm );
1111:     myCrs0 -= nLocalSelected;

1113:     if( a_Gmat_2 != 0 ) { /* output */
1114:       /* grow graph to get wider set of selected vertices to cover fine grid, invalidates 'llist' */
1115:       MatMatMult(a_Gmat1, a_Gmat1, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Gmat2 );
1116:       *a_Gmat_2 = Gmat2; /* output */
1117:     }
1118:     else Gmat2 = a_Gmat1;  /* use local to get crsGIDs at least */
1119:     /* get coarse grid GIDS for selected (locals and ghosts) */
1120:     mpimat2 = (Mat_MPIAIJ*)Gmat2->data;
1121:     MatGetVecs( Gmat2, &locState, 0 );
1122:     VecSet( locState, (PetscScalar)(NOT_DONE) );   /* set with UNKNOWN state */
1123:     ISGetIndices( a_selected_1, &selected_idx );
1124:     for(kk=0;kk<nLocalSelected;kk++){
1125:       PetscInt fgid = selected_idx[kk] + my0;
1126:       PetscScalar v = (PetscScalar)(kk+myCrs0);
1127:       VecSetValues( locState, 1, &fgid, &v, INSERT_VALUES );   /* set with PID */
1128:     }
1129:     ISRestoreIndices( a_selected_1, &selected_idx );
1130:     VecAssemblyBegin( locState );
1131:     VecAssemblyEnd( locState );
1132:     VecScatterBegin(mpimat2->Mvctx,locState,mpimat2->lvec,INSERT_VALUES,SCATTER_FORWARD);
1133:       VecScatterEnd(mpimat2->Mvctx,locState,mpimat2->lvec,INSERT_VALUES,SCATTER_FORWARD);
1134:     VecGetLocalSize( mpimat2->lvec, &num_fine_ghosts );
1135:     VecGetArray( mpimat2->lvec, &cpcol_state );
1136:     for(kk=0,num_crs_ghost=0;kk<num_fine_ghosts;kk++){
1137:       if( (NState)cpcol_state[kk] != NOT_DONE ) num_crs_ghost++;
1138:     }
1139:     PetscMalloc( (nLocalSelected+num_crs_ghost)*sizeof(PetscInt), &crsGID );  /* output */
1140:     {
1141:       PetscInt *selected_set;
1142:       PetscMalloc( (nLocalSelected+num_crs_ghost)*sizeof(PetscInt), &selected_set );
1143:       /* do ghost of 'crsGID' */
1144:       for(kk=0,idx=nLocalSelected;kk<num_fine_ghosts;kk++){
1145:         if( (NState)cpcol_state[kk] != NOT_DONE ){
1146:           PetscInt cgid = (PetscInt)cpcol_state[kk];
1147:           selected_set[idx] = nloc + kk;
1148:           crsGID[idx++] = cgid;
1149:         }
1150:       }
1151:       assert(idx==(nLocalSelected+num_crs_ghost));
1152:       VecRestoreArray( mpimat2->lvec, &cpcol_state );
1153:       /* do locals in 'crsGID' */
1154:       VecGetArray( locState, &cpcol_state );
1155:       for(kk=0,idx=0;kk<nloc;kk++){
1156:         if( (NState)cpcol_state[kk] != NOT_DONE ){
1157:           PetscInt cgid = (PetscInt)cpcol_state[kk];
1158:           selected_set[idx] = kk;
1159:           crsGID[idx++] = cgid;
1160:         }
1161:       }
1162:       assert(idx==nLocalSelected);
1163:       VecRestoreArray( locState, &cpcol_state );

1165:       if( a_selected_2 != 0 ) { /* output */
1166:         ISCreateGeneral(PETSC_COMM_SELF,(nLocalSelected+num_crs_ghost),selected_set,PETSC_COPY_VALUES,a_selected_2);
1167: 
1168:       }
1169:       PetscFree( selected_set );
1170:     }
1171:     VecDestroy( &locState );
1172:   }
1173:   *a_crsGID = crsGID; /* output */

1175:   return(0);
1176: }


1179: /* Private context for the GAMG preconditioner */
1180: typedef struct{
1181:   PetscInt       m_lid;      /* local vertex index */
1182:   PetscInt       m_degree;   /* vertex degree */
1183: } GNode;
1184: int compare (const void *a, const void *b)
1185: {
1186:   return (((GNode*)a)->m_degree - ((GNode*)b)->m_degree);
1187: }

1189: /* -------------------------------------------------------------------------- */
1190: /*
1191:    createProlongation

1193:   Input Parameter:
1194:    . a_Amat - matrix on this fine level
1195:    . a_data[nloc*a_data_sz(in)]
1196:    . a_dim - dimention
1197:    . a_data_cols - number of colums in data (rows is infered from 
1198:    . a_useSA - do smoothed aggregation, otherwise do geometric
1199:    . a_level - 
1200:   Input/Output Parameter:
1201:    . a_bs - block size of fine grid (in) and coarse grid (out)
1202:   Output Parameter:
1203:    . a_P_out - prolongation operator to the next level
1204:    . a_data_out - data of coarse grid points (num local columns in 'a_P_out')
1205:    . a_isOK - flag for if this grid is usable
1206:    . a_emax - max iegen value
1207: */
1210: PetscErrorCode createProlongation( const Mat a_Amat,
1211:                                    const PetscReal a_data[],
1212:                                    const PetscInt a_dim,
1213:                                    const PetscInt a_data_cols,
1214:                                    const PetscBool a_useSA,
1215:                                    const PetscInt a_level,
1216:                                    PetscInt *a_bs,
1217:                                    Mat *a_P_out,
1218:                                    PetscReal **a_data_out,
1219:                                    PetscBool *a_isOK,
1220:                                    PetscReal *a_emax
1221:                                    )
1222: {
1224:   PetscInt       ncols,Istart,Iend,Ii,nloc,jj,kk,my0,nLocalSelected,nnz0,nnz1,N,M;
1225:   Mat            Prol, Gmat, AuxMat;
1226:   PetscMPIInt    mype, npe;
1227:   MPI_Comm       wcomm = ((PetscObject)a_Amat)->comm;
1228:   IS             rankIS, permIS, llist_1, selected_1, selected_2;
1229:   const PetscInt *selected_idx, *idx,bs_in=*a_bs,col_bs=(a_useSA ? a_data_cols : bs_in);
1230:   const PetscScalar *vals;
1231:   PetscScalar     v,vfilter=0.08;
1232:   MatInfo info;

1235:   *a_isOK = PETSC_TRUE;
1236:   MPI_Comm_rank(wcomm,&mype);
1237:   MPI_Comm_size(wcomm,&npe);
1238:   MatGetOwnershipRange( a_Amat, &Istart, &Iend );
1239:   nloc = (Iend-Istart)/bs_in; my0 = Istart/bs_in; assert((Iend-Istart)%bs_in==0);

1241:   PetscLogEventBegin(gamg_setup_stages[SET3],0,0,0,0);

1243:   /* get scalar copy (norms) of matrix */
1244:   MatGetInfo(a_Amat,MAT_LOCAL,&info);
1245:   kk = (PetscInt)info.nz_used/((nloc+1)*bs_in*bs_in)+1;
1246:   MatCreateMPIAIJ( wcomm, nloc, nloc,
1247:                           PETSC_DETERMINE, PETSC_DETERMINE,
1248:                           2*kk, PETSC_NULL, kk, PETSC_NULL, &Gmat );
1249: 
1250:   for (Ii=Istart; Ii<Iend; Ii++) {
1251:     PetscInt dest_row = Ii/bs_in;
1252:     MatGetRow(a_Amat,Ii,&ncols,&idx,&vals);
1253:     for(jj=0;jj<ncols;jj++){
1254:       PetscInt dest_col = idx[jj]/bs_in;
1255:       v = PetscAbs(vals[jj]);
1256:       MatSetValues(Gmat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);
1257:     }
1258:     MatRestoreRow(a_Amat,Ii,&ncols,&idx,&vals);
1259:   }
1260:   MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);
1261:   MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);
1262:   MatGetInfo(Gmat,MAT_GLOBAL_SUM,&info);
1263:   MatGetSize( Gmat, &M, &N );
1264:   nnz0 = (PetscInt)(info.nz_used/(PetscReal)M + 0.5);
1265:   if( a_useSA ){
1266:     vfilter = .01/(PetscScalar)nnz0;
1267:   }
1268:   else {
1269:     vfilter = .1/(PetscScalar)nnz0;
1270:   }
1271:   MatGetOwnershipRange(Gmat,&Istart,&Iend); /* use AIJ from here */

1273:   /* filter Gmat */
1274:   {
1275:     Mat Gmat2;
1276:     MatCreateMPIAIJ(wcomm,nloc,nloc,PETSC_DECIDE,PETSC_DECIDE,3*nnz0,PETSC_NULL,2*nnz0,PETSC_NULL,&Gmat2);
1277: 
1278:     for (Ii=Istart; Ii<Iend; Ii++) {
1279:       MatGetRow(Gmat,Ii,&ncols,&idx,&vals);
1280:       for(jj=0;jj<ncols;jj++){
1281:         if( (v=PetscAbs(vals[jj])) > vfilter ) {
1282:           MatSetValues(Gmat2,1,&Ii,1,&idx[jj],&v,INSERT_VALUES);
1283:         }
1284:         /* else PetscPrintf(PETSC_COMM_SELF,"\t%s filtered %d, v=%e\n",__FUNCT__,Ii,vals[jj]); */
1285:       }
1286:       MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);
1287:     }
1288:     MatAssemblyBegin(Gmat2,MAT_FINAL_ASSEMBLY);
1289:     MatAssemblyEnd(Gmat2,MAT_FINAL_ASSEMBLY);
1290:     MatDestroy( &Gmat );
1291:     Gmat = Gmat2;
1292:     MatGetInfo(Gmat,MAT_GLOBAL_SUM,&info);
1293:     nnz1 = (PetscInt)(info.nz_used/(PetscReal)M + 0.5);
1294:     PetscPrintf(wcomm,"\t%s ave nnz/row %d --> %d\n",__FUNCT__,nnz0,nnz1);
1295:   }

1297:   /* square matrix - SA */
1298:   if( a_useSA ){
1299:     Mat Gmat2;
1300:     MatMatMult( Gmat, Gmat, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Gmat2 );
1301: 
1302:     /* MatDestroy( &Gmat );   */
1303:     AuxMat = Gmat;
1304:     Gmat = Gmat2;
1305:     /* force compressed row storage for B matrix in AuxMat */
1306:     if (npe > 1) {
1307:       Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)AuxMat->data;
1308:       Mat_SeqAIJ *Bmat = (Mat_SeqAIJ*) mpimat->B->data;
1309:       Bmat->compressedrow.check = PETSC_TRUE;
1310:       MatCheckCompressedRow(mpimat->B,&Bmat->compressedrow,Bmat->i,AuxMat->rmap->n,-1.0);
1311: 
1312:       assert( Bmat->compressedrow.use );
1313:     }
1314:   }

1316:   /* scale Gmat */
1317:   {
1318:     Vec diag;
1319:     MatGetVecs( Gmat, &diag, 0 );
1320:     MatGetDiagonal( Gmat, diag );
1321:     VecReciprocal( diag );
1322:     VecSqrtAbs( diag );
1323:     MatDiagonalScale( Gmat, diag, diag );
1324:     VecDestroy( &diag );
1325:   }
1326: 
1327:   /* force compressed row storage for B matrix */
1328:   if (npe > 1) {
1329:     Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data;
1330:     Mat_SeqAIJ *Bmat = (Mat_SeqAIJ*) mpimat->B->data;
1331:     Bmat->compressedrow.check = PETSC_TRUE;
1332:     MatCheckCompressedRow(mpimat->B,&Bmat->compressedrow,Bmat->i,Gmat->rmap->n,-1.0);
1333: 
1334:     assert( Bmat->compressedrow.use );
1335:   }

1337:   /* view */
1338:   if( PETSC_FALSE ) {
1339:     PetscViewer        viewer;
1340:     PetscViewerASCIIOpen(wcomm, "Gmat_2.m", &viewer);
1341:     PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
1342:     MatView(Gmat,viewer);
1343:     PetscViewerDestroy( &viewer );
1344: 
1345:     PetscViewerASCIIOpen(wcomm, "Gmat_1.m", &viewer);
1346:     PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
1347:     MatView(AuxMat,viewer);
1348:     PetscViewerDestroy( &viewer );
1349:   }
1350: 
1351:   /* Mat subMat = Gmat -- get degree of vertices */
1352:   {
1353:     GNode *gnodes;
1354:     PetscInt *permute,*ranks;
1355: 
1356:     PetscMalloc( nloc*sizeof(GNode), &gnodes );
1357:     PetscMalloc( nloc*sizeof(PetscInt), &permute );
1358:     PetscMalloc( nloc*sizeof(PetscInt), &ranks );

1360:     for (Ii=Istart; Ii<Iend; Ii++) { /* locals only? */
1361:       MatGetRow(Gmat,Ii,&ncols,0,0);
1362:       {
1363:         PetscInt lid = Ii - Istart;
1364:         gnodes[lid].m_lid = lid;
1365:         gnodes[lid].m_degree = ncols;
1366:         /* if( (fabs(a_data[2*lid])<1.e-12 || fabs(a_data[2*lid]-1.)<1.e-12) && */
1367:         /*             (fabs(a_data[2*lid+1])<1.e-12 || fabs(a_data[2*lid+1]-1.)<1.e-12) ) { */
1368:         /*           gnodes[lid].m_degree = 1; */
1369:         /*         } HIT CORNERS of ex54/5 */
1370:       }
1371:       MatRestoreRow(Gmat,Ii,&ncols,0,0);
1372:     }
1373:     /* randomize */
1374:     if( PETSC_TRUE ) {
1375:       PetscBool *bIndexSet;
1376:       PetscMalloc( nloc*sizeof(PetscBool), &bIndexSet );
1377:       for ( Ii = 0; Ii < nloc ; Ii++) bIndexSet[Ii] = PETSC_FALSE;
1378:       for ( Ii = 0; Ii < nloc ; Ii++)
1379:       {
1380:         PetscInt iSwapIndex = rand()%nloc;
1381:         if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii)
1382:         {
1383:           GNode iTemp = gnodes[iSwapIndex];
1384:           gnodes[iSwapIndex] = gnodes[Ii];
1385:           gnodes[Ii] = iTemp;
1386:           bIndexSet[Ii] = PETSC_TRUE;
1387:           bIndexSet[iSwapIndex] = PETSC_TRUE;
1388:         }
1389:       }
1390:       PetscFree( bIndexSet );
1391:     }
1392:     /* only sort locals */
1393:     qsort( gnodes, nloc, sizeof(GNode), compare );
1394:     /* create IS of permutation */
1395:     for(kk=0;kk<nloc;kk++) { /* locals only */
1396:       permute[kk] = gnodes[kk].m_lid;
1397:       ranks[kk] = gnodes[kk].m_degree;
1398:     }
1399:     ISCreateGeneral( PETSC_COMM_SELF, (Iend-Istart), permute, PETSC_COPY_VALUES, &permIS );
1400: 
1401:     ISCreateGeneral( PETSC_COMM_SELF, (Iend-Istart), ranks, PETSC_COPY_VALUES, &rankIS );
1402: 

1404:     PetscFree( gnodes );
1405:     PetscFree( permute );
1406:     PetscFree( ranks );
1407:   }
1408:   PetscLogEventEnd(gamg_setup_stages[SET3],0,0,0,0);

1410:   /* SELECT COARSE POINTS */
1411:   PetscLogEventBegin(gamg_setup_stages[SET4],0,0,0,0);
1412:   maxIndSetAgg( permIS, rankIS, Gmat, AuxMat, a_useSA, &selected_1, &llist_1 );
1413: 
1414:   if( a_useSA ) {
1415:     MatDestroy( &AuxMat );
1416:   }
1417:   PetscLogEventEnd(gamg_setup_stages[SET4],0,0,0,0);
1418:   ISDestroy(&permIS);
1419:   ISDestroy(&rankIS);

1421:   /* get 'nLocalSelected' */
1422:   ISGetLocalSize( selected_1, &ncols );
1423:   ISGetIndices( selected_1, &selected_idx );
1424:   for(kk=0,nLocalSelected=0;kk<ncols;kk++){
1425:     PetscInt lid = selected_idx[kk];
1426:     if(lid<nloc) nLocalSelected++;
1427:   }
1428:   ISRestoreIndices( selected_1, &selected_idx );

1430:   /* create prolongator, create P matrix */
1431:   MatCreateMPIAIJ(wcomm, nloc*bs_in, nLocalSelected*col_bs,
1432:                          PETSC_DETERMINE, PETSC_DETERMINE,
1433:                          a_data_cols, PETSC_NULL, a_data_cols, PETSC_NULL,
1434:                          &Prol );
1435: 
1436: 
1437:   /* can get all points "removed" */
1438:   MatGetSize( Prol, &kk, &jj );
1439:   if( jj==0 ) {
1440:     assert(0);
1441:     *a_isOK = PETSC_FALSE;
1442:     PetscPrintf(PETSC_COMM_WORLD,"[%d]%s no selected points on coarse grid\n",mype,__FUNCT__);
1443:     MatDestroy( &Prol );
1444:     ISDestroy( &llist_1 );
1445:     ISDestroy( &selected_1 );
1446:     MatDestroy( &Gmat );
1447:     return(0);
1448:   }

1450:   /* switch for SA or GAMG */
1451:   if( !a_useSA ) {
1452:     PetscReal *coords; assert(a_dim==a_data_cols); PetscInt nnodes;
1453:     PetscInt  *crsGID;
1454:     Mat        Gmat2;
1455:     /* grow ghost data for better coarse grid cover of fine grid */
1456:     PetscLogEventBegin(gamg_setup_stages[SET5],0,0,0,0);
1457:     getGIDsOnSquareGraph( selected_1, Gmat, &selected_2, &Gmat2, &crsGID );
1458: 
1459:     /* llist is now not valid wrt squared graph, but will work as iterator in 'triangulateAndFormProl' */
1460:     PetscLogEventEnd(gamg_setup_stages[SET5],0,0,0,0);

1462:     /* create global vector of coorindates in 'coords' */
1463:     if (npe > 1) {
1464:       getDataWithGhosts( Gmat2, a_dim, a_data, &nnodes, &coords );
1465: 
1466:     }
1467:     else {
1468:       coords = (PetscReal*)a_data;
1469:       nnodes = nloc;
1470:     }
1471:     MatDestroy( &Gmat2 );

1473:     /* triangulate */
1474:     if( a_dim == 2 ) {
1475:       PetscReal metric;
1476:       PetscLogEventBegin(gamg_setup_stages[SET6],0,0,0,0);
1477:       triangulateAndFormProl( selected_2, nnodes, coords,
1478:                                      selected_1, llist_1, crsGID, bs_in, Prol, &metric );
1479: 
1480:       PetscLogEventEnd(gamg_setup_stages[SET6],0,0,0,0);
1481:       PetscFree( crsGID );

1483:       /* clean up and create coordinates for coarse grid (output) */
1484:       if (npe > 1) PetscFree( coords );
1485: 
1486:       if( metric > 1. ) { /* needs to be globalized - should not happen */
1487:         *a_isOK = PETSC_FALSE;
1488:         PetscPrintf(PETSC_COMM_SELF,"[%d]%s failed metric for coarse grid %e\n",mype,__FUNCT__,metric);
1489:         MatDestroy( &Prol );
1490:       }
1491:       else if( metric > .0 ) {
1492:         PetscPrintf(PETSC_COMM_SELF,"[%d]%s metric for coarse grid = %e\n",mype,__FUNCT__,metric);
1493:       }
1494:     } else {
1495:       SETERRQ(wcomm,PETSC_ERR_LIB,"3D not implemented");
1496:     }
1497:     { /* create next coords - output */
1498:       PetscReal *crs_crds;
1499:       PetscMalloc( a_dim*nLocalSelected*sizeof(PetscReal), &crs_crds );
1500: 
1501:       ISGetIndices( selected_1, &selected_idx );
1502:       for(kk=0;kk<nLocalSelected;kk++){/* grab local select nodes to promote - output */
1503:         PetscInt lid = selected_idx[kk];
1504:         for(jj=0;jj<a_dim;jj++) crs_crds[jj*nLocalSelected + kk] = a_data[jj*nloc + lid];
1505:       }
1506:       ISRestoreIndices( selected_1, &selected_idx );
1507:       *a_data_out = crs_crds; /* out */
1508:     }
1509:     if (npe > 1) {
1510:       ISDestroy( &selected_2 );  /* this is selected_1 in serial */
1511:     }
1512:     *a_emax = -1.0; /* no estimate */
1513:   }
1514:   else { /* SA */
1515:     PetscReal alpha,emax,emin,*data_w_ghost;
1516:     PetscInt  myCrs0, nbnodes, *flid_fgid;

1518:     /* create global vector of coorindates in 'coords' */
1519:     PetscLogEventBegin(gamg_setup_stages[SET7],0,0,0,0);
1520:     if (npe > 1) {
1521:       /* create blocked dummy matrix for communication only */
1522:       Mat tMat;
1523:       PetscInt Ii,ncols; const PetscInt *idx; PetscScalar v = 1.0;
1524: 
1525:       MatGetInfo(Gmat,MAT_LOCAL,&info);
1526:       kk = (PetscInt)info.nz_used*bs_in/(nloc+1) + 1;
1527:       MatCreateMPIAIJ( wcomm, nloc*bs_in, nloc*bs_in,
1528:                               PETSC_DETERMINE, PETSC_DETERMINE,
1529:                               2*kk, PETSC_NULL, kk, PETSC_NULL,
1530:                               &tMat );
1531:       MatSetBlockSize( tMat, bs_in );
1532:       for ( Ii = Istart; Ii < Iend; Ii++ ) {
1533:         PetscInt dest_row = Ii*bs_in;
1534:         MatGetRow(Gmat,Ii,&ncols,&idx,0);
1535:         for( jj = 0 ; jj < ncols ; jj++ ){
1536:           PetscInt k,jjj,iii,dest_col = idx[jj]*bs_in;
1537:           for( k = 0 ; k < bs_in ; k++ ){
1538:             iii = dest_row + k; jjj = dest_col + k;
1539:             MatSetValues(tMat,1,&iii,1,&jjj,&v,ADD_VALUES);
1540:           }
1541:         }
1542:         MatRestoreRow(Gmat,Ii,&ncols,&idx,0);
1543:       }
1544:       MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);
1545:       MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);
1546:       getDataWithGhosts( tMat, a_data_cols, a_data, &nbnodes, &data_w_ghost );
1547: 
1548:       MatDestroy( &tMat );
1549:     }
1550:     else {
1551:       nbnodes = bs_in*nloc;
1552:       data_w_ghost = (PetscReal*)a_data;
1553:     }
1554: 
1555:     /* scan my coarse zero gid */
1556:     MPI_Scan( &nLocalSelected, &myCrs0, 1, MPI_INT, MPI_SUM, wcomm );
1557:     myCrs0 -= nLocalSelected;

1559:     /* get P0 */
1560:     if( npe > 1 ){
1561:       PetscReal *fid_glid_loc,*fiddata;
1562:       PetscInt nnodes;

1564:       PetscMalloc( nloc*sizeof(PetscReal), &fid_glid_loc );
1565:       for(kk=0;kk<nloc;kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk);
1566:       getDataWithGhosts(Gmat, 1, fid_glid_loc, &nnodes, &fiddata);
1567: 
1568:       PetscMalloc( nnodes*sizeof(PetscInt), &flid_fgid );
1569:       for(kk=0;kk<nnodes;kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
1570:       PetscFree( fiddata );
1571:       assert(nnodes==nbnodes/bs_in);
1572:       PetscFree( fid_glid_loc );
1573:     }
1574:     else {
1575:       PetscMalloc( nloc*sizeof(PetscInt), &flid_fgid );
1576:       for(kk=0;kk<nloc;kk++) flid_fgid[kk] = my0 + kk;
1577:     }
1578:     PetscLogEventEnd(gamg_setup_stages[SET7],0,0,0,0);

1580:     formProl0(selected_1,llist_1,bs_in,a_data_cols,myCrs0,nbnodes,
1581:                      data_w_ghost,flid_fgid,a_data_out,Prol);
1582: 
1583:     if (npe > 1) PetscFree( data_w_ghost );
1584:     PetscFree( flid_fgid );

1586:     /* smooth P0 */
1587:     PetscLogEventBegin(gamg_setup_stages[SET9],0,0,0,0);
1588:     { /* eigen estimate 'emax' - this is also use for cheb smoother */
1589:       KSP eksp; Mat Lmat = a_Amat;
1590:       Vec bb, xx; PC pc;
1591:       MatGetVecs( Lmat, &bb, 0 );
1592:       MatGetVecs( Lmat, &xx, 0 );
1593:       {
1594:         PetscRandom    rctx;
1595:         PetscRandomCreate(wcomm,&rctx);
1596:         PetscRandomSetFromOptions(rctx);
1597:         VecSetRandom(bb,rctx);
1598:         PetscRandomDestroy( &rctx );
1599:       }
1600:       KSPCreate(wcomm,&eksp);
1601:       KSPSetType( eksp, KSPCG );
1602:       KSPSetInitialGuessNonzero( eksp, PETSC_FALSE );
1603:       KSPSetOperators( eksp, Lmat, Lmat, DIFFERENT_NONZERO_PATTERN );
1604:       CHKERRQ( ierr );
1605:       KSPGetPC( eksp, &pc );                              CHKERRQ( ierr );
1606:       PCSetType( pc, PCPBJACOBI );  /* smoother */
1607:       KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,10);
1608: 
1609:       KSPSetNormType( eksp, KSP_NORM_NONE );
1610:       KSPSetComputeSingularValues( eksp,PETSC_TRUE );
1611:       KSPSolve( eksp, bb, xx );
1612:       KSPComputeExtremeSingularValues( eksp, &emax, &emin );
1613:       PetscPrintf(PETSC_COMM_WORLD,"\t\t\t%s max eigen=%e min=%e PC=%s\n",__FUNCT__,emax,emin,PCPBJACOBI);
1614:       VecDestroy( &xx );
1615:       VecDestroy( &bb );
1616:       KSPDestroy( &eksp );
1617:     }
1618:     /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
1619:     if( PETSC_TRUE ) {
1620:       Mat Prol1, AA; Vec diag;
1621:       MatDuplicate(a_Amat, MAT_COPY_VALUES, &AA);  /*AIJ*/
1622:       MatGetVecs( AA, &diag, 0 );
1623:       MatGetDiagonal( AA, diag );     /* effectively PCJACOBI */
1624:       VecReciprocal( diag );
1625:       MatDiagonalScale( AA, diag, 0 );
1626:       VecDestroy( &diag );
1627:       alpha = -1.5/emax;
1628:       MatScale( AA, alpha );
1629:       alpha = 1.;
1630:       MatShift( AA, alpha );
1631:       MatMatMult( AA, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Prol1 );
1632:       MatDestroy( &Prol );
1633:       MatDestroy( &AA );
1634:       Prol = Prol1;
1635:     }
1636:     PetscLogEventEnd(gamg_setup_stages[SET9],0,0,0,0);
1637:     *a_emax = emax; /* estimate for cheb smoother */

1639:     *a_bs = a_data_cols;
1640:   }

1642:   *a_P_out = Prol;  /* out */

1644:   ISDestroy( &llist_1 );
1645:   ISDestroy( &selected_1 );
1646:   MatDestroy( &Gmat );

1648:   return(0);
1649: }