Actual source code: gasm.c
2: /*
3: This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
4: In this version each processor may have any number of subdomains and a subdomain may live on multiple
5: processors.
7: N - total number of subdomains on all processors
8: n - actual number of subdomains on this processor
9: nmax - maximum number of subdomains per processor
10: */
11: #include <private/pcimpl.h> /*I "petscpc.h" I*/
13: typedef struct {
14: PetscInt N,n,nmax;
15: PetscInt overlap; /* overlap requested by user */
16: KSP *ksp; /* linear solvers for each block */
17: Vec gx,gy; /* Merged work vectors */
18: Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
19: IS gis, gis_local; /* merged ISs */
20: VecScatter grestriction; /* merged restriction */
21: VecScatter gprolongation; /* merged prolongation */
22: IS *is; /* index set that defines each overlapping subdomain */
23: IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
24: Mat *pmat; /* subdomain block matrices */
25: PCGASMType type; /* use reduced interpolation, restriction or both */
26: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
27: PetscBool same_local_solves; /* flag indicating whether all local solvers are same */
28: PetscBool sort_indices; /* flag to sort subdomain indices */
29: } PC_GASM;
33: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
34: {
35: PC_GASM *osm = (PC_GASM*)pc->data;
36: const char *prefix;
37: char fname[PETSC_MAX_PATH_LEN+1];
38: PetscViewer viewer, sviewer;
39: PetscInt i,j,nidx;
40: const PetscInt *idx;
41: char *cidx;
42: PetscBool found;
43: PetscMPIInt size, rank;
47: MPI_Comm_size(((PetscObject)pc)->comm, &size);
48: MPI_Comm_rank(((PetscObject)pc)->comm, &rank);
49: PCGetOptionsPrefix(pc,&prefix);
50: PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);
51: if (!found) { PetscStrcpy(fname,"stdout"); };
52: for (i=0;i<osm->n;++i) {
53: PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);
54: ISGetLocalSize(osm->is[i], &nidx);
55: /*
56: No more than 15 characters per index plus a space.
57: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
59: For nidx == 0, the whole string consists of 16 '\0' characters.
60: */
61: PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);
62: ISGetIndices(osm->is[i], &idx);
63: PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);
64: for(j = 0; j < nidx; ++j) {
65: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
66: }
67: PetscViewerDestroy(&sviewer);
68: ISRestoreIndices(osm->is[i],&idx);
70: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
71: PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");
72: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
73: PetscViewerFlush(viewer);
74: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
75: PetscViewerASCIIPrintf(viewer, "\n");
76: PetscFree(cidx);
77: PetscViewerDestroy(&viewer);
78: if (osm->is_local) {
79: PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);
80: ISGetLocalSize(osm->is_local[i], &nidx);
81: /*
82: No more than 15 characters per index plus a space.
83: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
84: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
85: For nidx == 0, the whole string consists of 16 '\0' characters.
86: */
87: PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);
88: ISGetIndices(osm->is_local[i], &idx);
89: PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);
90: for(j = 0; j < nidx; ++j) {
91: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
92: }
93: PetscViewerDestroy(&sviewer);
94: ISRestoreIndices(osm->is_local[i],&idx);
96: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
97: PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");
98: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
99: PetscViewerFlush(viewer);
100: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
101: PetscViewerASCIIPrintf(viewer, "\n");
102: PetscFree(cidx);
103: PetscViewerDestroy(&viewer);
104: }
105: }
106: return(0);
107: }
112: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
113: {
114: PC_GASM *osm = (PC_GASM*)pc->data;
115: const char *prefix;
117: PetscMPIInt rank, size;
118: PetscInt i,bsz;
119: PetscBool iascii,isstring, print_subdomains=PETSC_FALSE;
120: PetscViewer sviewer;
124: MPI_Comm_size(((PetscObject)pc)->comm, &size);
125: MPI_Comm_rank(((PetscObject)pc)->comm, &rank);
126: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
127: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
128: PCGetOptionsPrefix(pc,&prefix);
129: PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);
130: if (iascii) {
131: char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
132: if (osm->overlap >= 0) {PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);}
133: if (osm->nmax > 0) {PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);}
134: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
135: PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);
136: PetscViewerFlush(viewer);
137: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
138: PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);
139: PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);
140: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
141: if (osm->same_local_solves) {
142: if (osm->ksp) {
143: PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");
144: PetscViewerGetSingleton(viewer,&sviewer);
145: if (!rank) {
146: PetscViewerASCIIPushTab(viewer);
147: KSPView(osm->ksp[0],sviewer);
148: PetscViewerASCIIPopTab(viewer);
149: }
150: PetscViewerRestoreSingleton(viewer,&sviewer);
151: }
152: } else {
153: PetscViewerFlush(viewer);
154: PetscViewerASCIIPrintf(viewer," Local solve info for each subdomain is in the following KSP and PC objects:\n");
155: PetscViewerASCIIPushTab(viewer);
156: PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
157: for (i=0; i<osm->nmax; i++) {
158: PetscViewerGetSingleton(viewer,&sviewer);
159: if (i < osm->n) {
160: ISGetLocalSize(osm->is[i],&bsz);
161: PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);
162: PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);
163: KSPView(osm->ksp[i],sviewer);
164: PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
165: PetscViewerFlush(sviewer);
166: PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);
167: }
168: PetscViewerRestoreSingleton(viewer,&sviewer);
169: }
170: PetscViewerASCIIPopTab(viewer);
171: PetscViewerFlush(viewer);
172: }
173: } else if (isstring) {
174: PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);
175: PetscViewerGetSingleton(viewer,&sviewer);
176: if (osm->ksp) {KSPView(osm->ksp[0],sviewer);}
177: PetscViewerRestoreSingleton(viewer,&sviewer);
178: } else {
179: SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
180: }
181: PetscViewerFlush(viewer);
182: if(print_subdomains) {
183: PCGASMPrintSubdomains(pc);
184: }
185: return(0);
186: }
194: static PetscErrorCode PCSetUp_GASM(PC pc)
195: {
196: PC_GASM *osm = (PC_GASM*)pc->data;
198: PetscBool symset,flg;
199: PetscInt i;
200: PetscMPIInt rank, size;
201: MatReuse scall = MAT_REUSE_MATRIX;
202: KSP ksp;
203: PC subpc;
204: const char *prefix,*pprefix;
205: PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */
206: const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */
207: PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */
208: PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */
209: IS gid; /* Identity IS of the size of all subdomains assigned to this processor. */
210: Vec x,y;
211: PetscScalar *gxarray, *gyarray;
212: PetscInt gfirst, glast;
215: MPI_Comm_size(((PetscObject)pc)->comm,&size);
216: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
217: if (!pc->setupcalled) {
219: if (!osm->type_set) {
220: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
221: if (symset && flg) { osm->type = PC_GASM_BASIC; }
222: }
224: if (osm->N == PETSC_DECIDE && osm->n < 1) {
225: /* no subdomains given, use one per processor */
226: osm->nmax = osm->n = 1;
227: MPI_Comm_size(((PetscObject)pc)->comm,&size);
228: osm->N = size;
229: } else if (osm->N == PETSC_DECIDE) {
230: PetscInt inwork[2], outwork[2];
231: /* determine global number of subdomains and the max number of local subdomains */
232: inwork[0] = inwork[1] = osm->n;
233: MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);
234: osm->nmax = outwork[0];
235: osm->N = outwork[1];
236: }
237: if (!osm->is){ /* create the index sets */
238: PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);
239: }
240: if (!osm->is_local) {
241: /*
242: This indicates that osm->is should define a nonoverlapping decomposition
243: (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
244: but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
245: via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
246: Therefore, osm->is will be used to define osm->is_local.
247: If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
248: so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
249: Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
250: */
251: PetscMalloc(osm->n*sizeof(IS),&osm->is_local);
252: for (i=0; i<osm->n; i++) {
253: if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
254: ISDuplicate(osm->is[i],&osm->is_local[i]);
255: ISCopy(osm->is[i],osm->is_local[i]);
256: } else {
257: PetscObjectReference((PetscObject)osm->is[i]);
258: osm->is_local[i] = osm->is[i];
259: }
260: }
261: }
262: /* Beyond this point osm->is_local is not null. */
263: if (osm->overlap > 0) {
264: /* Extend the "overlapping" regions by a number of steps */
265: MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);
266: }
267: PCGetOptionsPrefix(pc,&prefix);
268: flg = PETSC_FALSE;
269: PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);
270: if (flg) { PCGASMPrintSubdomains(pc); }
272: if (osm->sort_indices) {
273: for (i=0; i<osm->n; i++) {
274: ISSort(osm->is[i]);
275: ISSort(osm->is_local[i]);
276: }
277: }
278: /* Merge the ISs, create merged vectors and scatter contexts. */
279: /* Restriction ISs. */
280: ddn = 0;
281: for (i=0; i<osm->n; i++) {
282: ISGetLocalSize(osm->is[i],&dn);
283: ddn += dn;
284: }
285: PetscMalloc(ddn*sizeof(PetscInt), &ddidx);
286: ddn = 0;
287: for (i=0; i<osm->n; i++) {
288: ISGetLocalSize(osm->is[i],&dn);
289: ISGetIndices(osm->is[i],&didx);
290: PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);
291: ISRestoreIndices(osm->is[i], &didx);
292: ddn += dn;
293: }
294: ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);
295: MatGetVecs(pc->pmat,&x,&y);
296: VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);
297: VecDuplicate(osm->gx,&osm->gy);
298: VecGetOwnershipRange(osm->gx, &gfirst, &glast);
299: ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);
300: VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));
301: ISDestroy(&gid);
302: /* Prolongation ISs */
303: { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. */
304: const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */
305: PetscInt ddn_local; /* Number of indices in the local part of the disjoint union of all domains assigned to this processor. */
306: PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
307: /**/
308: ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
309: PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */
310: PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
311: IS gis_llocal; /* IS with ddidx_llocal indices. */
312: PetscInt j;
313: ddn_local = 0;
314: for (i=0; i<osm->n; i++) {
315: ISGetLocalSize(osm->is_local[i],&dn_local);
316: ddn_local += dn_local;
317: }
318: PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);
319: ddn_local = 0;
320: for (i=0; i<osm->n; i++) {
321: ISGetLocalSize(osm->is_local[i],&dn_local);
322: ISGetIndices(osm->is_local[i],&didx_local);
323: PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);
324: ISRestoreIndices(osm->is_local[i], &didx_local);
325: ddn_local += dn_local;
326: }
327: PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);
328: ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));
329: ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);
330: ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);
331: ISLocalToGlobalMappingDestroy(&ltog);
332: if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
333: /* Now convert these localized indices into the global indices into the merged output vector. */
334: VecGetOwnershipRange(osm->gy, &gfirst, &glast);
335: for(j=0; j < ddn_llocal; ++j) {
336: ddidx_llocal[j] += gfirst;
337: }
338: ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);
339: VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);
340: ISDestroy(&gis_llocal);
341: }
342: /* Create the subdomain work vectors. */
343: PetscMalloc(osm->n*sizeof(Vec),&osm->x);
344: PetscMalloc(osm->n*sizeof(Vec),&osm->y);
345: VecGetOwnershipRange(osm->gx, &gfirst, &glast);
346: VecGetArray(osm->gx, &gxarray);
347: VecGetArray(osm->gy, &gyarray);
348: for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
349: PetscInt dN;
350: ISGetLocalSize(osm->is[i],&dn);
351: ISGetSize(osm->is[i],&dN);
352: VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);
353: VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);
354: }
355: VecRestoreArray(osm->gx, &gxarray);
356: VecRestoreArray(osm->gy, &gyarray);
357: VecDestroy(&x);
358: VecDestroy(&y);
359: /* Create the local solvers */
360: PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);
361: for (i=0; i<osm->n; i++) { /* KSPs are local */
362: KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);
363: PetscLogObjectParent(pc,ksp);
364: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
365: KSPSetType(ksp,KSPPREONLY);
366: KSPGetPC(ksp,&subpc);
367: PCGetOptionsPrefix(pc,&prefix);
368: KSPSetOptionsPrefix(ksp,prefix);
369: KSPAppendOptionsPrefix(ksp,"sub_");
370: osm->ksp[i] = ksp;
371: }
372: scall = MAT_INITIAL_MATRIX;
374: } else {
375: /*
376: Destroy the blocks from the previous iteration
377: */
378: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
379: MatDestroyMatrices(osm->n,&osm->pmat);
380: scall = MAT_INITIAL_MATRIX;
381: }
382: }
384: /*
385: Extract out the submatrices.
386: */
387: if(size > 1) {
388: MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);
389: }
390: else {
391: MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);
392: }
393: if (scall == MAT_INITIAL_MATRIX) {
394: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
395: for (i=0; i<osm->n; i++) {
396: PetscLogObjectParent(pc,osm->pmat[i]);
397: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
398: }
399: }
400:
401: /* Return control to the user so that the submatrices can be modified (e.g., to apply
402: different boundary conditions for the submatrices than for the global problem) */
403: PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);
405: /*
406: Loop over submatrices putting them into local ksp
407: */
408: for (i=0; i<osm->n; i++) {
409: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);
410: if (!pc->setupcalled) {
411: KSPSetFromOptions(osm->ksp[i]);
412: }
413: }
415: return(0);
416: }
420: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
421: {
422: PC_GASM *osm = (PC_GASM*)pc->data;
424: PetscInt i;
427: for (i=0; i<osm->n; i++) {
428: KSPSetUp(osm->ksp[i]);
429: }
430: return(0);
431: }
435: static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
436: {
437: PC_GASM *osm = (PC_GASM*)pc->data;
439: PetscInt i;
440: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
443: /*
444: Support for limiting the restriction or interpolation to only local
445: subdomain values (leaving the other values 0).
446: */
447: if (!(osm->type & PC_GASM_RESTRICT)) {
448: forward = SCATTER_FORWARD_LOCAL;
449: /* have to zero the work RHS since scatter may leave some slots empty */
450: VecZeroEntries(osm->gx);
451: }
452: if (!(osm->type & PC_GASM_INTERPOLATE)) {
453: reverse = SCATTER_REVERSE_LOCAL;
454: }
456: VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);
457: VecZeroEntries(y);
458: VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);
459: /* do the subdomain solves */
460: for (i=0; i<osm->n; ++i) {
461: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
462: }
463: VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);
464: VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);
465: return(0);
466: }
470: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
471: {
472: PC_GASM *osm = (PC_GASM*)pc->data;
474: PetscInt i;
475: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
478: /*
479: Support for limiting the restriction or interpolation to only local
480: subdomain values (leaving the other values 0).
482: Note: these are reversed from the PCApply_GASM() because we are applying the
483: transpose of the three terms
484: */
485: if (!(osm->type & PC_GASM_INTERPOLATE)) {
486: forward = SCATTER_FORWARD_LOCAL;
487: /* have to zero the work RHS since scatter may leave some slots empty */
488: VecZeroEntries(osm->gx);
489: }
490: if (!(osm->type & PC_GASM_RESTRICT)) {
491: reverse = SCATTER_REVERSE_LOCAL;
492: }
494: VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);
495: VecZeroEntries(y);
496: VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);
497: /* do the local solves */
498: for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
499: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
500: }
501: VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);
502: VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);
503: return(0);
504: }
508: static PetscErrorCode PCReset_GASM(PC pc)
509: {
510: PC_GASM *osm = (PC_GASM*)pc->data;
512: PetscInt i;
515: if (osm->ksp) {
516: for (i=0; i<osm->n; i++) {
517: KSPReset(osm->ksp[i]);
518: }
519: }
520: if (osm->pmat) {
521: if (osm->n > 0) {
522: MatDestroyMatrices(osm->n,&osm->pmat);
523: }
524: }
525: if (osm->x) {
526: for (i=0; i<osm->n; i++) {
527: VecDestroy(&osm->x[i]);
528: VecDestroy(&osm->y[i]);
529: }
530: }
531: VecDestroy(&osm->gx);
532: VecDestroy(&osm->gy);
533:
534: VecScatterDestroy(&osm->grestriction);
535: VecScatterDestroy(&osm->gprolongation);
536: if (osm->is) {PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local); osm->is = 0;}
537: ISDestroy(&osm->gis);
538: ISDestroy(&osm->gis_local);
539: return(0);
540: }
544: static PetscErrorCode PCDestroy_GASM(PC pc)
545: {
546: PC_GASM *osm = (PC_GASM*)pc->data;
548: PetscInt i;
551: PCReset_GASM(pc);
552: if (osm->ksp) {
553: for (i=0; i<osm->n; i++) {
554: KSPDestroy(&osm->ksp[i]);
555: }
556: PetscFree(osm->ksp);
557: }
558: PetscFree(osm->x);
559: PetscFree(osm->y);
560: PetscFree(pc->data);
561: return(0);
562: }
566: static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
567: PC_GASM *osm = (PC_GASM*)pc->data;
569: PetscInt blocks,ovl;
570: PetscBool symset,flg;
571: PCGASMType gasmtype;
574: /* set the type to symmetric if matrix is symmetric */
575: if (!osm->type_set && pc->pmat) {
576: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
577: if (symset && flg) { osm->type = PC_GASM_BASIC; }
578: }
579: PetscOptionsHead("Generalized additive Schwarz options");
580: PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);
581: if (flg) {PCGASMSetTotalSubdomains(pc,blocks); }
582: PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
583: if (flg) {PCGASMSetOverlap(pc,ovl); }
584: flg = PETSC_FALSE;
585: PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
586: if (flg) {PCGASMSetType(pc,gasmtype); }
587: PetscOptionsTail();
588: return(0);
589: }
591: /*------------------------------------------------------------------------------------*/
596: PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
597: {
598: PC_GASM *osm = (PC_GASM*)pc->data;
600: PetscInt i;
603: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
604: if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
606: if (!pc->setupcalled) {
607: osm->n = n;
608: osm->is = 0;
609: osm->is_local = 0;
610: if (is) {
611: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is[i]);}
612: }
613: if (is_local) {
614: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is_local[i]);}
615: }
616: if (osm->is) {
617: PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);
618: }
619: if (is) {
620: PetscMalloc(n*sizeof(IS),&osm->is);
621: for (i=0; i<n; i++) { osm->is[i] = is[i]; }
622: /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
623: osm->overlap = -1;
624: }
625: if (is_local) {
626: PetscMalloc(n*sizeof(IS),&osm->is_local);
627: for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
628: }
629: }
630: return(0);
631: }
637: PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
638: PC_GASM *osm = (PC_GASM*)pc->data;
640: PetscMPIInt rank,size;
641: PetscInt n;
644: if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
646: /*
647: Split the subdomains equally among all processors
648: */
649: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
650: MPI_Comm_size(((PetscObject)pc)->comm,&size);
651: n = N/size + ((N % size) > rank);
652: if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
653: if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
654: if (!pc->setupcalled) {
655: if (osm->is) {
656: PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);
657: }
658: osm->N = N;
659: osm->n = n;
660: osm->is = 0;
661: osm->is_local = 0;
662: }
663: return(0);
664: }/* PCGASMSetTotalSubdomains_GASM() */
670: PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
671: {
672: PC_GASM *osm = (PC_GASM*)pc->data;
675: if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
676: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
677: if (!pc->setupcalled) {
678: osm->overlap = ovl;
679: }
680: return(0);
681: }
687: PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
688: {
689: PC_GASM *osm = (PC_GASM*)pc->data;
692: osm->type = type;
693: osm->type_set = PETSC_TRUE;
694: return(0);
695: }
701: PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
702: {
703: PC_GASM *osm = (PC_GASM*)pc->data;
706: osm->sort_indices = doSort;
707: return(0);
708: }
714: /*
715: FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
716: In particular, it would upset the global subdomain number calculation.
717: */
718: PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
719: {
720: PC_GASM *osm = (PC_GASM*)pc->data;
724: if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
726: if (n) {
727: *n = osm->n;
728: }
729: if (first) {
730: MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);
731: *first -= osm->n;
732: }
733: if (ksp) {
734: /* Assume that local solves are now different; not necessarily
735: true though! This flag is used only for PCView_GASM() */
736: *ksp = osm->ksp;
737: osm->same_local_solves = PETSC_FALSE;
738: }
739: return(0);
740: }/* PCGASMGetSubKSP_GASM() */
746: /*@C
747: PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
748: only) for the additive Schwarz preconditioner.
750: Collective on PC
752: Input Parameters:
753: + pc - the preconditioner context
754: . n - the number of subdomains for this processor (default value = 1)
755: . is - the index set that defines the subdomains for this processor
756: (or PETSC_NULL for PETSc to determine subdomains)
757: - is_local - the index sets that define the local part of the subdomains for this processor
758: (or PETSC_NULL, in which case the local, nonoverlapping parts are derived from is)
760: Notes:
761: The IS numbering is in the parallel, global numbering of the vector.
763: By default the GASM preconditioner uses 1 block per processor.
765: Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.
767: Level: advanced
769: .keywords: PC, GASM, set, local, subdomains, additive Schwarz
771: .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
772: PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
773: @*/
774: PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
775: {
780: PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));
781: return(0);
782: }
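/*
   Usage sketch for PCGASMSetLocalSubdomains() above (a minimal, hypothetical fragment,
   not part of this file).  It assumes a KSP ksp whose operator is already set and two
   index lists rows0/rows1 of lengths n0/n1 built by the application; error checking
   (ierr/CHKERRQ) is omitted for brevity.

      PC pc;
      IS is[2];
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      // each IS lists, in global numbering, the rows of one (overlapping) subdomain
      ISCreateGeneral(PETSC_COMM_SELF,n0,rows0,PETSC_COPY_VALUES,&is[0]);
      ISCreateGeneral(PETSC_COMM_SELF,n1,rows1,PETSC_COPY_VALUES,&is[1]);
      // since is[] is supplied, PCGASM treats the overlap as user-defined
      PCGASMSetLocalSubdomains(pc,2,is,PETSC_NULL);
      KSPSetFromOptions(ksp);
*/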
786: /*@C
787: PCGASMSetTotalSubdomains - Sets the total number of subdomains to use for the
788: additive Schwarz preconditioner. Either all or no processors in the
789: PC communicator must call this routine, with the same number of subdomains.
791: Collective on PC
793: Input Parameters:
794: + pc - the preconditioner context
795: - N - the total number of subdomains requested for all processors combined
801: Options Database Key:
802: To set the total number of subdomain blocks rather than specify the
803: index sets, use the option
804: . -pc_gasm_blocks <blks> - Sets total blocks
806: Notes:
807: Currently this routine cannot be used to set the actual subdomain index sets; use PCGASMSetLocalSubdomains() for that.
809: By default the GASM preconditioner uses 1 block per processor.
811: The index sets defining the subdomains cannot be destroyed until after completion of the
812: linear solves for which the GASM preconditioner is being used.
814: Use PCGASMSetLocalSubdomains() to set local subdomains.
816: Level: advanced
818: .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
820: .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
821: PCGASMCreateSubdomains2D()
822: @*/
823: PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
824: {
829: PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));
830: return(0);
831: }
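/*
   Usage sketch for PCGASMSetTotalSubdomains() above (a minimal, hypothetical fragment,
   not part of this file).  Sixteen subdomains are requested in total and split roughly
   evenly across the processes in the PC's communicator; error checking is omitted.

      PC pc;
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetTotalSubdomains(pc,16);   // same effect as -pc_gasm_blocks 16
      PCGASMSetOverlap(pc,1);            // let PCGASM generate the overlap
      KSPSetFromOptions(ksp);
*/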
835: /*@
836: PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
837: additive Schwarz preconditioner. Either all or no processors in the
838: PC communicator must call this routine.
840: Logically Collective on PC
842: Input Parameters:
843: + pc - the preconditioner context
844: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
846: Options Database Key:
847: . -pc_gasm_overlap <ovl> - Sets overlap
849: Notes:
850: By default the GASM preconditioner uses 1 block per processor. To use
851: multiple blocks per processor, see PCGASMSetTotalSubdomains() and
852: PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
854: The overlap defaults to 1, so if one desires that no additional
855: overlap be computed beyond what may have been set with a call to
856: PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
857: must be set to be 0. In particular, if one does not explicitly set
858: the subdomains in an application code, then all overlap would be computed
859: internally by PETSc, and using an overlap of 0 would result in a GASM
860: variant that is equivalent to the block Jacobi preconditioner.
862: Note that one can define initial index sets with any overlap via
863: PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
864: PCGASMSetOverlap() merely allows PETSc to extend that overlap further
865: if desired.
867: Level: intermediate
869: .keywords: PC, GASM, set, overlap
871: .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
872: PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
873: @*/
874: PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
875: {
881: PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
882: return(0);
883: }
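/*
   Usage sketch for PCGASMSetOverlap() above (a minimal, hypothetical fragment, not part
   of this file).  With subdomains generated internally by PCGASM, an overlap of 0 makes
   the method equivalent to block Jacobi; error checking is omitted.

      PC pc;
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetOverlap(pc,0);            // same effect as -pc_gasm_overlap 0
      KSPSetFromOptions(ksp);
*/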
887: /*@
888: PCGASMSetType - Sets the type of restriction and interpolation used
889: for local problems in the additive Schwarz method.
891: Logically Collective on PC
893: Input Parameters:
894: + pc - the preconditioner context
895: - type - variant of GASM, one of
896: .vb
897: PC_GASM_BASIC - full interpolation and restriction
898: PC_GASM_RESTRICT - full restriction, local processor interpolation
899: PC_GASM_INTERPOLATE - full interpolation, local processor restriction
900: PC_GASM_NONE - local processor restriction and interpolation
901: .ve
903: Options Database Key:
904: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
906: Level: intermediate
908: .keywords: PC, GASM, set, type
910: .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
911: PCGASMCreateSubdomains2D()
912: @*/
913: PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
914: {
920: PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
921: return(0);
922: }
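/*
   Usage sketch for PCGASMSetType() above (a minimal, hypothetical fragment, not part of
   this file).  The basic (non-restricted) variant uses full restriction and
   interpolation, which preserves symmetry for symmetric problems.

      PC pc;
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetType(pc,PC_GASM_BASIC);   // same effect as -pc_gasm_type basic
*/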
926: /*@
927: PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
929: Logically Collective on PC
931: Input Parameters:
932: + pc - the preconditioner context
933: - doSort - sort the subdomain indices
935: Level: intermediate
937: .keywords: PC, GASM, set, type
939: .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
940: PCGASMCreateSubdomains2D()
941: @*/
942: PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
943: {
949: PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
950: return(0);
951: }
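/*
   Usage sketch for PCGASMSetSortIndices() above (a hypothetical fragment): keep
   user-supplied subdomain indices in the order given instead of sorting them.

      PCGASMSetSortIndices(pc,PETSC_FALSE);
*/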
955: /*@C
956: PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
957: this processor.
958:
959: Collective on PC iff first_local is requested
961: Input Parameter:
962: . pc - the preconditioner context
964: Output Parameters:
965: + n_local - the number of blocks on this processor or PETSC_NULL
966: . first_local - the global number of the first block on this processor or PETSC_NULL,
967: all processors must request or all must pass PETSC_NULL
968: - ksp - the array of KSP contexts
970: Note:
971: After PCGASMGetSubKSP() the array of KSPs is not to be freed
973: Currently for some matrix implementations only 1 block per processor
974: is supported.
975:
976: You must call KSPSetUp() before calling PCGASMGetSubKSP().
978: Level: advanced
980: .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
982: .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(),
983: PCGASMCreateSubdomains2D(),
984: @*/
985: PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
986: {
991: PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
992: return(0);
993: }
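/*
   Usage sketch for PCGASMGetSubKSP() above (a minimal, hypothetical fragment, not part
   of this file).  After the outer KSP has been set up, the subdomain solvers can be
   configured individually; error checking is omitted.

      KSP      *subksp;
      PC        subpc;
      PetscInt  i,nlocal,first;
      KSPSetUp(ksp);                                // required before PCGASMGetSubKSP()
      PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);   // first = global number of the first local block
      for (i=0; i<nlocal; i++) {
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCILU);                     // e.g., ILU on every local block
      }
*/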
995: /* -------------------------------------------------------------------------------------*/
996: /*MC
997: PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
998: its own KSP object.
1000: Options Database Keys:
1001: + -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
1002: . -pc_gasm_blocks <blks> - Sets total blocks
1003: . -pc_gasm_overlap <ovl> - Sets overlap
1004: - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1006: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1007: will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1008: -pc_gasm_type basic to use the standard GASM.
1010: Notes: Each processor can have one or more blocks, and a block (subdomain) may be shared by more
1011: than one processor. Defaults to one block per processor.
1013: To set options on the solvers for each block append -sub_ to all the KSP and PC
1014: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1015:
1016: To set the options on the solvers separately for each block call PCGASMGetSubKSP()
1017: and set the options directly on the resulting KSP object (you can access its PC
1018: with KSPGetPC())
1021: Level: beginner
1023: Concepts: additive Schwarz method
1025: References:
1026: An additive variant of the Schwarz alternating method for the case of many subregions
1027: M Dryja, OB Widlund - Courant Institute, New York University Technical report
1029: Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1030: Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1032: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1033: PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1034: PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1036: M*/
1041: PetscErrorCode PCCreate_GASM(PC pc)
1042: {
1044: PC_GASM *osm;
1047: PetscNewLog(pc,PC_GASM,&osm);
1048: osm->N = PETSC_DECIDE;
1049: osm->n = 0;
1050: osm->nmax = 0;
1051: osm->overlap = 1;
1052: osm->ksp = 0;
1053: osm->grestriction = 0;
1054: osm->gprolongation = 0;
1055: osm->gx = 0;
1056: osm->gy = 0;
1057: osm->x = 0;
1058: osm->y = 0;
1059: osm->is = 0;
1060: osm->is_local = 0;
1061: osm->pmat = 0;
1062: osm->type = PC_GASM_RESTRICT;
1063: osm->same_local_solves = PETSC_TRUE;
1064: osm->sort_indices = PETSC_TRUE;
1066: pc->data = (void*)osm;
1067: pc->ops->apply = PCApply_GASM;
1068: pc->ops->applytranspose = PCApplyTranspose_GASM;
1069: pc->ops->setup = PCSetUp_GASM;
1070: pc->ops->reset = PCReset_GASM;
1071: pc->ops->destroy = PCDestroy_GASM;
1072: pc->ops->setfromoptions = PCSetFromOptions_GASM;
1073: pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1074: pc->ops->view = PCView_GASM;
1075: pc->ops->applyrichardson = 0;
1077: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
1078: PCGASMSetLocalSubdomains_GASM);
1079: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
1080: PCGASMSetTotalSubdomains_GASM);
1081: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1082: PCGASMSetOverlap_GASM);
1083: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1084: PCGASMSetType_GASM);
1085: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1086: PCGASMSetSortIndices_GASM);
1087: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1088: PCGASMGetSubKSP_GASM);
1089: return(0);
1090: }
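/*
   Example options for the PCGASM manual page above (a hypothetical command line, not
   part of this file): 8 total subdomains, overlap 2, with ILU(1) as the subdomain
   solver.  The -sub_ prefix is forwarded to the per-subdomain KSP and PC objects.

      -ksp_type gmres -pc_type gasm -pc_gasm_blocks 8 -pc_gasm_overlap 2
      -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1
*/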
1096: /*@C
1097: PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1098: preconditioner for any problem on a general grid.
1100: Collective
1102: Input Parameters:
1103: + A - The global matrix operator
1104: - n - the number of local blocks
1106: Output Parameters:
1107: . outis - the array of index sets defining the subdomains
1109: Level: advanced
1111: Note: this generates nonoverlapping subdomains; PCGASM will generate the overlap
1112: from these if you use PCGASMSetLocalSubdomains()
1114: In the Fortran version you must provide the array outis[] already allocated of length n.
1116: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1118: .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1119: @*/
1120: PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1121: {
1122: MatPartitioning mpart;
1123: const char *prefix;
1124: PetscErrorCode (*f)(Mat,MatReuse,Mat*);
1125: PetscMPIInt size;
1126: PetscInt i,j,rstart,rend,bs;
1127: PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1128: Mat Ad = PETSC_NULL, adj;
1129: IS ispart,isnumb,*is;
1130: PetscErrorCode ierr;
1135: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1137: /* Get prefix, row distribution, and block size */
1138: MatGetOptionsPrefix(A,&prefix);
1139: MatGetOwnershipRange(A,&rstart,&rend);
1140: MatGetBlockSize(A,&bs);
1141: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1143: /* Get diagonal block from matrix if possible */
1144: MPI_Comm_size(((PetscObject)A)->comm,&size);
1145: PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);
1146: if (f) {
1147: MatGetDiagonalBlock(A,&Ad);
1148: } else if (size == 1) {
1149: Ad = A;
1150: }
1151: if (Ad) {
1152: PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1153: if (!isbaij) {PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1154: }
1155: if (Ad && n > 1) {
1156: PetscBool match,done;
1157: /* Try to setup a good matrix partitioning if available */
1158: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1159: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1160: MatPartitioningSetFromOptions(mpart);
1161: PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1162: if (!match) {
1163: PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1164: }
1165: if (!match) { /* assume a "good" partitioner is available */
1166: PetscInt na,*ia,*ja;
1167: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1168: if (done) {
1169: /* Build adjacency matrix by hand. Unfortunately a call to
1170: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1171: remove the block-aij structure and we cannot expect
1172: MatPartitioning to split vertices as we need */
1173: PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1174: nnz = 0;
1175: for (i=0; i<na; i++) { /* count number of nonzeros */
1176: len = ia[i+1] - ia[i];
1177: row = ja + ia[i];
1178: for (j=0; j<len; j++) {
1179: if (row[j] == i) { /* don't count diagonal */
1180: len--; break;
1181: }
1182: }
1183: nnz += len;
1184: }
1185: PetscMalloc((na+1)*sizeof(PetscInt),&iia);
1186: PetscMalloc((nnz)*sizeof(PetscInt),&jja);
1187: nnz = 0;
1188: iia[0] = 0;
1189: for (i=0; i<na; i++) { /* fill adjacency */
1190: cnt = 0;
1191: len = ia[i+1] - ia[i];
1192: row = ja + ia[i];
1193: for (j=0; j<len; j++) {
1194: if (row[j] != i) { /* if not diagonal */
1195: jja[nnz+cnt++] = row[j];
1196: }
1197: }
1198: nnz += cnt;
1199: iia[i+1] = nnz;
1200: }
1201: /* Partitioning of the adjacency matrix */
1202: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);
1203: MatPartitioningSetAdjacency(mpart,adj);
1204: MatPartitioningSetNParts(mpart,n);
1205: MatPartitioningApply(mpart,&ispart);
1206: ISPartitioningToNumbering(ispart,&isnumb);
1207: MatDestroy(&adj);
1208: foundpart = PETSC_TRUE;
1209: }
1210: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1211: }
1212: MatPartitioningDestroy(&mpart);
1213: }
1215: PetscMalloc(n*sizeof(IS),&is);
1216: *outis = is;
1218: if (!foundpart) {
1220: /* Partitioning by contiguous chunks of rows */
1222: PetscInt mbs = (rend-rstart)/bs;
1223: PetscInt start = rstart;
1224: for (i=0; i<n; i++) {
1225: PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1226: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1227: start += count;
1228: }
1230: } else {
1232: /* Partitioning by adjacency of diagonal block */
1234: const PetscInt *numbering;
1235: PetscInt *count,nidx,*indices,*newidx,start=0;
1236: /* Get node count in each partition */
1237: PetscMalloc(n*sizeof(PetscInt),&count);
1238: ISPartitioningCount(ispart,n,count);
1239: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1240: for (i=0; i<n; i++) count[i] *= bs;
1241: }
1242: /* Build indices from node numbering */
1243: ISGetLocalSize(isnumb,&nidx);
1244: PetscMalloc(nidx*sizeof(PetscInt),&indices);
1245: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1246: ISGetIndices(isnumb,&numbering);
1247: PetscSortIntWithPermutation(nidx,numbering,indices);
1248: ISRestoreIndices(isnumb,&numbering);
1249: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1250: PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);
1251: for (i=0; i<nidx; i++)
1252: for (j=0; j<bs; j++)
1253: newidx[i*bs+j] = indices[i]*bs + j;
1254: PetscFree(indices);
1255: nidx *= bs;
1256: indices = newidx;
1257: }
1258: /* Shift to get global indices */
1259: for (i=0; i<nidx; i++) indices[i] += rstart;
1261: /* Build the index sets for each block */
1262: for (i=0; i<n; i++) {
1263: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1264: ISSort(is[i]);
1265: start += count[i];
1266: }
1268: PetscFree(count);
1269: PetscFree(indices);
1270: ISDestroy(&isnumb);
1271: ISDestroy(&ispart);
1272: }
1274: return(0);
1275: }
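/*
   Usage sketch for PCGASMCreateSubdomains() above (a minimal, hypothetical fragment,
   not part of this file).  Four nonoverlapping local subdomains are generated from the
   operator A and handed to a PCGASM preconditioner pc, which then extends them by the
   requested overlap; error checking is omitted.

      IS *is;
      PCGASMCreateSubdomains(A,4,&is);
      PCGASMSetLocalSubdomains(pc,4,is,PETSC_NULL);
      PCGASMSetOverlap(pc,1);
      // ... set up and run the solves ...
      PCGASMDestroySubdomains(4,is,PETSC_NULL);     // releases the IS array created above
*/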
1279: /*@C
1280: PCGASMDestroySubdomains - Destroys the index sets created with
1281: PCGASMCreateSubdomains(). Should be called after setting subdomains
1282: with PCGASMSetLocalSubdomains().
1284: Collective
1286: Input Parameters:
1287: + n - the number of index sets
1288: . is - the array of index sets
1289: - is_local - the array of local index sets, can be PETSC_NULL
1291: Level: advanced
1293: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1295: .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1296: @*/
1297: PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1298: {
1299: PetscInt i;
1302: if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1304: for (i=0; i<n; i++) { ISDestroy(&is[i]); }
1305: PetscFree(is);
1306: if (is_local) {
1308: for (i=0; i<n; i++) { ISDestroy(&is_local[i]); }
1309: PetscFree(is_local);
1310: }
1311: return(0);
1312: }
1315: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1316: { \
1317: PetscInt first_row = first/M, last_row = last/M+1; \
1318: /* \
1319: Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1320: of the bounding box of the intersection of the subdomain with the local ownership range (local \
1321: subdomain). \
1322: Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1323: of the intersection. \
1324: */ \
1325: /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1326: *ylow_loc = PetscMax(first_row,ylow); \
1327: /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1328: *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \
1329: /* yhigh_loc is the grid row above the last local subdomain element */ \
1330: *yhigh_loc = PetscMin(last_row,yhigh); \
1331: /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1332: *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \
1333: /* Now compute the size of the local subdomain n. */ \
1334: *n = 0; \
1335: if(*ylow_loc < *yhigh_loc) { \
1336: PetscInt width = xright-xleft; \
1337: *n += width*(*yhigh_loc-*ylow_loc-1); \
1338: *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1339: *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1340: }\
1341: }
1347: /*@
1348: PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1349: preconditioner for a two-dimensional problem on a regular grid.
1351: Collective
1353: Input Parameters:
1354: + M, N - the global number of mesh points in the x and y directions
1355: . Mdomains, Ndomains - the global number of subdomains in the x and y directions
1356: . dof - degrees of freedom per node
1357: - overlap - overlap in mesh lines
1359: Output Parameters:
1360: + Nsub - the number of local subdomains created
1361: . is - array of index sets defining overlapping (if overlap > 0) subdomains
1362: - is_local - array of index sets defining non-overlapping subdomains
1364: Note:
1365: Presently PCGASMCreateSubdomains2D() is valid only for sequential
1366: preconditioners. More general related routines are
1367: PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1369: Level: advanced
1371: .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1373: .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1374: PCGASMSetOverlap()
1375: @*/
1376: PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1377: {
1379: PetscMPIInt size, rank;
1380: PetscInt i, j;
1381: PetscInt maxheight, maxwidth;
1382: PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1383: PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1384: PetscInt x[2][2], y[2][2], n[2];
1385: PetscInt first, last;
1386: PetscInt nidx, *idx;
1387: PetscInt ii,jj,s,q,d;
1388: PetscInt k,kk;
1389: PetscMPIInt color;
1390: MPI_Comm comm, subcomm;
1391: IS **iis = 0;
1394: PetscObjectGetComm((PetscObject)pc, &comm);
1395: MPI_Comm_size(comm, &size);
1396: MPI_Comm_rank(comm, &rank);
1397: MatGetOwnershipRange(pc->pmat, &first, &last);
1398: if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1399: "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1401: /* Determine the number of domains with nonzero intersections with the local ownership range. */
1402: s = 0;
1403: ystart = 0;
1404: for (j=0; j<Ndomains; ++j) {
1405: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1406: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1407: /* Vertical domain limits with an overlap. */
1408: ylow = PetscMax(ystart - overlap,0);
1409: yhigh = PetscMin(ystart + maxheight + overlap,N);
1410: xstart = 0;
1411: for (i=0; i<Mdomains; ++i) {
1412: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1413: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1414: /* Horizontal domain limits with an overlap. */
1415: xleft = PetscMax(xstart - overlap,0);
1416: xright = PetscMin(xstart + maxwidth + overlap,M);
1417: /*
1418: Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1419: */
1420: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1421: if(nidx) {
1422: ++s;
1423: }
1424: xstart += maxwidth;
1425: }/* for(i = 0; i < Mdomains; ++i) */
1426: ystart += maxheight;
1427: }/* for(j = 0; j < Ndomains; ++j) */
1428: /* Now we can allocate the necessary number of ISs. */
1429: *nsub = s;
1430: PetscMalloc((*nsub)*sizeof(IS*),is);
1431: PetscMalloc((*nsub)*sizeof(IS*),is_local);
1432: s = 0;
1433: ystart = 0;
1434: for (j=0; j<Ndomains; ++j) {
1435: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1436: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1437: /* Vertical domain limits with an overlap. */
1438: y[0][0] = PetscMax(ystart - overlap,0);
1439: y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1440: /* Vertical domain limits without an overlap. */
1441: y[1][0] = ystart;
1442: y[1][1] = PetscMin(ystart + maxheight,N);
1443: xstart = 0;
1444: for (i=0; i<Mdomains; ++i) {
1445: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1446: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1447: /* Horizontal domain limits with an overlap. */
1448: x[0][0] = PetscMax(xstart - overlap,0);
1449: x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1450: /* Horizontal domain limits without an overlap. */
1451: x[1][0] = xstart;
1452: x[1][1] = PetscMin(xstart+maxwidth,M);
1453: /*
1454: Determine whether this domain intersects this processor's ownership range of pc->pmat.
1455: Do this twice: first for the domains with overlaps, and once without.
1456: During the first pass create the subcommunicators, and use them on the second pass as well.
1457: */
1458: for(q = 0; q < 2; ++q) {
1459: /*
1460: domain limits, (xleft, xright) and (ylow, yhigh) are adjusted
1461: according to whether the domain with an overlap or without is considered.
1462: */
1463: xleft = x[q][0]; xright = x[q][1];
1464: ylow = y[q][0]; yhigh = y[q][1];
1465: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1466: nidx *= dof;
1467: n[q] = nidx;
1468: /*
1469: Based on the counted number of indices in the local domain *with an overlap*,
1470: construct a subcommunicator of all the processors supporting this domain.
1471: Observe that a domain with an overlap might have nontrivial local support,
1472: while the domain without an overlap might not. Hence, the decision to participate
1473: in the subcommunicator must be based on the domain with an overlap.
1474: */
1475: if (q == 0) {
1476: if(nidx) {
1477: color = 1;
1478: } else {
1479: color = MPI_UNDEFINED;
1480: }
1481: MPI_Comm_split(comm, color, rank, &subcomm);
1482: }
1483: /*
1484: Proceed only if the number of local indices *with an overlap* is nonzero.
1485: */
1486: if (n[0]) {
1487: if(q == 0) {
1488: iis = is;
1489: }
1490: if (q == 1) {
1491: /*
1492: The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1493: Moreover, if the overlap is zero, the two ISs are identical.
1494: */
1495: if (overlap == 0) {
1496: (*is_local)[s] = (*is)[s];
1497: PetscObjectReference((PetscObject)(*is)[s]);
1498: continue;
1499: } else {
1500: iis = is_local;
1501: subcomm = ((PetscObject)(*is)[s])->comm;
1502: }
1503: }/* if(q == 1) */
1504: idx = PETSC_NULL;
1505: PetscMalloc(nidx*sizeof(PetscInt),&idx);
1506: if(nidx) {
1507: k = 0;
1508: for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1509: PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1510: PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1511: kk = dof*(M*jj + x0);
1512: for (ii=x0; ii<x1; ++ii) {
1513: for(d = 0; d < dof; ++d) {
1514: idx[k++] = kk++;
1515: }
1516: }
1517: }
1518: }
1519: ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);
1520: }/* if(n[0]) */
1521: }/* for(q = 0; q < 2; ++q) */
1522: if(n[0]) {
1523: ++s;
1524: }
1525: xstart += maxwidth;
1526: }/* for(i = 0; i < Mdomains; ++i) */
1527: ystart += maxheight;
1528: }/* for(j = 0; j < Ndomains; ++j) */
1529: return(0);
1530: }
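/*
   Usage sketch for PCGASMCreateSubdomains2D() above (a minimal, hypothetical fragment,
   not part of this file).  A 64x64 grid with one degree of freedom per node is split
   into a 4x4 array of subdomains overlapping by 2 mesh lines, and the resulting index
   sets are passed to a PCGASM preconditioner pc; error checking is omitted.

      PetscInt  nsub;
      IS       *is,*is_local;
      PCGASMCreateSubdomains2D(pc,64,64,4,4,1,2,&nsub,&is,&is_local);
      PCGASMSetLocalSubdomains(pc,nsub,is,is_local);
*/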
1534: /*@C
1535: PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1536: only) for the additive Schwarz preconditioner.
1538: Not Collective
1540: Input Parameter:
1541: . pc - the preconditioner context
1543: Output Parameters:
1544: + n - the number of subdomains for this processor (default value = 1)
1545: . is - the index sets that define the subdomains for this processor
1546: - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1547:
1549: Notes:
1550: The IS numbering is in the parallel, global numbering of the vector.
1552: Level: advanced
1554: .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1556: .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1557: PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1558: @*/
1559: PetscErrorCode PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1560: {
1561: PC_GASM *osm;
1563: PetscBool match;
1569: PetscTypeCompare((PetscObject)pc,PCGASM,&match);
1570: if (!match) {
1571: if (n) *n = 0;
1572: if (is) *is = PETSC_NULL;
1573: } else {
1574: osm = (PC_GASM*)pc->data;
1575: if (n) *n = osm->n;
1576: if (is) *is = osm->is;
1577: if (is_local) *is_local = osm->is_local;
1578: }
1579: return(0);
1580: }
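/*
   Usage sketch for PCGASMGetLocalSubdomains() above (a hypothetical fragment, not part
   of this file): after setup, report the size of each local subdomain; error checking
   is omitted.

      PetscInt  i,n,len;
      IS       *is;
      PCGASMGetLocalSubdomains(pc,&n,&is,PETSC_NULL);
      for (i=0; i<n; i++) {
        ISGetLocalSize(is[i],&len);
        PetscPrintf(PETSC_COMM_SELF,"subdomain %D: %D locally owned indices\n",i,len);
      }
*/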
1584: /*@C
1585: PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1586: only) for the additive Schwarz preconditioner.
1588: Not Collective
1590: Input Parameter:
1591: . pc - the preconditioner context
1593: Output Parameters:
1594: + n - the number of matrices for this processor (default value = 1)
1595: - mat - the matrices
1596:
1598: Level: advanced
1600: .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1602: .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1603: PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1604: @*/
1605: PetscErrorCode PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1606: {
1607: PC_GASM *osm;
1609: PetscBool match;
1615: if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1616: PetscTypeCompare((PetscObject)pc,PCGASM,&match);
1617: if (!match) {
1618: if (n) *n = 0;
1619: if (mat) *mat = PETSC_NULL;
1620: } else {
1621: osm = (PC_GASM*)pc->data;
1622: if (n) *n = osm->n;
1623: if (mat) *mat = osm->pmat;
1624: }
1625: return(0);
1626: }
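/*
   Usage sketch for PCGASMGetLocalSubmatrices() above (a hypothetical fragment, not part
   of this file): after PCSetUp(), report the local dimensions of each subdomain matrix;
   error checking is omitted.

      PetscInt  i,n,m,nc;
      Mat      *pmat;
      PCGASMGetLocalSubmatrices(pc,&n,&pmat);
      for (i=0; i<n; i++) {
        MatGetLocalSize(pmat[i],&m,&nc);
        PetscPrintf(PETSC_COMM_SELF,"submatrix %D: %D by %D (local sizes)\n",i,m,nc);
      }
*/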