Actual source code: asa.c
2: /* --------------------------------------------------------------------
4: Contributed by Arvid Bessen, Columbia University, June 2007
5:
6: This file implements a ASA preconditioner in PETSc as part of PC.
8: The adaptive smoothed aggregation algorithm is described in the paper
9: "Adaptive Smoothed Aggregation (ASA)", M. Brezina, R. Falgout, S. MacLachlan,
10: T. Manteuffel, S. McCormick, and J. Ruge, SIAM Journal on Scientific Computing,
11: SISC Volume 25 Issue 6, Pages 1896-1920.
13: For an example usage of this preconditioner, see, e.g.
14: $PETSC_DIR/src/ksp/ksp/examples/tutorials/ex38.c ex39.c
15: and other files in that directory.
17: This code is still somewhat experimental. A number of improvements would be
18: - keep vectors allocated on each level, instead of destroying them
19: (see mainly PCApplyVcycleOnLevel_ASA)
20: - in PCCreateTransferOp_ASA we get all of the submatrices at once, this could
21: be optimized by differentiating between local and global matrices
22: - the code does not handle it gracefully if there is just one level
23: - if relaxation is sufficient, exit of PCInitializationStage_ASA is not
24: completely clean
25: - default values could be more reasonable, especially for parallel solves,
26: where we need a parallel LU or similar
27: - the richardson scaling parameter is somewhat special, should be treated in a
28: good default way
29: - a number of parameters for smoother (sor_omega, etc.) that we store explicitly
30: could be kept in the respective smoothers themselves
31: - some parameters have to be set via command line options, there are no direct
32: function calls available
33: - numerous other stuff
35: Example runs in parallel would be with parameters like
36: mpiexec ./program -pc_asa_coarse_pc_factor_mat_solver_package mumps -pc_asa_direct_solver 200
37: -pc_asa_max_cand_vecs 4 -pc_asa_mu_initial 50 -pc_asa_richardson_scale 1.0
38: -pc_asa_rq_improve 0.9 -asa_smoother_pc_type asm -asa_smoother_sub_pc_type sor
40: -------------------------------------------------------------------- */
42: /*
43: This defines the data structures for the smoothed aggregation procedure
44: */
45: #include <../src/ksp/pc/impls/asa/asa.h>
46: #include <petscblaslapack.h>
48: /* -------------------------------------------------------------------------- */
50: /* Event logging */
52: PetscLogEvent PC_InitializationStage_ASA, PC_GeneralSetupStage_ASA;
53: PetscLogEvent PC_CreateTransferOp_ASA, PC_CreateVcycle_ASA;
54: PetscBool asa_events_registered = PETSC_FALSE;
59: /*@C
60: PCASASetDM - Sets the coarse grid information for the grids
62: Collective on PC
64: Input Parameter:
65: + pc - the context
66: - dm - the DM object
68: Level: advanced
70: @*/
71: PetscErrorCode PCASASetDM(PC pc,DM dm)
72: {
77: PetscTryMethod(pc,"PCASASetDM_C",(PC,DM),(pc,dm));
78: return(0);
79: }
84: PetscErrorCode PCASASetDM_ASA(PC pc, DM dm)
85: {
87: PC_ASA *asa = (PC_ASA *) pc->data;
90: PetscObjectReference((PetscObject)dm);
91: asa->dm = dm;
92: return(0);
93: }
98: /*@C
99: PCASASetTolerances - Sets the convergence thresholds for ASA algorithm
101: Collective on PC
103: Input Parameter:
104: + pc - the context
105: . rtol - the relative convergence tolerance
106: (relative decrease in the residual norm)
107: . abstol - the absolute convergence tolerance
108: (absolute size of the residual norm)
109: . dtol - the divergence tolerance
110: (amount residual can increase before KSPDefaultConverged()
111: concludes that the method is diverging)
112: - maxits - maximum number of iterations to use
114: Notes:
115: Use PETSC_DEFAULT to retain the default value of any of the tolerances.
117: Level: advanced
118: @*/
119: PetscErrorCode PCASASetTolerances(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
120: {
125: PetscTryMethod(pc,"PCASASetTolerances_C",(PC,PetscReal,PetscReal,PetscReal,PetscInt),(pc,rtol,abstol,dtol,maxits));
126: return(0);
127: }
132: PetscErrorCode PCASASetTolerances_ASA(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
133: {
134: PC_ASA *asa = (PC_ASA *) pc->data;
138: if (rtol != PETSC_DEFAULT) asa->rtol = rtol;
139: if (abstol != PETSC_DEFAULT) asa->abstol = abstol;
140: if (dtol != PETSC_DEFAULT) asa->divtol = dtol;
141: if (maxits != PETSC_DEFAULT) asa->max_it = maxits;
142: return(0);
143: }
148: /*
149: PCCreateLevel_ASA - Creates one level for the ASA algorithm
151: Input Parameters:
152: + level - current level
153: . comm - MPI communicator object
154: . next - pointer to next level
155: . prev - pointer to previous level
156: . ksptype - the KSP type for the smoothers on this level
157: - pctype - the PC type for the smoothers on this level
159: Output Parameters:
160: . new_asa_lev - the newly created level
162: .keywords: ASA, create, levels, multigrid
163: */
164: PetscErrorCode PCCreateLevel_ASA(PC_ASA_level **new_asa_lev, int level,MPI_Comm comm, PC_ASA_level *prev,
165: PC_ASA_level *next,KSPType ksptype, PCType pctype)
166: {
168: PC_ASA_level *asa_lev;
169:
171: PetscMalloc(sizeof(PC_ASA_level), &asa_lev);
173: asa_lev->level = level;
174: asa_lev->size = 0;
176: asa_lev->A = 0;
177: asa_lev->B = 0;
178: asa_lev->x = 0;
179: asa_lev->b = 0;
180: asa_lev->r = 0;
181:
182: asa_lev->dm = 0;
183: asa_lev->aggnum = 0;
184: asa_lev->agg = 0;
185: asa_lev->loc_agg_dofs = 0;
186: asa_lev->agg_corr = 0;
187: asa_lev->bridge_corr = 0;
188:
189: asa_lev->P = 0;
190: asa_lev->Pt = 0;
191: asa_lev->smP = 0;
192: asa_lev->smPt = 0;
194: asa_lev->comm = comm;
196: asa_lev->smoothd = 0;
197: asa_lev->smoothu = 0;
199: asa_lev->prev = prev;
200: asa_lev->next = next;
201:
202: *new_asa_lev = asa_lev;
203: return(0);
204: }
208: PetscErrorCode PrintResNorm(Mat A, Vec x, Vec b, Vec r)
209: {
211: PetscBool destroyr = PETSC_FALSE;
212: PetscReal resnorm;
213: MPI_Comm Acomm;
216: if (!r) {
217: MatGetVecs(A, PETSC_NULL, &r);
218: destroyr = PETSC_TRUE;
219: }
220: MatMult(A, x, r);
221: VecAYPX(r, -1.0, b);
222: VecNorm(r, NORM_2, &resnorm);
223: PetscObjectGetComm((PetscObject) A, &Acomm);
224: PetscPrintf(Acomm, "Residual norm is %f.\n", resnorm);
226: if (destroyr) {
227: VecDestroy(&r);
228: }
229:
230: return(0);
231: }
235: PetscErrorCode PrintEnergyNormOfDiff(Mat A, Vec x, Vec y)
236: {
238: Vec vecdiff, Avecdiff;
239: PetscScalar dotprod;
240: PetscReal dotabs;
241: MPI_Comm Acomm;
244: VecDuplicate(x, &vecdiff);
245: VecWAXPY(vecdiff, -1.0, x, y);
246: MatGetVecs(A, PETSC_NULL, &Avecdiff);
247: MatMult(A, vecdiff, Avecdiff);
248: VecDot(vecdiff, Avecdiff, &dotprod);
249: dotabs = PetscAbsScalar(dotprod);
250: PetscObjectGetComm((PetscObject) A, &Acomm);
251: PetscPrintf(Acomm, "Energy norm %f.\n", dotabs);
252: VecDestroy(&vecdiff);
253: VecDestroy(&Avecdiff);
254: return(0);
255: }
257: /* -------------------------------------------------------------------------- */
258: /*
259: PCDestroyLevel_ASA - Destroys one level of the ASA preconditioner
261: Input Parameter:
262: . asa_lev - pointer to level that should be destroyed
264: */
267: PetscErrorCode PCDestroyLevel_ASA(PC_ASA_level *asa_lev)
268: {
272: MatDestroy(&(asa_lev->A));
273: MatDestroy(&(asa_lev->B));
274: VecDestroy(&(asa_lev->x));
275: VecDestroy(&(asa_lev->b));
276: VecDestroy(&(asa_lev->r));
278: if (asa_lev->dm) {DMDestroy(&asa_lev->dm);}
280: MatDestroy(&(asa_lev->agg));
281: PetscFree(asa_lev->loc_agg_dofs);
282: MatDestroy(&(asa_lev->agg_corr));
283: MatDestroy(&(asa_lev->bridge_corr));
285: MatDestroy(&(asa_lev->P));
286: MatDestroy(&(asa_lev->Pt));
287: MatDestroy(&(asa_lev->smP));
288: MatDestroy(&(asa_lev->smPt));
290: if (asa_lev->smoothd != asa_lev->smoothu) {
291: if (asa_lev->smoothd) {KSPDestroy(&asa_lev->smoothd);}
292: }
293: if (asa_lev->smoothu) {KSPDestroy(&asa_lev->smoothu);}
295: PetscFree(asa_lev);
296: return(0);
297: }
299: /* -------------------------------------------------------------------------- */
300: /*
301: PCComputeSpectralRadius_ASA - Computes the spectral radius of asa_lev->A
302: and stores it it asa_lev->spec_rad
304: Input Parameters:
305: . asa_lev - the level we are treating
307: Compute spectral radius with sqrt(||A||_1 ||A||_inf) >= ||A||_2 >= rho(A)
309: */
312: PetscErrorCode PCComputeSpectralRadius_ASA(PC_ASA_level *asa_lev)
313: {
315: PetscReal norm_1, norm_inf;
318: MatNorm(asa_lev->A, NORM_1, &norm_1);
319: MatNorm(asa_lev->A, NORM_INFINITY, &norm_inf);
320: asa_lev->spec_rad = PetscSqrtReal(norm_1*norm_inf);
321: return(0);
322: }
326: PetscErrorCode PCSetRichardsonScale_ASA(KSP ksp, PetscReal spec_rad, PetscReal richardson_scale) {
328: PC pc;
329: PetscBool flg;
330: PetscReal spec_rad_inv;
333: KSPSetInitialGuessNonzero(ksp, PETSC_TRUE);
334: if (richardson_scale != PETSC_DECIDE) {
335: KSPRichardsonSetScale(ksp, richardson_scale);
336: } else {
337: KSPGetPC(ksp, &pc);
338: PetscTypeCompare((PetscObject)(pc), PCNONE, &flg);
339: if (flg) {
340: /* WORK: this is just an educated guess. Any number between 0 and 2/rho(A)
341: should do. asa_lev->spec_rad has to be an upper bound on rho(A). */
342: spec_rad_inv = 1.0/spec_rad;
343: KSPRichardsonSetScale(ksp, spec_rad_inv);
344: } else {
345: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "Unknown PC type for smoother. Please specify scaling factor with -pc_asa_richardson_scale\n");
346: }
347: }
348: return(0);
349: }
353: PetscErrorCode PCSetSORomega_ASA(PC pc, PetscReal sor_omega)
354: {
358: PCSORSetSymmetric(pc, SOR_SYMMETRIC_SWEEP);
359: if (sor_omega != PETSC_DECIDE) {
360: PCSORSetOmega(pc, sor_omega);
361: }
362: return(0);
363: }
366: /* -------------------------------------------------------------------------- */
367: /*
368: PCSetupSmoothersOnLevel_ASA - Creates the smoothers of the level.
369: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
371: Input Parameters:
372: + asa - the data structure for the ASA preconditioner
373: . asa_lev - the level we are treating
374: - maxits - maximum number of iterations to use
375: */
378: PetscErrorCode PCSetupSmoothersOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
379: {
380: PetscErrorCode ierr;
381: PetscBool flg;
382: PC pc;
385: /* destroy old smoothers */
386: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
387: KSPDestroy(&asa_lev->smoothu);
388: }
389: asa_lev->smoothu = 0;
390: if (asa_lev->smoothd) {
391: KSPDestroy(&asa_lev->smoothd);
392: }
393: asa_lev->smoothd = 0;
394: /* create smoothers */
395: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
396: KSPSetType(asa_lev->smoothd, asa->ksptype_smooth);
397: KSPGetPC(asa_lev->smoothd,&pc);
398: PCSetType(pc,asa->pctype_smooth);
400: /* set up problems for smoothers */
401: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
402: KSPSetTolerances(asa_lev->smoothd, asa->smoother_rtol, asa->smoother_abstol, asa->smoother_dtol, maxits);
403: PetscTypeCompare((PetscObject)(asa_lev->smoothd), KSPRICHARDSON, &flg);
404: if (flg) {
405: /* special parameters for certain smoothers */
406: KSPSetInitialGuessNonzero(asa_lev->smoothd, PETSC_TRUE);
407: KSPGetPC(asa_lev->smoothd, &pc);
408: PetscTypeCompare((PetscObject)pc, PCSOR, &flg);
409: if (flg) {
410: PCSetSORomega_ASA(pc, asa->sor_omega);
411: } else {
412: /* just set asa->richardson_scale to get some very basic smoother */
413: PCSetRichardsonScale_ASA(asa_lev->smoothd, asa_lev->spec_rad, asa->richardson_scale);
414: }
415: /* this would be the place to add support for other preconditioners */
416: }
417: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_smoother_");
418: KSPSetFromOptions(asa_lev->smoothd);
419: /* set smoothu equal to smoothd, this could change later */
420: asa_lev->smoothu = asa_lev->smoothd;
421: return(0);
422: }
424: /* -------------------------------------------------------------------------- */
425: /*
426: PCSetupDirectSolversOnLevel_ASA - Creates the direct solvers on the coarsest level.
427: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
429: Input Parameters:
430: + asa - the data structure for the ASA preconditioner
431: . asa_lev - the level we are treating
432: - maxits - maximum number of iterations to use
433: */
436: PetscErrorCode PCSetupDirectSolversOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
437: {
438: PetscErrorCode ierr;
439: PetscBool flg;
440: PetscMPIInt comm_size;
441: PC pc;
444: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
445: KSPDestroy(&asa_lev->smoothu);
446: }
447: asa_lev->smoothu = 0;
448: if (asa_lev->smoothd) {
449: KSPDestroy(&asa_lev->smoothd);
450: asa_lev->smoothd = 0;
451: }
452: PetscStrcmp(asa->ksptype_direct, KSPPREONLY, &flg);
453: if (flg) {
454: PetscStrcmp(asa->pctype_direct, PCLU, &flg);
455: if (flg) {
456: MPI_Comm_size(asa_lev->comm, &comm_size);
457: if (comm_size > 1) {
458: /* the LU PC will call MatSolve, we may have to set the correct type for the matrix
459: to have support for this in parallel */
460: MatConvert(asa_lev->A, asa->coarse_mat_type, MAT_REUSE_MATRIX, &(asa_lev->A));
461: }
462: }
463: }
464: /* create new solvers */
465: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
466: KSPSetType(asa_lev->smoothd, asa->ksptype_direct);
467: KSPGetPC(asa_lev->smoothd,&pc);
468: PCSetType(pc,asa->pctype_direct);
469: /* set up problems for direct solvers */
470: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
471: KSPSetTolerances(asa_lev->smoothd, asa->direct_rtol, asa->direct_abstol, asa->direct_dtol, maxits);
472: /* user can set any option by using -pc_asa_direct_xxx */
473: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_coarse_");
474: KSPSetFromOptions(asa_lev->smoothd);
475: /* set smoothu equal to 0, not used */
476: asa_lev->smoothu = 0;
477: return(0);
478: }
480: /* -------------------------------------------------------------------------- */
481: /*
482: PCCreateAggregates_ASA - Creates the aggregates
484: Input Parameters:
485: . asa_lev - the level for which we should create the projection matrix
487: */
490: PetscErrorCode PCCreateAggregates_ASA(PC_ASA_level *asa_lev)
491: {
492: PetscInt m,n, m_loc,n_loc;
493: PetscInt m_loc_s, m_loc_e;
494: const PetscScalar one = 1.0;
495: PetscErrorCode ierr;
498: /* Create nodal aggregates A_i^l */
499: /* we use the DM grid information for that */
500: if (asa_lev->dm) {
501: /* coarsen DM and get the restriction matrix */
502: DMCoarsen(asa_lev->dm, PETSC_NULL, &(asa_lev->next->dm));
503: DMGetAggregates(asa_lev->next->dm, asa_lev->dm, &(asa_lev->agg));
504: MatGetSize(asa_lev->agg, &m, &n);
505: MatGetLocalSize(asa_lev->agg, &m_loc, &n_loc);
506: if (n!=asa_lev->size) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"DM interpolation matrix has incorrect size!\n");
507: asa_lev->next->size = m;
508: asa_lev->aggnum = m;
509: /* create the correlators, right now just identity matrices */
510: MatCreateMPIAIJ(asa_lev->comm, n_loc, n_loc, n, n, 1, PETSC_NULL, 1, PETSC_NULL,&(asa_lev->agg_corr));
511: MatGetOwnershipRange(asa_lev->agg_corr, &m_loc_s, &m_loc_e);
512: for (m=m_loc_s; m<m_loc_e; m++) {
513: MatSetValues(asa_lev->agg_corr, 1, &m, 1, &m, &one, INSERT_VALUES);
514: }
515: MatAssemblyBegin(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
516: MatAssemblyEnd(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
517: /* MatShift(asa_lev->agg_corr, 1.0); */
518: } else {
519: /* somehow define the aggregates without knowing the geometry */
520: /* future WORK */
521: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP, "Currently pure algebraic coarsening is not supported!");
522: }
523: return(0);
524: }
526: /* -------------------------------------------------------------------------- */
527: /*
528: PCCreateTransferOp_ASA - Creates the transfer operator P_{l+1}^l for current level
530: Input Parameters:
531: + asa_lev - the level for which should create the transfer operator
532: - construct_bridge - true, if we should construct a bridge operator, false for normal prolongator
534: If we add a second, third, ... candidate vector (i.e. more than one column in B), we
535: have to relate the additional dimensions to the original aggregates. This is done through
536: the "aggregate correlators" agg_corr and bridge_corr.
537: The aggregate that is used in the construction is then given by
538: asa_lev->agg * asa_lev->agg_corr
539: for the regular prolongator construction and
540: asa_lev->agg * asa_lev->bridge_corr
541: for the bridging prolongator constructions.
542: */
545: PetscErrorCode PCCreateTransferOp_ASA(PC_ASA_level *asa_lev, PetscBool construct_bridge)
546: {
549: const PetscReal Ca = 1e-3;
550: PetscReal cutoff;
551: PetscInt nodes_on_lev;
553: Mat logical_agg;
554: PetscInt mat_agg_loc_start, mat_agg_loc_end, mat_agg_loc_size;
555: PetscInt a;
556: const PetscInt *agg = 0;
557: PetscInt **agg_arr = 0;
559: IS *idxm_is_B_arr = 0;
560: PetscInt *idxn_B = 0;
561: IS idxn_is_B, *idxn_is_B_arr = 0;
563: Mat *b_submat_arr = 0;
565: PetscScalar *b_submat = 0, *b_submat_tp = 0;
566: PetscInt *idxm = 0, *idxn = 0;
567: PetscInt cand_vecs_num;
568: PetscInt *cand_vec_length = 0;
569: PetscInt max_cand_vec_length = 0;
570: PetscScalar **b_orth_arr = 0;
572: PetscInt i,j;
574: PetscScalar *tau = 0, *work = 0;
575: PetscBLASInt info,b1,b2;
577: PetscInt max_cand_vecs_to_add;
578: PetscInt *new_loc_agg_dofs = 0;
580: PetscInt total_loc_cols = 0;
581: PetscReal norm;
583: PetscInt a_loc_m, a_loc_n;
584: PetscInt mat_loc_col_start, mat_loc_col_end, mat_loc_col_size;
585: PetscInt loc_agg_dofs_sum;
586: PetscInt row, col;
587: PetscScalar val;
588: PetscMPIInt comm_size, comm_rank;
589: PetscInt *loc_cols = 0;
592: PetscLogEventBegin(PC_CreateTransferOp_ASA,0,0,0,0);
594: MatGetSize(asa_lev->B, &nodes_on_lev, PETSC_NULL);
596: /* If we add another candidate vector, we want to be able to judge, how much the new candidate
597: improves our current projection operators and whether it is worth adding it.
598: This is the precomputation necessary for implementing Notes (4.1) to (4.7).
599: We require that all candidate vectors x stored in B are normalized such that
600: <A x, x> = 1 and we thus do not have to compute this.
601: For each aggregate A we can now test condition (4.5) and (4.6) by computing
602: || quantity to check ||_{A}^2 <= cutoff * card(A)/N_l */
603: cutoff = Ca/(asa_lev->spec_rad);
605: /* compute logical aggregates by using the correlators */
606: if (construct_bridge) {
607: /* construct bridging operator */
608: MatMatMult(asa_lev->agg, asa_lev->bridge_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
609: } else {
610: /* construct "regular" prolongator */
611: MatMatMult(asa_lev->agg, asa_lev->agg_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
612: }
614: /* destroy correlator matrices for next level, these will be rebuilt in this routine */
615: if (asa_lev->next) {
616: MatDestroy(&(asa_lev->next->agg_corr));
617: MatDestroy(&(asa_lev->next->bridge_corr));
618: }
620: /* find out the correct local row indices */
621: MatGetOwnershipRange(logical_agg, &mat_agg_loc_start, &mat_agg_loc_end);
622: mat_agg_loc_size = mat_agg_loc_end-mat_agg_loc_start;
623:
624: cand_vecs_num = asa_lev->cand_vecs;
626: /* construct column indices idxn_B for reading from B */
627: PetscMalloc(sizeof(PetscInt)*(cand_vecs_num), &idxn_B);
628: for (i=0; i<cand_vecs_num; i++) {
629: idxn_B[i] = i;
630: }
631: ISCreateGeneral(asa_lev->comm, asa_lev->cand_vecs, idxn_B,PETSC_COPY_VALUES, &idxn_is_B);
632: PetscFree(idxn_B);
633: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxn_is_B_arr);
634: for (a=0; a<mat_agg_loc_size; a++) {
635: idxn_is_B_arr[a] = idxn_is_B;
636: }
637: /* allocate storage for row indices idxm_B */
638: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxm_is_B_arr);
640: /* Storage for the orthogonalized submatrices of B and their sizes */
641: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &cand_vec_length);
642: PetscMalloc(sizeof(PetscScalar*)*mat_agg_loc_size, &b_orth_arr);
643: /* Storage for the information about each aggregate */
644: PetscMalloc(sizeof(PetscInt*)*mat_agg_loc_size, &agg_arr);
645: /* Storage for the number of candidate vectors that are orthonormal and used in each submatrix */
646: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &new_loc_agg_dofs);
648: /* loop over local aggregates */
649: for (a=0; a<mat_agg_loc_size; a++) {
650: /* get info about current aggregate, this gives the rows we have to get from B */
651: MatGetRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
652: /* copy aggregate information */
653: PetscMalloc(sizeof(PetscInt)*cand_vec_length[a], &(agg_arr[a]));
654: PetscMemcpy(agg_arr[a], agg, sizeof(PetscInt)*cand_vec_length[a]);
655: /* restore row */
656: MatRestoreRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
657:
658: /* create index sets */
659: ISCreateGeneral(PETSC_COMM_SELF, cand_vec_length[a], agg_arr[a],PETSC_COPY_VALUES, &(idxm_is_B_arr[a]));
660: /* maximum candidate vector length */
661: if (cand_vec_length[a] > max_cand_vec_length) { max_cand_vec_length = cand_vec_length[a]; }
662: }
663: /* destroy logical_agg, no longer needed */
664: MatDestroy(&logical_agg);
666: /* get the entries for aggregate from B */
667: MatGetSubMatrices(asa_lev->B, mat_agg_loc_size, idxm_is_B_arr, idxn_is_B_arr, MAT_INITIAL_MATRIX, &b_submat_arr);
668:
669: /* clean up all the index sets */
670: for (a=0; a<mat_agg_loc_size; a++) { ISDestroy(&idxm_is_B_arr[a]); }
671: PetscFree(idxm_is_B_arr);
672: ISDestroy(&idxn_is_B);
673: PetscFree(idxn_is_B_arr);
674:
675: /* storage for the values from each submatrix */
676: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat);
677: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat_tp);
678: PetscMalloc(sizeof(PetscInt)*max_cand_vec_length, &idxm);
679: for (i=0; i<max_cand_vec_length; i++) { idxm[i] = i; }
680: PetscMalloc(sizeof(PetscInt)*cand_vecs_num, &idxn);
681: for (i=0; i<cand_vecs_num; i++) { idxn[i] = i; }
682: /* work storage for QR algorithm */
683: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length, &tau);
684: PetscMalloc(sizeof(PetscScalar)*cand_vecs_num, &work);
686: /* orthogonalize all submatrices and store them in b_orth_arr */
687: for (a=0; a<mat_agg_loc_size; a++) {
688: /* Get the entries for aggregate from B. This is row ordered (although internally
689: it is column ordered and we will waste some energy transposing it).
690: WORK: use something like MatGetArray(b_submat_arr[a], &b_submat) but be really
691: careful about all the different matrix types */
692: MatGetValues(b_submat_arr[a], cand_vec_length[a], idxm, cand_vecs_num, idxn, b_submat);
694: if (construct_bridge) {
695: /* if we are constructing a bridging restriction/interpolation operator, we have
696: to use the same number of dofs as in our previous construction */
697: max_cand_vecs_to_add = asa_lev->loc_agg_dofs[a];
698: } else {
699: /* for a normal restriction/interpolation operator, we should make sure that we
700: do not create linear dependence by accident */
701: max_cand_vecs_to_add = PetscMin(cand_vec_length[a], cand_vecs_num);
702: }
704: /* We use LAPACK to compute the QR decomposition of b_submat. For LAPACK we have to
705: transpose the matrix. We might throw out some column vectors during this process.
706: We are keeping count of the number of column vectors that we use (and therefore the
707: number of dofs on the lower level) in new_loc_agg_dofs[a]. */
708: new_loc_agg_dofs[a] = 0;
709: for (j=0; j<max_cand_vecs_to_add; j++) {
710: /* check for condition (4.5) */
711: norm = 0.0;
712: for (i=0; i<cand_vec_length[a]; i++) {
713: norm += PetscRealPart(b_submat[i*cand_vecs_num+j])*PetscRealPart(b_submat[i*cand_vecs_num+j])
714: + PetscImaginaryPart(b_submat[i*cand_vecs_num+j])*PetscImaginaryPart(b_submat[i*cand_vecs_num+j]);
715: }
716: /* only add candidate vector if bigger than cutoff or first candidate */
717: if ((!j) || (norm > cutoff*((PetscReal) cand_vec_length[a])/((PetscReal) nodes_on_lev))) {
718: /* passed criterion (4.5), we have not implemented criterion (4.6) yet */
719: for (i=0; i<cand_vec_length[a]; i++) {
720: b_submat_tp[new_loc_agg_dofs[a]*cand_vec_length[a]+i] = b_submat[i*cand_vecs_num+j];
721: }
722: new_loc_agg_dofs[a]++;
723: }
724: /* #ifdef PCASA_VERBOSE */
725: else {
726: PetscPrintf(asa_lev->comm, "Cutoff criteria invoked\n");
727: }
728: /* #endif */
729: }
731: CHKMEMQ;
732: /* orthogonalize b_submat_tp using the QR algorithm from LAPACK */
733: b1 = PetscBLASIntCast(*(cand_vec_length+a));
734: b2 = PetscBLASIntCast(*(new_loc_agg_dofs+a));
735: #if !defined(PETSC_MISSING_LAPACK_GEQRF)
736: LAPACKgeqrf_(&b1, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
737: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "LAPACKgeqrf_ LAPACK routine failed");
738: #else
739: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"geqrf() - Lapack routine is unavailable\n");
740: #endif
741: #if !defined(PETSC_MISSING_LAPACK_ORGQR)
742: LAPACKungqr_(&b1, &b2, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
743: #else
744: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ORGQR - Lapack routine is unavailable\nIf linking with ESSL you MUST also link with full LAPACK, for example\nuse ./configure with --with-blas-lib=libessl.a --with-lapack-lib=/usr/local/lib/liblapack.a'");
745: #endif
746: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "LAPACKungqr_ LAPACK routine failed");
748: /* Transpose b_submat_tp and store it in b_orth_arr[a]. If we are constructing a
749: bridging restriction/interpolation operator, we could end up with less dofs than
750: we previously had. We fill those up with zeros. */
751: if (!construct_bridge) {
752: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*new_loc_agg_dofs[a], b_orth_arr+a);
753: for (j=0; j<new_loc_agg_dofs[a]; j++) {
754: for (i=0; i<cand_vec_length[a]; i++) {
755: b_orth_arr[a][i*new_loc_agg_dofs[a]+j] = b_submat_tp[j*cand_vec_length[a]+i];
756: }
757: }
758: } else {
759: /* bridge, might have to fill up */
760: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*max_cand_vecs_to_add, b_orth_arr+a);
761: for (j=0; j<new_loc_agg_dofs[a]; j++) {
762: for (i=0; i<cand_vec_length[a]; i++) {
763: b_orth_arr[a][i*max_cand_vecs_to_add+j] = b_submat_tp[j*cand_vec_length[a]+i];
764: }
765: }
766: for (j=new_loc_agg_dofs[a]; j<max_cand_vecs_to_add; j++) {
767: for (i=0; i<cand_vec_length[a]; i++) {
768: b_orth_arr[a][i*max_cand_vecs_to_add+j] = 0.0;
769: }
770: }
771: new_loc_agg_dofs[a] = max_cand_vecs_to_add;
772: }
773: /* the number of columns in asa_lev->P that are local to this process */
774: total_loc_cols += new_loc_agg_dofs[a];
775: } /* end of loop over local aggregates */
777: /* destroy the submatrices, also frees all allocated space */
778: MatDestroyMatrices(mat_agg_loc_size, &b_submat_arr);
779: /* destroy all other workspace */
780: PetscFree(b_submat);
781: PetscFree(b_submat_tp);
782: PetscFree(idxm);
783: PetscFree(idxn);
784: PetscFree(tau);
785: PetscFree(work);
787: /* destroy old matrix P, Pt */
788: MatDestroy(&(asa_lev->P));
789: MatDestroy(&(asa_lev->Pt));
791: MatGetLocalSize(asa_lev->A, &a_loc_m, &a_loc_n);
793: /* determine local range */
794: MPI_Comm_size(asa_lev->comm, &comm_size);
795: MPI_Comm_rank(asa_lev->comm, &comm_rank);
796: PetscMalloc(comm_size*sizeof(PetscInt), &loc_cols);
797: MPI_Allgather(&total_loc_cols, 1, MPIU_INT, loc_cols, 1, MPIU_INT, asa_lev->comm);
798: mat_loc_col_start = 0;
799: for (i=0;i<comm_rank;i++) {
800: mat_loc_col_start += loc_cols[i];
801: }
802: mat_loc_col_end = mat_loc_col_start + loc_cols[i];
803: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
804: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR, "Local size does not match matrix size");
805: PetscFree(loc_cols);
807: /* we now have enough information to create asa_lev->P */
808: MatCreateMPIAIJ(asa_lev->comm, a_loc_n, total_loc_cols, asa_lev->size, PETSC_DETERMINE,
809: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->P));
810: /* create asa_lev->Pt */
811: MatCreateMPIAIJ(asa_lev->comm, total_loc_cols, a_loc_n, PETSC_DETERMINE, asa_lev->size,
812: max_cand_vec_length, PETSC_NULL, max_cand_vec_length, PETSC_NULL, &(asa_lev->Pt));
813: if (asa_lev->next) {
814: /* create correlator for aggregates of next level */
815: MatCreateMPIAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
816: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->agg_corr));
817: /* create asa_lev->next->bridge_corr matrix */
818: MatCreateMPIAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
819: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->bridge_corr));
820: }
822: /* this is my own hack, but it should give the columns that we should write to */
823: MatGetOwnershipRangeColumn(asa_lev->P, &mat_loc_col_start, &mat_loc_col_end);
824: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
825: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "The number of local columns in asa_lev->P assigned to this processor does not match the local vector size");
827: loc_agg_dofs_sum = 0;
828: /* construct P, Pt, agg_corr, bridge_corr */
829: for (a=0; a<mat_agg_loc_size; a++) {
830: /* store b_orth_arr[a] in P */
831: for (i=0; i<cand_vec_length[a]; i++) {
832: row = agg_arr[a][i];
833: for (j=0; j<new_loc_agg_dofs[a]; j++) {
834: col = mat_loc_col_start + loc_agg_dofs_sum + j;
835: val = b_orth_arr[a][i*new_loc_agg_dofs[a] + j];
836: MatSetValues(asa_lev->P, 1, &row, 1, &col, &val, INSERT_VALUES);
837: val = PetscConj(val);
838: MatSetValues(asa_lev->Pt, 1, &col, 1, &row, &val, INSERT_VALUES);
839: }
840: }
842: /* compute aggregate correlation matrices */
843: if (asa_lev->next) {
844: row = a+mat_agg_loc_start;
845: for (i=0; i<new_loc_agg_dofs[a]; i++) {
846: col = mat_loc_col_start + loc_agg_dofs_sum + i;
847: val = 1.0;
848: MatSetValues(asa_lev->next->agg_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
849: /* for the bridge operator we leave out the newest candidates, i.e.
850: we set bridge_corr to 1.0 for all columns up to asa_lev->loc_agg_dofs[a] and to
851: 0.0 between asa_lev->loc_agg_dofs[a] and new_loc_agg_dofs[a] */
852: if (!(asa_lev->loc_agg_dofs && (i >= asa_lev->loc_agg_dofs[a]))) {
853: MatSetValues(asa_lev->next->bridge_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
854: }
855: }
856: }
858: /* move to next entry point col */
859: loc_agg_dofs_sum += new_loc_agg_dofs[a];
860: } /* end of loop over local aggregates */
862: MatAssemblyBegin(asa_lev->P,MAT_FINAL_ASSEMBLY);
863: MatAssemblyEnd(asa_lev->P,MAT_FINAL_ASSEMBLY);
864: MatAssemblyBegin(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
865: MatAssemblyEnd(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
866: if (asa_lev->next) {
867: MatAssemblyBegin(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
868: MatAssemblyEnd(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
869: MatAssemblyBegin(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
870: MatAssemblyEnd(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
871: }
873: /* if we are not constructing a bridging operator, switch asa_lev->loc_agg_dofs
874: and new_loc_agg_dofs */
875: if (construct_bridge) {
876: PetscFree(new_loc_agg_dofs);
877: } else {
878: if (asa_lev->loc_agg_dofs) {
879: PetscFree(asa_lev->loc_agg_dofs);
880: }
881: asa_lev->loc_agg_dofs = new_loc_agg_dofs;
882: }
884: /* clean up */
885: for (a=0; a<mat_agg_loc_size; a++) {
886: PetscFree(b_orth_arr[a]);
887: PetscFree(agg_arr[a]);
888: }
889: PetscFree(cand_vec_length);
890: PetscFree(b_orth_arr);
891: PetscFree(agg_arr);
893: PetscLogEventEnd(PC_CreateTransferOp_ASA, 0,0,0,0);
894: return(0);
895: }
897: /* -------------------------------------------------------------------------- */
898: /*
899: PCSmoothProlongator_ASA - Computes the smoothed prolongators I and It on the level
901: Input Parameters:
902: . asa_lev - the level for which the smoothed prolongator is constructed
903: */
906: PetscErrorCode PCSmoothProlongator_ASA(PC_ASA_level *asa_lev)
907: {
911: MatDestroy(&(asa_lev->smP));
912: MatDestroy(&(asa_lev->smPt));
913: /* compute prolongator I_{l+1}^l = S_l P_{l+1}^l */
914: /* step 1: compute I_{l+1}^l = A_l P_{l+1}^l */
915: MatMatMult(asa_lev->A, asa_lev->P, MAT_INITIAL_MATRIX, 1, &(asa_lev->smP));
916: MatMatMult(asa_lev->Pt, asa_lev->A, MAT_INITIAL_MATRIX, 1, &(asa_lev->smPt));
917: /* step 2: shift and scale to get I_{l+1}^l = P_{l+1}^l - 4/(3/rho) A_l P_{l+1}^l */
918: MatAYPX(asa_lev->smP, -4./(3.*(asa_lev->spec_rad)), asa_lev->P, SUBSET_NONZERO_PATTERN);
919: MatAYPX(asa_lev->smPt, -4./(3.*(asa_lev->spec_rad)), asa_lev->Pt, SUBSET_NONZERO_PATTERN);
921: return(0);
922: }
925: /* -------------------------------------------------------------------------- */
926: /*
927: PCCreateVcycle_ASA - Creates the V-cycle, when aggregates are already defined
929: Input Parameters:
930: . asa - the preconditioner context
931: */
934: PetscErrorCode PCCreateVcycle_ASA(PC_ASA *asa)
935: {
937: PC_ASA_level *asa_lev, *asa_next_lev;
938: Mat AI;
941: PetscLogEventBegin(PC_CreateVcycle_ASA, 0,0,0,0);
943: if (!asa) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "asa pointer is NULL");
944: if (!(asa->levellist)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "no levels found");
945: asa_lev = asa->levellist;
946: PCComputeSpectralRadius_ASA(asa_lev);
947: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->nu);
949: while(asa_lev->next) {
950: asa_next_lev = asa_lev->next;
951: /* (a) aggregates are already constructed */
953: /* (b) construct B_{l+1} and P_{l+1}^l using (2.11) */
954: /* construct P_{l+1}^l */
955: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
957: /* construct B_{l+1} */
958: MatDestroy(&(asa_next_lev->B));
959: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
960: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
962: /* (c) construct smoothed prolongator */
963: PCSmoothProlongator_ASA(asa_lev);
964:
965: /* (d) construct coarse matrix */
966: /* Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
967: MatDestroy(&(asa_next_lev->A));
968: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
969: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
970: MatDestroy(&AI);
971: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
972: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
973: PCComputeSpectralRadius_ASA(asa_next_lev);
974: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->nu);
975: /* create corresponding vectors x_{l+1}, b_{l+1}, r_{l+1} */
976: VecDestroy(&(asa_next_lev->x));
977: VecDestroy(&(asa_next_lev->b));
978: VecDestroy(&(asa_next_lev->r));
979: MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), &(asa_next_lev->b));
980: MatGetVecs(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->r));
982: /* go to next level */
983: asa_lev = asa_lev->next;
984: } /* end of while loop over the levels */
985: /* asa_lev now points to the coarsest level, set up direct solver there */
986: PCComputeSpectralRadius_ASA(asa_lev);
987: PCSetupDirectSolversOnLevel_ASA(asa, asa_lev, asa->nu);
989: PetscLogEventEnd(PC_CreateVcycle_ASA, 0,0,0,0);
990: return(0);
991: }
993: /* -------------------------------------------------------------------------- */
994: /*
995: PCAddCandidateToB_ASA - Inserts a candidate vector in B
997: Input Parameters:
998: + B - the matrix to insert into
999: . col_idx - the column we should insert to
1000: . x - the vector to insert
1001: - A - system matrix
1003: Function will insert normalized x into B, such that <A x, x> = 1
1004: (x itself is not changed). If B is projected down then this property
1005: is kept. If <A_l x_l, x_l> = 1 and the next level is defined by
1006: x_{l+1} = Pt x_l and A_{l+1} = Pt A_l P then
1007: <A_{l+1} x_{l+1}, x_l> = <Pt A_l P Pt x_l, Pt x_l>
1008: = <A_l P Pt x_l, P Pt x_l> = <A_l x_l, x_l> = 1
1009: because of the definition of P in (2.11).
1010: */
1013: PetscErrorCode PCAddCandidateToB_ASA(Mat B, PetscInt col_idx, Vec x, Mat A)
1014: {
1016: Vec Ax;
1017: PetscScalar dotprod;
1018: PetscReal norm;
1019: PetscInt i, loc_start, loc_end;
1020: PetscScalar val, *vecarray;
1023: MatGetVecs(A, PETSC_NULL, &Ax);
1024: MatMult(A, x, Ax);
1025: VecDot(Ax, x, &dotprod);
1026: norm = PetscSqrtReal(PetscAbsScalar(dotprod));
1027: VecGetOwnershipRange(x, &loc_start, &loc_end);
1028: VecGetArray(x, &vecarray);
1029: for (i=loc_start; i<loc_end; i++) {
1030: val = vecarray[i-loc_start]/norm;
1031: MatSetValues(B, 1, &i, 1, &col_idx, &val, INSERT_VALUES);
1032: }
1033: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1034: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1035: VecRestoreArray(x, &vecarray);
1036: VecDestroy(&Ax);
1037: return(0);
1038: }
1040: /* -------------------------------------------------------------------------- */
1041: /*
1042: - x - a starting guess for a hard to approximate vector, if PETSC_NULL, will be generated
1043: */
1046: PetscErrorCode PCInitializationStage_ASA(PC_ASA *asa, Vec x)
1047: {
1049: PetscInt l;
1050: PC_ASA_level *asa_lev, *asa_next_lev;
1051: PetscRandom rctx; /* random number generator context */
1053: Vec ax;
1054: PetscScalar tmp;
1055: PetscReal prevnorm, norm;
1057: PetscBool skip_steps_f_i = PETSC_FALSE;
1058: PetscBool sufficiently_coarsened = PETSC_FALSE;
1060: PetscInt vec_size, vec_loc_size;
1061: PetscInt loc_vec_low, loc_vec_high;
1062: PetscInt i,j;
1064: /* Vec xhat = 0; */
1066: Mat AI;
1068: Vec cand_vec, cand_vec_new;
1069: PetscBool isrichardson;
1070: PC coarse_pc;
1073: PetscLogEventBegin(PC_InitializationStage_ASA,0,0,0,0);
1074: l=1;
1075: /* create first level */
1076: PCCreateLevel_ASA(&(asa->levellist), l, asa->comm, 0, 0, asa->ksptype_smooth, asa->pctype_smooth);
1077: asa_lev = asa->levellist;
1079: /* Set matrix */
1080: asa_lev->A = asa->A;
1081: MatGetSize(asa_lev->A, &i, &j);
1082: asa_lev->size = i;
1083: PCComputeSpectralRadius_ASA(asa_lev);
1084: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu_initial);
1086: /* Set DM */
1087: asa_lev->dm = asa->dm;
1088: PetscObjectReference((PetscObject)asa->dm);
1090: PetscPrintf(asa_lev->comm, "Initialization stage\n");
1092: if (x) {
1093: /* use starting guess */
1094: VecDestroy(&(asa_lev->x));
1095: VecDuplicate(x, &(asa_lev->x));
1096: VecCopy(x, asa_lev->x);
1097: } else {
1098: /* select random starting vector */
1099: VecDestroy(&(asa_lev->x));
1100: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1101: PetscRandomCreate(asa_lev->comm,&rctx);
1102: PetscRandomSetFromOptions(rctx);
1103: VecSetRandom(asa_lev->x, rctx);
1104: PetscRandomDestroy(&rctx);
1105: }
1107: /* create right hand side */
1108: VecDestroy(&(asa_lev->b));
1109: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1110: VecSet(asa_lev->b, 0.0);
1112: /* relax and check whether that's enough already */
1113: /* compute old norm */
1114: MatGetVecs(asa_lev->A, 0, &ax);
1115: MatMult(asa_lev->A, asa_lev->x, ax);
1116: VecDot(asa_lev->x, ax, &tmp);
1117: prevnorm = PetscAbsScalar(tmp);
1118: PetscPrintf(asa_lev->comm, "Residual norm of starting guess: %f\n", prevnorm);
1120: /* apply mu_initial relaxations */
1121: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1122: /* compute new norm */
1123: MatMult(asa_lev->A, asa_lev->x, ax);
1124: VecDot(asa_lev->x, ax, &tmp);
1125: norm = PetscAbsScalar(tmp);
1126: VecDestroy(&(ax));
1127: PetscPrintf(asa_lev->comm, "Residual norm of relaxation after %g %D relaxations: %g %g\n", asa->epsilon,asa->mu_initial, norm,prevnorm);
1129: /* Check if it already converges by itself */
1130: if (norm/prevnorm <= pow(asa->epsilon, asa->mu_initial)) {
1131: /* converges by relaxation alone */
1132: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP, "Relaxation should be sufficient to treat this problem. "
1133: "Use relaxation or decrease epsilon with -pc_asa_epsilon");
1134: } else {
1135: /* set the number of relaxations to asa->mu from asa->mu_initial */
1136: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu);
1138: /* Let's do some multigrid ! */
1139: sufficiently_coarsened = PETSC_FALSE;
1141: /* do the whole initialization stage loop */
1142: while (!sufficiently_coarsened) {
1143: PetscPrintf(asa_lev->comm, "Initialization stage: creating level %D\n", asa_lev->level+1);
1145: /* (a) Set candidate matrix B_l = x_l */
1146: /* get the correct vector sizes and data */
1147: VecGetSize(asa_lev->x, &vec_size);
1148: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1149: vec_loc_size = loc_vec_high - loc_vec_low;
1151: /* create matrix for candidates */
1152: MatCreateMPIDense(asa_lev->comm, vec_loc_size, PETSC_DECIDE, vec_size, asa->max_cand_vecs, PETSC_NULL, &(asa_lev->B));
1153: /* set the first column */
1154: PCAddCandidateToB_ASA(asa_lev->B, 0, asa_lev->x, asa_lev->A);
1155: asa_lev->cand_vecs = 1;
1157: /* create next level */
1158: PCCreateLevel_ASA(&(asa_lev->next), asa_lev->level+1, asa_lev->comm, asa_lev, PETSC_NULL, asa->ksptype_smooth, asa->pctype_smooth);
1159: asa_next_lev = asa_lev->next;
1161: /* (b) Create nodal aggregates A_i^l */
1162: PCCreateAggregates_ASA(asa_lev);
1163:
1164: /* (c) Define tentatative prolongator P_{l+1}^l and candidate matrix B_{l+1}
1165: using P_{l+1}^l B_{l+1} = B_l and (P_{l+1}^l)^T P_{l+1}^l = I */
1166: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1168: /* future WORK: set correct fill ratios for all the operations below */
1169: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
1170: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
1172: /* (d) Define prolongator I_{l+1}^l = S_l P_{l+1}^l */
1173: PCSmoothProlongator_ASA(asa_lev);
1175: /* (e) Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1176: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1177: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1178: MatDestroy(&AI);
1179: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1180: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1181: PCComputeSpectralRadius_ASA(asa_next_lev);
1182: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1184: /* coarse enough for direct solver? */
1185: MatGetSize(asa_next_lev->A, &i, &j);
1186: if (PetscMax(i,j) <= asa->direct_solver) {
1187: PetscPrintf(asa_lev->comm, "Level %D can be treated directly.\n"
1188: "Algorithm will use %D levels.\n", asa_next_lev->level,
1189: asa_next_lev->level);
1190: break; /* go to step 5 */
1191: }
1193: if (!skip_steps_f_i) {
1194: /* (f) Set x_{l+1} = B_{l+1}, we just compute it again */
1195: VecDestroy(&(asa_next_lev->x));
1196: MatGetVecs(asa_lev->P, &(asa_next_lev->x), 0);
1197: MatMult(asa_lev->Pt, asa_lev->x, asa_next_lev->x);
1199: /* /\* (g) Make copy \hat{x}_{l+1} = x_{l+1} *\/ */
1200: /* VecDuplicate(asa_next_lev->x, &xhat); */
1201: /* VecCopy(asa_next_lev->x, xhat); */
1202:
1203: /* Create b_{l+1} */
1204: VecDestroy(&(asa_next_lev->b));
1205: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), 0);
1206: VecSet(asa_next_lev->b, 0.0);
1208: /* (h) Relax mu times on A_{l+1} x = 0 */
1209: /* compute old norm */
1210: MatGetVecs(asa_next_lev->A, 0, &ax);
1211: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1212: VecDot(asa_next_lev->x, ax, &tmp);
1213: prevnorm = PetscAbsScalar(tmp);
1214: PetscPrintf(asa_next_lev->comm, "Residual norm of starting guess on level %D: %f\n", asa_next_lev->level, prevnorm);
1215: /* apply mu relaxations: WORK, make sure that mu is set correctly */
1216: KSPSolve(asa_next_lev->smoothd, asa_next_lev->b, asa_next_lev->x);
1217: /* compute new norm */
1218: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1219: VecDot(asa_next_lev->x, ax, &tmp);
1220: norm = PetscAbsScalar(tmp);
1221: VecDestroy(&(ax));
1222: PetscPrintf(asa_next_lev->comm, "Residual norm after Richardson iteration on level %D: %f\n", asa_next_lev->level, norm);
1223: /* (i) Check if it already converges by itself */
1224: if (norm/prevnorm <= pow(asa->epsilon, asa->mu)) {
1225: /* relaxation reduces error sufficiently */
1226: skip_steps_f_i = PETSC_TRUE;
1227: }
1228: }
1229: /* (j) go to next coarser level */
1230: l++;
1231: asa_lev = asa_next_lev;
1232: }
1233: /* Step 5. */
1234: asa->levels = asa_next_lev->level; /* WORK: correct? */
1236: /* Set up direct solvers on coarsest level */
1237: if (asa_next_lev->smoothd != asa_next_lev->smoothu) {
1238: if (asa_next_lev->smoothu) { KSPDestroy(&asa_next_lev->smoothu); }
1239: }
1240: KSPSetType(asa_next_lev->smoothd, asa->ksptype_direct);
1241: PetscTypeCompare((PetscObject)(asa_next_lev->smoothd), KSPRICHARDSON, &isrichardson);
1242: if (isrichardson) {
1243: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_TRUE);
1244: } else {
1245: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_FALSE);
1246: }
1247: KSPGetPC(asa_next_lev->smoothd, &coarse_pc);
1248: PCSetType(coarse_pc, asa->pctype_direct);
1249: asa_next_lev->smoothu = asa_next_lev->smoothd;
1250: PCSetupDirectSolversOnLevel_ASA(asa, asa_next_lev, asa->nu);
1252: /* update finest-level candidate matrix B_1 = I_2^1 I_3^2 ... I_{L-1}^{L-2} x_{L-1} */
1253: if (!asa_lev->prev) {
1254: /* just one relaxation level */
1255: VecDuplicate(asa_lev->x, &cand_vec);
1256: VecCopy(asa_lev->x, cand_vec);
1257: } else {
1258: /* interpolate up the chain */
1259: cand_vec = asa_lev->x;
1260: asa_lev->x = 0;
1261: while(asa_lev->prev) {
1262: /* interpolate to higher level */
1263: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1264: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1265: VecDestroy(&(cand_vec));
1266: cand_vec = cand_vec_new;
1267:
1268: /* destroy all working vectors on the way */
1269: VecDestroy(&(asa_lev->x));
1270: VecDestroy(&(asa_lev->b));
1272: /* move to next higher level */
1273: asa_lev = asa_lev->prev;
1274: }
1275: }
1276: /* set the first column of B1 */
1277: PCAddCandidateToB_ASA(asa_lev->B, 0, cand_vec, asa_lev->A);
1278: VecDestroy(&(cand_vec));
1280: /* Step 6. Create V-cycle */
1281: PCCreateVcycle_ASA(asa);
1282: }
1283: PetscLogEventEnd(PC_InitializationStage_ASA,0,0,0,0);
1284: return(0);
1285: }
1287: /* -------------------------------------------------------------------------- */
1288: /*
1289: PCApplyVcycleOnLevel_ASA - Applies current V-cycle
1291: Input Parameters:
1292: + asa_lev - the current level we should recurse on
1293: - gamma - the number of recursive cycles we should run
1295: */
1298: PetscErrorCode PCApplyVcycleOnLevel_ASA(PC_ASA_level *asa_lev, PetscInt gamma)
1299: {
1301: PC_ASA_level *asa_next_lev;
1302: PetscInt g;
1305: if (!asa_lev) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "Level is empty in PCApplyVcycleOnLevel_ASA");
1306: asa_next_lev = asa_lev->next;
1308: if (asa_next_lev) {
1309: /* 1. Presmoothing */
1310: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1311: /* 2. Coarse grid corrections */
1312: /* MatGetVecs(asa_lev->A, 0, &tmp); */
1313: /* MatGetVecs(asa_lev->smP, &(asa_next_lev->b), 0); */
1314: /* MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), 0); */
1315: for (g=0; g<gamma; g++) {
1316: /* (a) get coarsened b_{l+1} = (I_{l+1}^l)^T (b_l - A_l x_l) */
1317: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1318: VecAYPX(asa_lev->r, -1.0, asa_lev->b);
1319: MatMult(asa_lev->smPt, asa_lev->r, asa_next_lev->b);
1321: /* (b) Set x_{l+1} = 0 and recurse */
1322: VecSet(asa_next_lev->x, 0.0);
1323: PCApplyVcycleOnLevel_ASA(asa_next_lev, gamma);
1325: /* (c) correct solution x_l = x_l + I_{l+1}^l x_{l+1} */
1326: MatMultAdd(asa_lev->smP, asa_next_lev->x, asa_lev->x, asa_lev->x);
1327: }
1328: /* VecDestroy(&(asa_lev->r)); */
1329: /* /\* discard x_{l+1}, b_{l+1} *\/ */
1330: /* VecDestroy(&(asa_next_lev->x)); */
1331: /* VecDestroy(&(asa_next_lev->b)); */
1332:
1333: /* 3. Postsmoothing */
1334: KSPSolve(asa_lev->smoothu, asa_lev->b, asa_lev->x);
1335: } else {
1336: /* Base case: solve directly */
1337: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1338: }
1339: return(0);
1340: }
1343: /* -------------------------------------------------------------------------- */
1344: /*
1345: PCGeneralSetupStage_ASA - Applies the ASA preconditioner to a vector. Algorithm
1346: 4 from the ASA paper
1348: Input Parameters:
1349: + asa - the data structure for the ASA algorithm
1350: - cand - a possible candidate vector, if PETSC_NULL, will be constructed randomly
1352: Output Parameters:
1353: . cand_added - PETSC_TRUE, if new candidate vector added, PETSC_FALSE otherwise
1354: */
1357: PetscErrorCode PCGeneralSetupStage_ASA(PC_ASA *asa, Vec cand, PetscBool *cand_added)
1358: {
1360: PC_ASA_level *asa_lev, *asa_next_lev;
1362: PetscRandom rctx; /* random number generator context */
1363: PetscReal r;
1364: PetscScalar rs;
1365: PetscBool nd_fast;
1367: Vec ax;
1368: PetscScalar tmp;
1369: PetscReal norm, prevnorm = 0.0;
1370: PetscInt c;
1372: PetscInt loc_vec_low, loc_vec_high;
1373: PetscInt i;
1375: PetscBool skip_steps_d_j = PETSC_FALSE;
1377: PetscInt *idxm, *idxn;
1378: PetscScalar *v;
1380: Mat AI;
1382: Vec cand_vec, cand_vec_new;
1385: *cand_added = PETSC_FALSE;
1386:
1387: asa_lev = asa->levellist;
1388: if (asa_lev == 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "No levels found in PCGeneralSetupStage_ASA");
1389: asa_next_lev = asa_lev->next;
1390: if (asa_next_lev == 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "Just one level, not implemented yet");
1391:
1392: PetscPrintf(asa_lev->comm, "General setup stage\n");
1394: PetscLogEventBegin(PC_GeneralSetupStage_ASA,0,0,0,0);
1396: /* 1. If max. dof per node on level 2 equals K, stop */
1397: if (asa_next_lev->cand_vecs >= asa->max_dof_lev_2) {
1398: PetscPrintf(PETSC_COMM_WORLD,
1399: "Maximum dof on level 2 reached: %D\n"
1400: "Consider increasing this limit by setting it with -pc_asa_max_dof_lev_2\n",
1401: asa->max_dof_lev_2);
1402: return(0);
1403: }
1405: /* 2. Create copy of B_1 (skipped, we just replace the last column in step 8.) */
1406:
1407: if (!cand) {
1408: /* 3. Select a random x_1 */
1409: VecDestroy(&(asa_lev->x));
1410: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1411: PetscRandomCreate(asa_lev->comm,&rctx);
1412: PetscRandomSetFromOptions(rctx);
1413: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1414: for (i=loc_vec_low; i<loc_vec_high; i++) {
1415: PetscRandomGetValueReal(rctx, &r);
1416: rs = r;
1417: VecSetValues(asa_lev->x, 1, &i, &rs, INSERT_VALUES);
1418: }
1419: VecAssemblyBegin(asa_lev->x);
1420: VecAssemblyEnd(asa_lev->x);
1421: PetscRandomDestroy(&rctx);
1422: } else {
1423: VecDestroy(&(asa_lev->x));
1424: VecDuplicate(cand, &(asa_lev->x));
1425: VecCopy(cand, asa_lev->x);
1426: }
1428: /* create right hand side */
1429: VecDestroy(&(asa_lev->b));
1430: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1431: VecSet(asa_lev->b, 0.0);
1432:
1433: /* Apply mu iterations of current V-cycle */
1434: nd_fast = PETSC_FALSE;
1435: MatGetVecs(asa_lev->A, 0, &ax);
1436: for (c=0; c<asa->mu; c++) {
1437: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1438:
1439: MatMult(asa_lev->A, asa_lev->x, ax);
1440: VecDot(asa_lev->x, ax, &tmp);
1441: norm = PetscAbsScalar(tmp);
1442: if (c>0) {
1443: if (norm/prevnorm < asa->epsilon) {
1444: nd_fast = PETSC_TRUE;
1445: break;
1446: }
1447: }
1448: prevnorm = norm;
1449: }
1450: VecDestroy(&(ax));
1452: /* 4. If energy norm decreases sufficiently fast, then stop */
1453: if (nd_fast) {
1454: PetscPrintf(asa_lev->comm, "nd_fast is true\n");
1455: return(0);
1456: }
1458: /* 5. Update B_1, by adding new column x_1 */
1459: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1460: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM, "Number of candidate vectors will exceed allocated storage space");
1461: } else {
1462: PetscPrintf(asa_lev->comm, "Adding candidate vector %D\n", asa_lev->cand_vecs+1);
1463: }
1464: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs, asa_lev->x, asa_lev->A);
1465: *cand_added = PETSC_TRUE;
1466: asa_lev->cand_vecs++;
1468: /* 6. loop over levels */
1469: while(asa_next_lev && asa_next_lev->next) {
1470: PetscPrintf(asa_lev->comm, "General setup stage: processing level %D\n", asa_next_lev->level);
1471: /* (a) define B_{l+1} and P_{l+1}^L */
1472: /* construct P_{l+1}^l */
1473: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1475: /* construct B_{l+1} */
1476: MatDestroy(&(asa_next_lev->B));
1477: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->B));
1478: /* do not increase asa_next_lev->cand_vecs until step (j) */
1479:
1480: /* (b) construct prolongator I_{l+1}^l = S_l P_{l+1}^l */
1481: PCSmoothProlongator_ASA(asa_lev);
1482:
1483: /* (c) construct coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1484: MatDestroy(&(asa_next_lev->A));
1485: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1486: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1487: MatDestroy(&AI);
1488: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1489: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1490: PCComputeSpectralRadius_ASA(asa_next_lev);
1491: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1493: if (! skip_steps_d_j) {
1494: /* (d) get vector x_{l+1} from last column in B_{l+1} */
1495: VecDestroy(&(asa_next_lev->x));
1496: MatGetVecs(asa_next_lev->B, 0, &(asa_next_lev->x));
1498: VecGetOwnershipRange(asa_next_lev->x, &loc_vec_low, &loc_vec_high);
1499: PetscMalloc(sizeof(PetscInt)*(loc_vec_high-loc_vec_low), &idxm);
1500: for (i=loc_vec_low; i<loc_vec_high; i++)
1501: idxm[i-loc_vec_low] = i;
1502: PetscMalloc(sizeof(PetscInt)*1, &idxn);
1503: idxn[0] = asa_next_lev->cand_vecs;
1505: PetscMalloc(sizeof(PetscScalar)*(loc_vec_high-loc_vec_low), &v);
1506: MatGetValues(asa_next_lev->B, loc_vec_high-loc_vec_low, idxm, 1, idxn, v);
1508: VecSetValues(asa_next_lev->x, loc_vec_high-loc_vec_low, idxm, v, INSERT_VALUES);
1509: VecAssemblyBegin(asa_next_lev->x);
1510: VecAssemblyEnd(asa_next_lev->x);
1512: PetscFree(v);
1513: PetscFree(idxm);
1514: PetscFree(idxn);
1515:
1516: /* (e) create bridge transfer operator P_{l+2}^{l+1}, by using the previously
1517: computed candidates */
1518: PCCreateTransferOp_ASA(asa_next_lev, PETSC_TRUE);
1520: /* (f) construct bridging prolongator I_{l+2}^{l+1} = S_{l+1} P_{l+2}^{l+1} */
1521: PCSmoothProlongator_ASA(asa_next_lev);
1523: /* (g) compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1524: MatGetVecs(asa_next_lev->A, 0, &ax);
1525: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1526: VecDot(asa_next_lev->x, ax, &tmp);
1527: prevnorm = PetscAbsScalar(tmp);
1528: VecDestroy(&(ax));
1530: /* (h) apply mu iterations of current V-cycle */
1531: /* set asa_next_lev->b */
1532: VecDestroy(&(asa_next_lev->b));
1533: VecDestroy(&(asa_next_lev->r));
1534: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), &(asa_next_lev->r));
1535: VecSet(asa_next_lev->b, 0.0);
1536: /* apply V-cycle */
1537: for (c=0; c<asa->mu; c++) {
1538: PCApplyVcycleOnLevel_ASA(asa_next_lev, asa->gamma);
1539: }
1541: /* (i) check convergence */
1542: /* compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1543: MatGetVecs(asa_next_lev->A, 0, &ax);
1544: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1545: VecDot(asa_next_lev->x, ax, &tmp);
1546: norm = PetscAbsScalar(tmp);
1547: VecDestroy(&(ax));
1549: if (norm/prevnorm <= pow(asa->epsilon, asa->mu)) skip_steps_d_j = PETSC_TRUE;
1550:
1551: /* (j) update candidate B_{l+1} */
1552: PCAddCandidateToB_ASA(asa_next_lev->B, asa_next_lev->cand_vecs, asa_next_lev->x, asa_next_lev->A);
1553: asa_next_lev->cand_vecs++;
1554: }
1555: /* go to next level */
1556: asa_lev = asa_lev->next;
1557: asa_next_lev = asa_next_lev->next;
1558: }
1560: /* 7. update the fine-level candidate */
1561: if (! asa_lev->prev) {
1562: /* just one coarsening level */
1563: VecDuplicate(asa_lev->x, &cand_vec);
1564: VecCopy(asa_lev->x, cand_vec);
1565: } else {
1566: cand_vec = asa_lev->x;
1567: asa_lev->x = 0;
1568: while(asa_lev->prev) {
1569: /* interpolate to higher level */
1570: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1571: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1572: VecDestroy(&(cand_vec));
1573: cand_vec = cand_vec_new;
1575: /* destroy all working vectors on the way */
1576: VecDestroy(&(asa_lev->x));
1577: VecDestroy(&(asa_lev->b));
1579: /* move to next higher level */
1580: asa_lev = asa_lev->prev;
1581: }
1582: }
1583: /* 8. update B_1 by setting the last column of B_1 */
1584: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs-1, cand_vec, asa_lev->A);
1585: VecDestroy(&(cand_vec));
1587: /* 9. create V-cycle */
1588: PCCreateVcycle_ASA(asa);
1589:
1590: PetscLogEventEnd(PC_GeneralSetupStage_ASA,0,0,0,0);
1591: return(0);
1592: }
1594: /* -------------------------------------------------------------------------- */
1595: /*
1596: PCConstructMultigrid_ASA - creates the multigrid preconditionier, this is a fairly
1597: involved process, which runs extensive testing to compute good candidate vectors
1599: Input Parameters:
1600: . pc - the preconditioner context
1602: */
1605: PetscErrorCode PCConstructMultigrid_ASA(PC pc)
1606: {
1608: PC_ASA *asa = (PC_ASA*)pc->data;
1609: PC_ASA_level *asa_lev;
1610: PetscInt i, ls, le;
1611: PetscScalar *d;
1612: PetscBool zeroflag = PETSC_FALSE;
1613: PetscReal rnorm, rnorm_start;
1614: PetscReal rq, rq_prev;
1615: PetscScalar rq_nom, rq_denom;
1616: PetscBool cand_added;
1617: PetscRandom rctx;
1621: /* check if we should scale with diagonal */
1622: if (asa->scale_diag) {
1623: /* Get diagonal scaling factors */
1624: MatGetVecs(pc->pmat,&(asa->invsqrtdiag),0);
1625: MatGetDiagonal(pc->pmat,asa->invsqrtdiag);
1626: /* compute (inverse) sqrt of diagonal */
1627: VecGetOwnershipRange(asa->invsqrtdiag, &ls, &le);
1628: VecGetArray(asa->invsqrtdiag, &d);
1629: for (i=0; i<le-ls; i++) {
1630: if (d[i] == 0.0) {
1631: d[i] = 1.0;
1632: zeroflag = PETSC_TRUE;
1633: } else {
1634: d[i] = 1./PetscSqrtReal(PetscAbsScalar(d[i]));
1635: }
1636: }
1637: VecRestoreArray(asa->invsqrtdiag,&d);
1638: VecAssemblyBegin(asa->invsqrtdiag);
1639: VecAssemblyEnd(asa->invsqrtdiag);
1640: if (zeroflag) {
1641: PetscInfo(pc,"Zero detected in diagonal of matrix, using 1 at those locations\n");
1642: }
1643:
1644: /* scale the matrix and store it: D^{-1/2} A D^{-1/2} */
1645: MatDuplicate(pc->pmat, MAT_COPY_VALUES, &(asa->A)); /* probably inefficient */
1646: MatDiagonalScale(asa->A, asa->invsqrtdiag, asa->invsqrtdiag);
1647: } else {
1648: /* don't scale */
1649: asa->A = pc->pmat;
1650: }
1651: /* Initialization stage */
1652: PCInitializationStage_ASA(asa, PETSC_NULL);
1653:
1654: /* get first level */
1655: asa_lev = asa->levellist;
1657: PetscRandomCreate(asa->comm,&rctx);
1658: PetscRandomSetFromOptions(rctx);
1659: VecSetRandom(asa_lev->x,rctx);
1661: /* compute starting residual */
1662: VecDestroy(&(asa_lev->r));
1663: MatGetVecs(asa_lev->A, PETSC_NULL, &(asa_lev->r));
1664: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1665: /* starting residual norm */
1666: VecNorm(asa_lev->r, NORM_2, &rnorm_start);
1667: /* compute Rayleigh quotients */
1668: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1669: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1670: rq_prev = PetscAbsScalar(rq_nom / rq_denom);
1672: /* check if we have to add more candidates */
1673: for (i=0; i<asa->max_it; i++) {
1674: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1675: /* reached limit for candidate vectors */
1676: break;
1677: }
1678: /* apply V-cycle */
1679: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1680: /* check convergence */
1681: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1682: VecNorm(asa_lev->r, NORM_2, &rnorm);
1683: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1684: if (rnorm < rnorm_start*(asa->rtol) || rnorm < asa->abstol) {
1685: /* convergence */
1686: break;
1687: }
1688: /* compute new Rayleigh quotient */
1689: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1690: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1691: rq = PetscAbsScalar(rq_nom / rq_denom);
1692: PetscPrintf(asa->comm, "After %D iterations Rayleigh quotient of residual is %f\n", i+1, rq);
1693: /* test Rayleigh quotient decrease and add more candidate vectors if necessary */
1694: if (i && (rq > asa->rq_improve*rq_prev)) {
1695: /* improve interpolation by adding another candidate vector */
1696: PCGeneralSetupStage_ASA(asa, asa_lev->r, &cand_added);
1697: if (!cand_added) {
1698: /* either too many candidates for storage or cycle is already effective */
1699: PetscPrintf(asa->comm, "either too many candidates for storage or cycle is already effective\n");
1700: break;
1701: }
1702: VecSetRandom(asa_lev->x, rctx);
1703: rq_prev = rq*10000.; /* give the new V-cycle some grace period */
1704: } else {
1705: rq_prev = rq;
1706: }
1707: }
1709: VecDestroy(&(asa_lev->x));
1710: VecDestroy(&(asa_lev->b));
1711: PetscRandomDestroy(&rctx);
1712: asa->multigrid_constructed = PETSC_TRUE;
1713: return(0);
1714: }
1716: /* -------------------------------------------------------------------------- */
1717: /*
1718: PCApply_ASA - Applies the ASA preconditioner to a vector.
1720: Input Parameters:
1721: . pc - the preconditioner context
1722: . x - input vector
1724: Output Parameter:
1725: . y - output vector
1727: Application Interface Routine: PCApply()
1728: */
1731: PetscErrorCode PCApply_ASA(PC pc,Vec x,Vec y)
1732: {
1733: PC_ASA *asa = (PC_ASA*)pc->data;
1734: PC_ASA_level *asa_lev;
1739: if (!asa->multigrid_constructed) {
1740: PCConstructMultigrid_ASA(pc);
1741: }
1743: /* get first level */
1744: asa_lev = asa->levellist;
1746: /* set the right hand side */
1747: VecDuplicate(x, &(asa->b));
1748: VecCopy(x, asa->b);
1749: /* set starting vector */
1750: VecDestroy(&(asa->x));
1751: MatGetVecs(asa->A, &(asa->x), PETSC_NULL);
1752: VecSet(asa->x, 0.0);
1753:
1754: /* set vectors */
1755: asa_lev->x = asa->x;
1756: asa_lev->b = asa->b;
1758: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1759:
1760: /* Return solution */
1761: VecCopy(asa->x, y);
1763: /* delete working vectors */
1764: VecDestroy(&(asa->x));
1765: VecDestroy(&(asa->b));
1766: asa_lev->x = PETSC_NULL;
1767: asa_lev->b = PETSC_NULL;
1769: return(0);
1770: }
1772: /* -------------------------------------------------------------------------- */
1773: /*
1774: PCApplyRichardson_ASA - Applies the ASA iteration to solve a linear system
1776: Input Parameters:
1777: . pc - the preconditioner context
1778: . b - the right hand side
1780: Output Parameter:
1781: . x - output vector
1783: DOES NOT WORK!!!!!
1785: */
1788: PetscErrorCode PCApplyRichardson_ASA(PC pc,Vec b,Vec x,Vec w,PetscReal rtol,PetscReal abstol, PetscReal dtol,PetscInt its,PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
1789: {
1790: PC_ASA *asa = (PC_ASA*)pc->data;
1791: PC_ASA_level *asa_lev;
1792: PetscInt i;
1793: PetscReal rnorm, rnorm_start;
1798: if (! asa->multigrid_constructed) {
1799: PCConstructMultigrid_ASA(pc);
1800: }
1802: /* get first level */
1803: asa_lev = asa->levellist;
1805: /* set the right hand side */
1806: VecDuplicate(b, &(asa->b));
1807: if (asa->scale_diag) {
1808: VecPointwiseMult(asa->b, asa->invsqrtdiag, b);
1809: } else {
1810: VecCopy(b, asa->b);
1811: }
1812: /* set starting vector */
1813: VecDuplicate(x, &(asa->x));
1814: VecCopy(x, asa->x);
1815:
1816: /* compute starting residual */
1817: VecDestroy(&(asa->r));
1818: MatGetVecs(asa->A, &(asa->r), PETSC_NULL);
1819: MatMult(asa->A, asa->x, asa->r);
1820: VecAYPX(asa->r, -1.0, asa->b);
1821: /* starting residual norm */
1822: VecNorm(asa->r, NORM_2, &rnorm_start);
1824: /* set vectors */
1825: asa_lev->x = asa->x;
1826: asa_lev->b = asa->b;
1828: *reason = PCRICHARDSON_CONVERGED_ITS;
1829: /* **************** Full algorithm loop *********************************** */
1830: for (i=0; i<its; i++) {
1831: /* apply V-cycle */
1832: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1833: /* check convergence */
1834: MatMult(asa->A, asa->x, asa->r);
1835: VecAYPX(asa->r, -1.0, asa->b);
1836: VecNorm(asa->r, NORM_2, &rnorm);
1837: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1838: if (rnorm < rnorm_start*(rtol)) {
1839: *reason = PCRICHARDSON_CONVERGED_RTOL;
1840: break;
1841: } else if (rnorm < asa->abstol) {
1842: *reason = PCRICHARDSON_CONVERGED_ATOL;
1843: break;
1844: } else if (rnorm > rnorm_start*(dtol)) {
1845: *reason = PCRICHARDSON_DIVERGED_DTOL;
1846: break;
1847: }
1848: }
1849: *outits = i;
1850:
1851: /* Return solution */
1852: if (asa->scale_diag) {
1853: VecPointwiseMult(x, asa->x, asa->invsqrtdiag);
1854: } else {
1855: VecCopy(x, asa->x);
1856: }
1858: /* delete working vectors */
1859: VecDestroy(&(asa->x));
1860: VecDestroy(&(asa->b));
1861: VecDestroy(&(asa->r));
1862: asa_lev->x = PETSC_NULL;
1863: asa_lev->b = PETSC_NULL;
1864: return(0);
1865: }
1867: /* -------------------------------------------------------------------------- */
1868: /*
1869: PCDestroy_ASA - Destroys the private context for the ASA preconditioner
1870: that was created with PCCreate_ASA().
1872: Input Parameter:
1873: . pc - the preconditioner context
1875: Application Interface Routine: PCDestroy()
1876: */
1879: static PetscErrorCode PCDestroy_ASA(PC pc)
1880: {
1881: PC_ASA *asa;
1882: PC_ASA_level *asa_lev;
1883: PC_ASA_level *asa_next_level;
1888: asa = (PC_ASA*)pc->data;
1889: asa_lev = asa->levellist;
1891: /* Delete top level data */
1892: PetscFree(asa->ksptype_smooth);
1893: PetscFree(asa->pctype_smooth);
1894: PetscFree(asa->ksptype_direct);
1895: PetscFree(asa->pctype_direct);
1896: PetscFree(asa->coarse_mat_type);
1898: /* this is destroyed by the levels below */
1899: /* MatDestroy(&(asa->A)); */
1900: VecDestroy(&(asa->invsqrtdiag));
1901: VecDestroy(&(asa->b));
1902: VecDestroy(&(asa->x));
1903: VecDestroy(&(asa->r));
1905: if (asa->dm) {DMDestroy(&asa->dm);}
1907: /* Destroy each of the levels */
1908: while(asa_lev) {
1909: asa_next_level = asa_lev->next;
1910: PCDestroyLevel_ASA(asa_lev);
1911: asa_lev = asa_next_level;
1912: }
1914: PetscFree(asa);
1915: return(0);
1916: }
1920: static PetscErrorCode PCSetFromOptions_ASA(PC pc)
1921: {
1922: PC_ASA *asa = (PC_ASA*)pc->data;
1923: PetscBool flg;
1925: char type[20];
1930: PetscOptionsHead("ASA options");
1931: /* convergence parameters */
1932: PetscOptionsInt("-pc_asa_nu","Number of cycles to run smoother","No manual page yet",asa->nu,&(asa->nu),&flg);
1933: PetscOptionsInt("-pc_asa_gamma","Number of cycles to run coarse grid correction","No manual page yet",asa->gamma,&(asa->gamma),&flg);
1934: PetscOptionsReal("-pc_asa_epsilon","Tolerance for the relaxation method","No manual page yet",asa->epsilon,&(asa->epsilon),&flg);
1935: PetscOptionsInt("-pc_asa_mu","Number of cycles to relax in setup stages","No manual page yet",asa->mu,&(asa->mu),&flg);
1936: PetscOptionsInt("-pc_asa_mu_initial","Number of cycles to relax for generating first candidate vector","No manual page yet",asa->mu_initial,&(asa->mu_initial),&flg);
1937: PetscOptionsInt("-pc_asa_direct_solver","For which matrix size should we use the direct solver?","No manual page yet",asa->direct_solver,&(asa->direct_solver),&flg);
1938: PetscOptionsBool("-pc_asa_scale_diag","Should we scale the matrix with the inverse of its diagonal?","No manual page yet",asa->scale_diag,&(asa->scale_diag),&flg);
1939: /* type of smoother used */
1940: PetscOptionsList("-pc_asa_smoother_ksp_type","The type of KSP to be used in the smoothers","No manual page yet",KSPList,asa->ksptype_smooth,type,20,&flg);
1941: if (flg) {
1942: PetscFree(asa->ksptype_smooth);
1943: PetscStrallocpy(type,&(asa->ksptype_smooth));
1944: }
1945: PetscOptionsList("-pc_asa_smoother_pc_type","The type of PC to be used in the smoothers","No manual page yet",PCList,asa->pctype_smooth,type,20,&flg);
1946: if (flg) {
1947: PetscFree(asa->pctype_smooth);
1948: PetscStrallocpy(type,&(asa->pctype_smooth));
1949: }
1950: PetscOptionsList("-pc_asa_direct_ksp_type","The type of KSP to be used in the direct solver","No manual page yet",KSPList,asa->ksptype_direct,type,20,&flg);
1951: if (flg) {
1952: PetscFree(asa->ksptype_direct);
1953: PetscStrallocpy(type,&(asa->ksptype_direct));
1954: }
1955: PetscOptionsList("-pc_asa_direct_pc_type","The type of PC to be used in the direct solver","No manual page yet",PCList,asa->pctype_direct,type,20,&flg);
1956: if (flg) {
1957: PetscFree(asa->pctype_direct);
1958: PetscStrallocpy(type,&(asa->pctype_direct));
1959: }
1960: /* options specific for certain smoothers */
1961: PetscOptionsReal("-pc_asa_richardson_scale","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->richardson_scale,&(asa->richardson_scale),&flg);
1962: PetscOptionsReal("-pc_asa_sor_omega","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->sor_omega,&(asa->sor_omega),&flg);
1963: /* options for direct solver */
1964: PetscOptionsString("-pc_asa_coarse_mat_type","The coarse level matrix type (e.g. SuperLU, MUMPS, ...)","No manual page yet",asa->coarse_mat_type, type,20,&flg);
1965: if (flg) {
1966: PetscFree(asa->coarse_mat_type);
1967: PetscStrallocpy(type,&(asa->coarse_mat_type));
1968: }
1969: /* storage allocation parameters */
1970: PetscOptionsInt("-pc_asa_max_cand_vecs","Maximum number of candidate vectors","No manual page yet",asa->max_cand_vecs,&(asa->max_cand_vecs),&flg);
1971: PetscOptionsInt("-pc_asa_max_dof_lev_2","The maximum number of degrees of freedom per node on level 2 (K in paper)","No manual page yet",asa->max_dof_lev_2,&(asa->max_dof_lev_2),&flg);
1972: /* construction parameters */
1973: PetscOptionsReal("-pc_asa_rq_improve","Threshold in RQ improvement for adding another candidate","No manual page yet",asa->rq_improve,&(asa->rq_improve),&flg);
1974: PetscOptionsTail();
1975: return(0);
1976: }
1980: static PetscErrorCode PCView_ASA(PC pc,PetscViewer viewer)
1981: {
1982: PC_ASA *asa = (PC_ASA*)pc->data;
1984: PetscBool iascii;
1985: PC_ASA_level *asa_lev = asa->levellist;
1988: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1989: if (iascii) {
1990: PetscViewerASCIIPrintf(viewer," ASA:\n");
1991: asa_lev = asa->levellist;
1992: while (asa_lev) {
1993: if (!asa_lev->next) {
1994: PetscViewerASCIIPrintf(viewer,"Coarse grid solver -- level %D -------------------------------\n",0);
1995: } else {
1996: PetscViewerASCIIPrintf(viewer,"Down solver (pre-smoother) on level ? -------------------------------\n");
1997: }
1998: PetscViewerASCIIPushTab(viewer);
1999: KSPView(asa_lev->smoothd,viewer);
2000: PetscViewerASCIIPopTab(viewer);
2001: if (asa_lev->next && asa_lev->smoothd == asa_lev->smoothu) {
2002: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) same as down solver (pre-smoother)\n");
2003: } else if (asa_lev->next){
2004: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) on level ? -------------------------------\n");
2005: PetscViewerASCIIPushTab(viewer);
2006: KSPView(asa_lev->smoothu,viewer);
2007: PetscViewerASCIIPopTab(viewer);
2008: }
2009: asa_lev = asa_lev->next;
2010: }
2011: } else {
2012: SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCASA",((PetscObject)viewer)->type_name);
2013: }
2014: return(0);
2015: }
2017: /* -------------------------------------------------------------------------- */
2018: /*
2019: PCCreate_ASA - Creates a ASA preconditioner context, PC_ASA,
2020: and sets this as the private data within the generic preconditioning
2021: context, PC, that was created within PCCreate().
2023: Input Parameter:
2024: . pc - the preconditioner context
2026: Application Interface Routine: PCCreate()
2027: */
2031: PetscErrorCode PCCreate_ASA(PC pc)
2032: {
2034: PC_ASA *asa;
2039: /*
2040: Set the pointers for the functions that are provided above.
2041: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
2042: are called, they will automatically call these functions. Note we
2043: choose not to provide a couple of these functions since they are
2044: not needed.
2045: */
2046: pc->ops->apply = PCApply_ASA;
2047: /* pc->ops->applytranspose = PCApply_ASA;*/
2048: pc->ops->applyrichardson = PCApplyRichardson_ASA;
2049: pc->ops->setup = 0;
2050: pc->ops->destroy = PCDestroy_ASA;
2051: pc->ops->setfromoptions = PCSetFromOptions_ASA;
2052: pc->ops->view = PCView_ASA;
2054: /* Set the data to pointer to 0 */
2055: pc->data = (void*)0;
2057: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASASetDM_C","PCASASetDM_ASA",PCASASetDM_ASA);
2058: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASASetTolerances_C","PCASASetTolerances_ASA",PCASASetTolerances_ASA);
2060: /* register events */
2061: if (! asa_events_registered) {
2062: PetscLogEventRegister("PCInitializationStage_ASA", PC_CLASSID,&PC_InitializationStage_ASA);
2063: PetscLogEventRegister("PCGeneralSetupStage_ASA", PC_CLASSID,&PC_GeneralSetupStage_ASA);
2064: PetscLogEventRegister("PCCreateTransferOp_ASA", PC_CLASSID,&PC_CreateTransferOp_ASA);
2065: PetscLogEventRegister("PCCreateVcycle_ASA", PC_CLASSID,&PC_CreateVcycle_ASA);
2066: asa_events_registered = PETSC_TRUE;
2067: }
2069: /* Create new PC_ASA object */
2070: PetscNewLog(pc,PC_ASA,&asa);
2071: pc->data = (void*)asa;
2073: /* WORK: find some better initial values */
2074: asa->nu = 3;
2075: asa->gamma = 1;
2076: asa->epsilon = 1e-4;
2077: asa->mu = 3;
2078: asa->mu_initial = 20;
2079: asa->direct_solver = 100;
2080: asa->scale_diag = PETSC_TRUE;
2081: PetscStrallocpy(KSPRICHARDSON, (char **) &(asa->ksptype_smooth));
2082: PetscStrallocpy(PCSOR, (char **) &(asa->pctype_smooth));
2083: asa->smoother_rtol = 1e-10;
2084: asa->smoother_abstol = 1e-20;
2085: asa->smoother_dtol = PETSC_DEFAULT;
2086: PetscStrallocpy(KSPPREONLY, (char **) &(asa->ksptype_direct));
2087: PetscStrallocpy(PCREDUNDANT, (char **) &(asa->pctype_direct));
2088: asa->direct_rtol = 1e-10;
2089: asa->direct_abstol = 1e-20;
2090: asa->direct_dtol = PETSC_DEFAULT;
2091: asa->richardson_scale = PETSC_DECIDE;
2092: asa->sor_omega = PETSC_DECIDE;
2093: PetscStrallocpy(MATSAME, (char **) &(asa->coarse_mat_type));
2095: asa->max_cand_vecs = 4;
2096: asa->max_dof_lev_2 = 640; /* I don't think this parameter really matters, 640 should be enough for everyone! */
2098: asa->multigrid_constructed = PETSC_FALSE;
2100: asa->rtol = 1e-10;
2101: asa->abstol = 1e-15;
2102: asa->divtol = 1e5;
2103: asa->max_it = 10000;
2104: asa->rq_improve = 0.9;
2105:
2106: asa->A = 0;
2107: asa->invsqrtdiag = 0;
2108: asa->b = 0;
2109: asa->x = 0;
2110: asa->r = 0;
2112: asa->dm = 0;
2113:
2114: asa->levels = 0;
2115: asa->levellist = 0;
2117: asa->comm = ((PetscObject)pc)->comm;
2118: return(0);
2119: }