Actual source code: ex48.c

  1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D\n\
  2: \n\
  3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
  4: using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
  5: to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
  6: boundary conditions in the x- and y-directions.\n\
  7: \n\
  8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
  9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
 10: \n\
 11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
 12: \n\n";

 14: /*
 15: The equations for horizontal velocity (u,v) are

 17:   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
 18:   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0

 20: where

 22:   eta = B/2 (epsilon + gamma)^((p-2)/2)

 24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
 25: written in terms of the second invariant

 27:   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2

 29: The surface boundary conditions are the natural conditions.  The basal boundary conditions
 30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.

 32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).

 34: The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
 35: map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
 36: the Jacobian of the coordinate transformation from a reference element to the physical element.

 38: Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
 39: specially so that columns are never distributed, and are always contiguous in memory.
 40: This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
 41: and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
 42: use compatible domain decomposition relative to the 3D DMDAs.

 44: There are two compile-time options:

 46:   NO_SSE2:
 47:     If the host supports SSE2, we use integration code that has been vectorized with SSE2
 48:     intrinsics, unless this macro is defined.  The intrinsics speed up integration by about
 49:     30% on my architecture (P8700, gcc-4.5 snapshot).

 51:   COMPUTE_LOWER_TRIANGULAR:
 52:     The element matrices we assemble are lower-triangular so it is not necessary to compute
 53:     all entries explicitly.  If this macro is defined, the lower-triangular entries are
 54:     computed explicitly.

 56: */

 58: #include <petscdmmg.h>
 59: #include <ctype.h>              /* toupper() */
 60: #include <private/daimpl.h>     /* There is not yet a public interface to manipulate dm->ops */

 62: #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
 63: #  if defined __cplusplus       /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
 64: #    define restrict
 65: #  else
 66: #    define restrict PETSC_RESTRICT
 67: #  endif
 68: #endif
 69: #if defined __SSE2__
 70: #  include <emmintrin.h>
 71: #endif

 73: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
 74: #define USE_SSE2_KERNELS (!defined NO_SSE2                              \
 75:                           && !defined PETSC_USE_COMPLEX                 \
 76:                           && !defined PETSC_USE_REAL_SINGLE           \
 77:                           && !defined PETSC_USE_REAL_LONG_DOUBLE      \
 78:                           && defined __SSE2__)

 80: static PetscClassId THI_CLASSID;

 82: typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
 83: static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
 84: static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
 85: static const PetscReal HexQNodes[]    = {-0.57735026918962573, 0.57735026918962573};
 86: #define G 0.57735026918962573
 87: #define H (0.5*(1.+G))
 88: #define L (0.5*(1.-G))
 89: #define M (-0.5)
 90: #define P (0.5)
 91: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
 92: static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
 93:                                                    {0,H,0,0,0,L,0,0},
 94:                                                    {0,0,H,0,0,0,L,0},
 95:                                                    {0,0,0,H,0,0,0,L},
 96:                                                    {L,0,0,0,H,0,0,0},
 97:                                                    {0,L,0,0,0,H,0,0},
 98:                                                    {0,0,L,0,0,0,H,0},
 99:                                                    {0,0,0,L,0,0,0,H}};
100: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
101:   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
102:   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
103:   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
104:   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
105:   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
106:   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
107:   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
108:   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
109: /* Stanndard Gauss */
110: static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
111:                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
112:                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
113:                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
114:                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
115:                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
116:                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
117:                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
118: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
119:   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
120:   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
121:   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
122:   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
123:   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
124:   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
125:   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
126:   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
127: static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
128: /* Standard 2x2 Gauss quadrature for the bottom layer. */
129: static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
130:                                             {L*H,H*H,H*L,L*L},
131:                                             {L*L,H*L,H*H,L*H},
132:                                             {H*L,L*L,L*H,H*H}};
133: static const PetscReal QuadQDeriv[4][4][2] = {
134:   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
135:   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
136:   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
137:   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
138: #undef G
139: #undef H
140: #undef L
141: #undef M
142: #undef P

144: #define HexExtract(x,i,j,k,n) do {              \
145:     (n)[0] = (x)[i][j][k];                      \
146:     (n)[1] = (x)[i+1][j][k];                    \
147:     (n)[2] = (x)[i+1][j+1][k];                  \
148:     (n)[3] = (x)[i][j+1][k];                    \
149:     (n)[4] = (x)[i][j][k+1];                    \
150:     (n)[5] = (x)[i+1][j][k+1];                  \
151:     (n)[6] = (x)[i+1][j+1][k+1];                \
152:     (n)[7] = (x)[i][j+1][k+1];                  \
153:   } while (0)

155: #define HexExtractRef(x,i,j,k,n) do {           \
156:     (n)[0] = &(x)[i][j][k];                     \
157:     (n)[1] = &(x)[i+1][j][k];                   \
158:     (n)[2] = &(x)[i+1][j+1][k];                 \
159:     (n)[3] = &(x)[i][j+1][k];                   \
160:     (n)[4] = &(x)[i][j][k+1];                   \
161:     (n)[5] = &(x)[i+1][j][k+1];                 \
162:     (n)[6] = &(x)[i+1][j+1][k+1];               \
163:     (n)[7] = &(x)[i][j+1][k+1];                 \
164:   } while (0)

166: #define QuadExtract(x,i,j,n) do {               \
167:     (n)[0] = (x)[i][j];                         \
168:     (n)[1] = (x)[i+1][j];                       \
169:     (n)[2] = (x)[i+1][j+1];                     \
170:     (n)[3] = (x)[i][j+1];                       \
171:   } while (0)

173: static PetscScalar Sqr(PetscScalar a) {return a*a;}

175: static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
176: {
177:   PetscInt i;
178:   dz[0] = dz[1] = dz[2] = 0;
179:   for (i=0; i<8; i++) {
180:     dz[0] += dphi[i][0] * zn[i];
181:     dz[1] += dphi[i][1] * zn[i];
182:     dz[2] += dphi[i][2] * zn[i];
183:   }
184: }

186: static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
187: {
188:   const PetscReal
189:     jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
190:   ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
191:   ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
192:   PetscInt i;

194:   for (i=0; i<8; i++) {
195:     const PetscReal *dphir = HexQDeriv[q][i];
196:     phi[i] = HexQInterp[q][i];
197:     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
198:     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
199:     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
200:   }
201:   *jw = 1.0 * jdet;
202: }

204: typedef struct _p_THI   *THI;
205: typedef struct _n_Units *Units;

207: typedef struct {
208:   PetscScalar u,v;
209: } Node;

211: typedef struct {
212:   PetscScalar b;                /* bed */
213:   PetscScalar h;                /* thickness */
214:   PetscScalar beta2;            /* friction */
215: } PrmNode;

217: typedef struct {
218:   PetscReal min,max,cmin,cmax;
219: } PRange;

221: typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode;

223: struct _p_THI {
224:   PETSCHEADER(int);
225:   void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
226:   PetscInt  nlevels;
227:   PetscInt  zlevels;
228:   PetscReal Lx,Ly,Lz;           /* Model domain */
229:   PetscReal alpha;              /* Bed angle */
230:   Units     units;
231:   PetscReal dirichlet_scale;
232:   PetscReal ssa_friction_scale;
233:   PRange    eta;
234:   PRange    beta2;
235:   struct {
236:     PetscReal Bd2,eps,exponent;
237:   } viscosity;
238:   struct {
239:     PetscReal irefgam,eps2,exponent,refvel,epsvel;
240:   } friction;
241:   PetscReal rhog;
242:   PetscBool  no_slip;
243:   PetscBool  tridiagonal;
244:   PetscBool  coarse2d;
245:   PetscBool  verbose;
246:   MatType mattype;
247: };

249: struct _n_Units {
250:   /* fundamental */
251:   PetscReal meter;
252:   PetscReal kilogram;
253:   PetscReal second;
254:   /* derived */
255:   PetscReal Pascal;
256:   PetscReal year;
257: };

259: static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
260: {
261:   const PetscScalar zm1 = zm-1,
262:     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
263:               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
264:               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
265:               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
266:               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
267:               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
268:               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
269:               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
270:   PetscInt i;
271:   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
272: }

274: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
275: static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
276: {
277:   Units units = thi->units;
278:   PetscReal s = -x*sin(thi->alpha);
279:   p->b = s - 1000*units->meter + 500*units->meter * sin(x*2*PETSC_PI/thi->Lx) * sin(y*2*PETSC_PI/thi->Ly);
280:   p->h = s - p->b;
281:   p->beta2 = 1e30;
282: }

284: static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
285: {
286:   Units units = thi->units;
287:   PetscReal s = -x*sin(thi->alpha);
288:   p->b = s - 1000*units->meter;
289:   p->h = s - p->b;
290:   /* tau_b = beta2 v   is a stress (Pa) */
291:   p->beta2 = 1000 * (1 + sin(x*2*PETSC_PI/thi->Lx)*sin(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter;
292: }

294: /* These are just toys */

296: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
297: static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
298: {
299:   Units units = thi->units;
300:   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
301:   PetscReal r = sqrt(x*x + y*y),s = -x*sin(thi->alpha);
302:   p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
303:   p->h = s - p->b;
304:   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
305: }

307: /* Like Z, but with 200 meter cliffs */
308: static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
309: {
310:   Units units = thi->units;
311:   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
312:   PetscReal r = sqrt(x*x + y*y),s = -x*sin(thi->alpha);
313:   p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
314:   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
315:   p->h = s - p->b;
316:   p->beta2 = 1000 * (1. + sin(sqrt(16*r))/sqrt(1e-2 + 16*r)*cos(x*3/2)*cos(y*3/2)) * units->Pascal * units->year / units->meter;
317: }

319: /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
320: static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
321: {
322:   Units units = thi->units;
323:   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
324:   PetscReal r = sqrt(x*x + y*y),s = -x*sin(thi->alpha);
325:   p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
326:   p->h = s - p->b;
327:   p->beta2 = 1000 * (1. + sin(sqrt(16*r))/sqrt(1e-2 + 16*r)*cos(x*3/2)*cos(y*3/2)) * units->Pascal * units->year / units->meter;
328: }

330: static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
331: {
332:   if (thi->friction.irefgam == 0) {
333:     Units units = thi->units;
334:     thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year));
335:     thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
336:   }
337:   if (thi->friction.exponent == 0) {
338:     *beta2 = rbeta2;
339:     *dbeta2 = 0;
340:   } else {
341:     *beta2 = rbeta2 * pow(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
342:     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
343:   }
344: }

346: static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
347: {
348:   PetscReal Bd2,eps,exponent;
349:   if (thi->viscosity.Bd2 == 0) {
350:     Units units = thi->units;
351:     const PetscReal
352:       n = 3.,                                           /* Glen exponent */
353:       p = 1. + 1./n,                                    /* for Stokes */
354:       A = 1.e-16 * pow(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
355:       B = pow(A,-1./n);                                 /* hardness parameter */
356:     thi->viscosity.Bd2      = B/2;
357:     thi->viscosity.exponent = (p-2)/2;
358:     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
359:   }
360:   Bd2      = thi->viscosity.Bd2;
361:   exponent = thi->viscosity.exponent;
362:   eps      = thi->viscosity.eps;
363:   *eta = Bd2 * pow(eps + gam,exponent);
364:   *deta = exponent * (*eta) / (eps + gam);
365: }

367: static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
368: {
369:   if (x < *min) *min = x;
370:   if (x > *max) *max = x;
371: }

373: static void PRangeClear(PRange *p)
374: {
375:   p->cmin = p->min = 1e100;
376:   p->cmax = p->max = -1e100;
377: }

381: static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
382: {

385:   p->cmin = min;
386:   p->cmax = max;
387:   if (min < p->min) p->min = min;
388:   if (max > p->max) p->max = max;
389:   return(0);
390: }

394: static PetscErrorCode THIDestroy(THI *thi)
395: {

399:   if (!*thi) return(0);
400:   if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; return(0);}
401:   PetscFree((*thi)->units);
402:   PetscFree((*thi)->mattype);
403:   PetscHeaderDestroy(thi);
404:   return(0);
405: }

409: static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
410: {
411:   static PetscBool  registered = PETSC_FALSE;
412:   THI thi;
413:   Units units;

417:   *inthi = 0;
418:   if (!registered) {
419:     PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);
420:     registered = PETSC_TRUE;
421:   }
422:   PetscHeaderCreate(thi,_p_THI,0,THI_CLASSID,-1,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0);

424:   PetscNew(struct _n_Units,&thi->units);
425:   units = thi->units;
426:   units->meter  = 1e-2;
427:   units->second = 1e-7;
428:   units->kilogram = 1e-12;
429:   PetscOptionsBegin(comm,NULL,"Scaled units options","");
430:   {
431:     PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);
432:     PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);
433:     PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);
434:   }
435:   PetscOptionsEnd();
436:   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
437:   units->year = 31556926. * units->second, /* seconds per year */

439:   thi->Lx              = 10.e3;
440:   thi->Ly              = 10.e3;
441:   thi->Lz              = 1000;
442:   thi->nlevels         = 1;
443:   thi->dirichlet_scale = 1;
444:   thi->verbose         = PETSC_FALSE;

446:   PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");
447:   {
448:     QuadratureType quad = QUAD_GAUSS;
449:     char homexp[] = "A";
450:     char mtype[256] = MATSBAIJ;
451:     PetscReal L,m = 1.0;
452:     PetscBool  flg;
453:     L = thi->Lx;
454:     PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);
455:     if (flg) thi->Lx = thi->Ly = L;
456:     PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);
457:     PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);
458:     PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);
459:     PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);
460:     switch (homexp[0] = toupper(homexp[0])) {
461:       case 'A':
462:         thi->initialize = THIInitialize_HOM_A;
463:         thi->no_slip = PETSC_TRUE;
464:         thi->alpha = 0.5;
465:         break;
466:       case 'C':
467:         thi->initialize = THIInitialize_HOM_C;
468:         thi->no_slip = PETSC_FALSE;
469:         thi->alpha = 0.1;
470:         break;
471:       case 'X':
472:         thi->initialize = THIInitialize_HOM_X;
473:         thi->no_slip = PETSC_FALSE;
474:         thi->alpha = 0.3;
475:         break;
476:       case 'Y':
477:         thi->initialize = THIInitialize_HOM_Y;
478:         thi->no_slip = PETSC_FALSE;
479:         thi->alpha = 0.5;
480:         break;
481:       case 'Z':
482:         thi->initialize = THIInitialize_HOM_Z;
483:         thi->no_slip = PETSC_FALSE;
484:         thi->alpha = 0.5;
485:         break;
486:       default:
487:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
488:     }
489:     PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);
490:     switch (quad) {
491:       case QUAD_GAUSS:
492:         HexQInterp = HexQInterp_Gauss;
493:         HexQDeriv  = HexQDeriv_Gauss;
494:         break;
495:       case QUAD_LOBATTO:
496:         HexQInterp = HexQInterp_Lobatto;
497:         HexQDeriv  = HexQDeriv_Lobatto;
498:         break;
499:     }
500:     PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);
501:     thi->friction.refvel = 100.;
502:     thi->friction.epsvel = 1.;
503:     PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL);
504:     PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL);
505:     PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);
506:     thi->friction.exponent = (m-1)/2;
507:     PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);
508:     PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);
509:     PetscOptionsInt("-thi_nlevels","Number of levels of refinement","",thi->nlevels,&thi->nlevels,NULL);
510:     PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);
511:     PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);
512:     PetscOptionsList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);
513:     PetscStrallocpy(mtype,&thi->mattype);
514:     PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);
515:   }
516:   PetscOptionsEnd();

518:   /* dimensionalize */
519:   thi->Lx     *= units->meter;
520:   thi->Ly     *= units->meter;
521:   thi->Lz     *= units->meter;
522:   thi->alpha  *= PETSC_PI / 180;

524:   PRangeClear(&thi->eta);
525:   PRangeClear(&thi->beta2);

527:   {
528:     PetscReal u = 1000*units->meter/(3e7*units->second),
529:       gradu = u / (100*units->meter),eta,deta,
530:       rho = 910 * units->kilogram/pow(units->meter,3),
531:       grav = 9.81 * units->meter/PetscSqr(units->second),
532:       driving = rho * grav * sin(thi->alpha) * 1000*units->meter;
533:     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
534:     thi->rhog = rho * grav;
535:     if (thi->verbose) {
536:       PetscPrintf(((PetscObject)thi)->comm,"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",units->meter,units->second,units->kilogram,units->Pascal);
537:       PetscPrintf(((PetscObject)thi)->comm,"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",thi->Lx,thi->Ly,thi->Lz,rho*grav*1e3*units->meter,driving);
538:       PetscPrintf(((PetscObject)thi)->comm,"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",u,gradu,eta,2*eta*gradu,2*eta*gradu/driving);
539:       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
540:       PetscPrintf(((PetscObject)thi)->comm,"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",1e-3*u,1e-3*gradu,eta,2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving);
541:     }
542:   }

544:   *inthi = thi;
545:   return(0);
546: }

550: static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm)
551: {
552:   PrmNode **p;
553:   PetscInt i,j,xs,xm,ys,ym,mx,my;

557:   DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);
558:   DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);
559:   DMDAVecGetArray(da2prm,prm,&p);
560:   for (i=xs; i<xs+xm; i++) {
561:     for (j=ys; j<ys+ym; j++) {
562:       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
563:       thi->initialize(thi,xx,yy,&p[i][j]);
564:     }
565:   }
566:   DMDAVecRestoreArray(da2prm,prm,&p);
567:   return(0);
568: }

572: static PetscErrorCode THISetDMMG(THI thi,DMMG *dmmg)
573: {
575:   PetscInt i;

578:   if (DMMGGetLevels(dmmg) != thi->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"DMMG nlevels does not agree with THI");
579:   for (i=0; i<thi->nlevels; i++) {
580:     PetscInt Mx,My,Mz,mx,my,s,dim;
581:     DMDAStencilType  st;
582:     DM da = dmmg[i]->dm,da2prm;
583:     Vec X;
584:     DMDAGetInfo(da,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);
585:     if (dim == 2) {
586:       DMDAGetInfo(da,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st);
587:     }
588:     DMDACreate2d(((PetscObject)thi)->comm,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);
589:     DMCreateLocalVector(da2prm,&X);
590:     {
591:       PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
592:       if (dim == 2) {
593:         PetscPrintf(((PetscObject)thi)->comm,"Level %d domain size (m) %8.2g x %8.2g, num elements %3d x %3d (%8d), size (m) %g x %g\n",i,Lx,Ly,Mx,My,Mx*My,Lx/Mx,Ly/My);
594:       } else {
595:         PetscPrintf(((PetscObject)thi)->comm,"Level %d domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",i,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1));
596:       }
597:     }
598:     THIInitializePrm(thi,da2prm,X);
599:     PetscObjectCompose((PetscObject)da,"DMDA2Prm",(PetscObject)da2prm);
600:     PetscObjectCompose((PetscObject)da,"DMDA2Prm_Vec",(PetscObject)X);
601:     DMDestroy(&da2prm);
602:     VecDestroy(&X);
603:   }
604:   return(0);
605: }

609: static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm)
610: {
612:   DM             da2prm;
613:   Vec            X;

616:   PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
617:   if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
618:   PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
619:   if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
620:   DMDAVecGetArray(da2prm,X,prm);
621:   return(0);
622: }

626: static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm)
627: {
629:   DM             da2prm;
630:   Vec            X;

633:   PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);
634:   if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
635:   PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);
636:   if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
637:   DMDAVecRestoreArray(da2prm,X,prm);
638:   return(0);
639: }

643: static PetscErrorCode THIInitial(DMMG dmmg,Vec X)
644: {
645:   THI         thi   = (THI)dmmg->user;
646:   DM          da    = dmmg->dm;
647:   PetscInt    i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
648:   PetscReal   hx,hy;
649:   PrmNode     **prm;
650:   Node        ***x;

654:   DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);
655:   DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
656:   DMDAVecGetArray(da,X,&x);
657:   THIDAGetPrm(da,&prm);
658:   hx = thi->Lx / mx;
659:   hy = thi->Ly / my;
660:   for (i=xs; i<xs+xm; i++) {
661:     for (j=ys; j<ys+ym; j++) {
662:       for (k=zs; k<zs+zm; k++) {
663:         const PetscScalar zm1 = zm-1,
664:           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
665:           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
666:         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
667:         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
668:       }
669:     }
670:   }
671:   DMDAVecRestoreArray(da,X,&x);
672:   THIDARestorePrm(da,&prm);
673:   return(0);
674: }

676: static void PointwiseNonlinearity(THI thi,const Node n[restrict],const PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict],PetscScalar dv[restrict],PetscReal *eta,PetscReal *deta)
677: {
678:   PetscInt l,ll;
679:   PetscScalar gam;

681:   du[0] = du[1] = du[2] = 0;
682:   dv[0] = dv[1] = dv[2] = 0;
683:   *u = 0;
684:   *v = 0;
685:   for (l=0; l<8; l++) {
686:     *u += phi[l] * n[l].u;
687:     *v += phi[l] * n[l].v;
688:     for (ll=0; ll<3; ll++) {
689:       du[ll] += dphi[l][ll] * n[l].u;
690:       dv[ll] += dphi[l][ll] * n[l].v;
691:     }
692:   }
693:   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
694:   THIViscosity(thi,PetscRealPart(gam),eta,deta);
695: }

697: static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta)
698: {
699:   PetscInt l,ll;
700:   PetscScalar gam;

702:   du[0] = du[1] = 0;
703:   dv[0] = dv[1] = 0;
704:   *u = 0;
705:   *v = 0;
706:   for (l=0; l<4; l++) {
707:     *u += phi[l] * n[l].u;
708:     *v += phi[l] * n[l].v;
709:     for (ll=0; ll<2; ll++) {
710:       du[ll] += dphi[l][ll] * n[l].u;
711:       dv[ll] += dphi[l][ll] * n[l].v;
712:     }
713:   }
714:   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]);
715:   THIViscosity(thi,PetscRealPart(gam),eta,deta);
716: }

720: static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi)
721: {
722:   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
723:   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;
724:   PrmNode        **prm;

728:   xs = info->zs;
729:   ys = info->ys;
730:   xm = info->zm;
731:   ym = info->ym;
732:   zm = info->xm;
733:   hx = thi->Lx / info->mz;
734:   hy = thi->Ly / info->my;

736:   etamin   = 1e100;
737:   etamax   = 0;
738:   beta2min = 1e100;
739:   beta2max = 0;

741:   THIDAGetPrm(info->da,&prm);

743:   for (i=xs; i<xs+xm; i++) {
744:     for (j=ys; j<ys+ym; j++) {
745:       PrmNode pn[4];
746:       QuadExtract(prm,i,j,pn);
747:       for (k=0; k<zm-1; k++) {
748:         PetscInt ls = 0;
749:         Node n[8],*fn[8];
750:         PetscReal zn[8],etabase = 0;
751:         PrmHexGetZ(pn,k,zm,zn);
752:         HexExtract(x,i,j,k,n);
753:         HexExtractRef(f,i,j,k,fn);
754:         if (thi->no_slip && k == 0) {
755:           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
756:           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
757:           ls = 4;
758:         }
759:         for (q=0; q<8; q++) {
760:           PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
761:           PetscScalar du[3],dv[3],u,v;
762:           HexGrad(HexQDeriv[q],zn,dz);
763:           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
764:           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
765:           jw /= thi->rhog;      /* scales residuals to be O(1) */
766:           if (q == 0) etabase = eta;
767:           RangeUpdate(&etamin,&etamax,eta);
768:           for (l=ls; l<8; l++) { /* test functions */
769:             const PetscReal ds[2] = {-sin(thi->alpha),0};
770:             const PetscReal pp=phi[l],*dp = dphi[l];
771:             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
772:             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
773:           }
774:         }
775:         if (k == 0) { /* we are on a bottom face */
776:           if (thi->no_slip) {
777:             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
778:             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
779:             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
780:             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
781:             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
782:             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
783:             * assembled matrix (see the similar block in THIJacobianLocal).
784:             *
785:             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
786:             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
787:             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
788:             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
789:             */
790:             const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.);
791:             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
792:             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
793:             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
794:           } else {              /* Integrate over bottom face to apply boundary condition */
795:             for (q=0; q<4; q++) {
796:               const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
797:               PetscScalar u=0,v=0,rbeta2=0;
798:               PetscReal beta2,dbeta2;
799:               for (l=0; l<4; l++) {
800:                 u     += phi[l]*n[l].u;
801:                 v     += phi[l]*n[l].v;
802:                 rbeta2 += phi[l]*pn[l].beta2;
803:               }
804:               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
805:               RangeUpdate(&beta2min,&beta2max,beta2);
806:               for (l=0; l<4; l++) {
807:                 const PetscReal pp = phi[l];
808:                 fn[ls+l]->u += pp*jw*beta2*u;
809:                 fn[ls+l]->v += pp*jw*beta2*v;
810:               }
811:             }
812:           }
813:         }
814:       }
815:     }
816:   }

818:   THIDARestorePrm(info->da,&prm);

820:   PRangeMinMax(&thi->eta,etamin,etamax);
821:   PRangeMinMax(&thi->beta2,beta2min,beta2max);
822:   return(0);
823: }

827: static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
828: {
830:   PetscReal      nrm;
831:   PetscInt       m;
832:   PetscMPIInt    rank;

835:   MatNorm(B,NORM_FROBENIUS,&nrm);
836:   MatGetSize(B,&m,0);
837:   MPI_Comm_rank(((PetscObject)B)->comm,&rank);
838:   if (!rank) {
839:     PetscScalar val0,val2;
840:     MatGetValue(B,0,0,&val0);
841:     MatGetValue(B,2,2,&val2);
842:     PetscViewerASCIIPrintf(viewer,"Matrix dim %8d  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n",m,nrm,PetscRealPart(val0),PetscRealPart(val2),thi->eta.cmin,thi->eta.cmax,thi->beta2.cmin,thi->beta2.cmax);
843:   }
844:   return(0);
845: }

849: static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
850: {
852:   Node           ***x;
853:   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
854:   PetscReal      umin = 1e100,umax=-1e100;
855:   PetscScalar    usum=0.0,gusum;

858:   *min = *max = *mean = 0;
859:   DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
860:   DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
861:   if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_SELF,1,"Unexpected decomposition");
862:   DMDAVecGetArray(da,X,&x);
863:   for (i=xs; i<xs+xm; i++) {
864:     for (j=ys; j<ys+ym; j++) {
865:       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
866:       RangeUpdate(&umin,&umax,u);
867:       usum += u;
868:     }
869:   }
870:   DMDAVecRestoreArray(da,X,&x);
871:   MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,((PetscObject)da)->comm);
872:   MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,((PetscObject)da)->comm);
873:   MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)da)->comm);
874:   *mean = PetscRealPart(gusum) / (mx*my);
875:   return(0);
876: }

880: static PetscErrorCode THISolveStatistics(THI thi,DMMG *dmmg,PetscInt coarsened,const char name[])
881: {
882:   MPI_Comm       comm    = ((PetscObject)thi)->comm;
883:   PetscInt       nlevels = DMMGGetLevels(dmmg),level = nlevels-1-coarsened;
884:   SNES           snes    = dmmg[level]->snes;
885:   Vec            X       = dmmg[level]->x;

889:   PetscPrintf(comm,"Solution statistics after solve: %s\n",name);
890:   {
891:     PetscInt its,lits;
892:     SNESConvergedReason reason;
893:     SNESGetIterationNumber(snes,&its);
894:     SNESGetConvergedReason(snes,&reason);
895:     SNESGetLinearSolveIterations(snes,&lits);
896:     PetscPrintf(comm,"%s: Number of Newton iterations = %d, total linear iterations = %d\n",SNESConvergedReasons[reason],its,lits);
897:   }
898:   {
899:     PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
900:     PetscInt i,j,m;
901:     PetscScalar *x;
902:     VecNorm(X,NORM_2,&nrm2);
903:     VecGetLocalSize(X,&m);
904:     VecGetArray(X,&x);
905:     for (i=0; i<m; i+=2) {
906:       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = sqrt(u*u+v*v);
907:       tmin[0] = PetscMin(u,tmin[0]);
908:       tmin[1] = PetscMin(v,tmin[1]);
909:       tmin[2] = PetscMin(c,tmin[2]);
910:       tmax[0] = PetscMax(u,tmax[0]);
911:       tmax[1] = PetscMax(v,tmax[1]);
912:       tmax[2] = PetscMax(c,tmax[2]);
913:     }
914:     VecRestoreArray(X,&x);
915:     MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,((PetscObject)thi)->comm);
916:     MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,((PetscObject)thi)->comm);
917:     /* Dimensionalize to meters/year */
918:     nrm2 *= thi->units->year / thi->units->meter;
919:     for (j=0; j<3; j++) {
920:       min[j] *= thi->units->year / thi->units->meter;
921:       max[j] *= thi->units->year / thi->units->meter;
922:     }
923:     PetscPrintf(comm,"|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n",nrm2,min[0],max[0],min[1],max[1],min[2],max[2]);
924:     {
925:       PetscReal umin,umax,umean;
926:       THISurfaceStatistics(dmmg[level]->dm,X,&umin,&umax,&umean);
927:       umin  *= thi->units->year / thi->units->meter;
928:       umax  *= thi->units->year / thi->units->meter;
929:       umean *= thi->units->year / thi->units->meter;
930:       PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",umin,umax,umean);
931:     }
932:     /* These values stay nondimensional */
933:     PetscPrintf(comm,"Global eta range   [%g, %g], converged range [%g, %g]\n",thi->eta.min,thi->eta.max,thi->eta.cmin,thi->eta.cmax);
934:     PetscPrintf(comm,"Global beta2 range [%g, %g], converged range [%g, %g]\n",thi->beta2.min,thi->beta2.max,thi->beta2.cmin,thi->beta2.cmax);
935:   }
936:   PetscPrintf(comm,"\n");
937:   return(0);
938: }

942: static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat B,THI thi)
943: {
944:   PetscInt       xs,ys,xm,ym,i,j,q,l,ll;
945:   PetscReal      hx,hy;
946:   PrmNode        **prm;

950:   xs = info->ys;
951:   ys = info->xs;
952:   xm = info->ym;
953:   ym = info->xm;
954:   hx = thi->Lx / info->my;
955:   hy = thi->Ly / info->mx;

957:   MatZeroEntries(B);
958:   THIDAGetPrm(info->da,&prm);

960:   for (i=xs; i<xs+xm; i++) {
961:     for (j=ys; j<ys+ym; j++) {
962:       Node n[4];
963:       PrmNode pn[4];
964:       PetscScalar Ke[4*2][4*2];
965:       QuadExtract(prm,i,j,pn);
966:       QuadExtract(x,i,j,n);
967:       PetscMemzero(Ke,sizeof(Ke));
968:       for (q=0; q<4; q++) {
969:         PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2;
970:         PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0;
971:         for (l=0; l<4; l++) {
972:           phi[l] = QuadQInterp[q][l];
973:           dphi[l][0] = QuadQDeriv[q][l][0]*2./hx;
974:           dphi[l][1] = QuadQDeriv[q][l][1]*2./hy;
975:           h += phi[l] * pn[l].h;
976:           rbeta2 += phi[l] * pn[l].beta2;
977:         }
978:         jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */
979:         PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
980:         THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
981:         for (l=0; l<4; l++) {
982:           const PetscReal pp = phi[l],*dp = dphi[l];
983:           for (ll=0; ll<4; ll++) {
984:             const PetscReal ppl = phi[ll],*dpl = dphi[ll];
985:             PetscScalar dgdu,dgdv;
986:             dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1];
987:             dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0];
988:             /* Picard part */
989:             Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
990:             Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
991:             Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
992:             Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
993:             /* extra Newton terms */
994:             Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale;
995:             Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale;
996:             Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale;
997:             Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale;
998:           }
999:         }
1000:       }
1001:       {
1002:         const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}};
1003:         MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);
1004:       }
1005:     }
1006:   }
1007:   THIDARestorePrm(info->da,&prm);

1009:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1010:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1011:   MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1012:   if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
1013:   return(0);
1014: }

1018: static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode)
1019: {
1020:   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1021:   PetscReal      hx,hy;
1022:   PrmNode        **prm;

1026:   xs = info->zs;
1027:   ys = info->ys;
1028:   xm = info->zm;
1029:   ym = info->ym;
1030:   zm = info->xm;
1031:   hx = thi->Lx / info->mz;
1032:   hy = thi->Ly / info->my;

1034:   MatZeroEntries(B);
1035:   THIDAGetPrm(info->da,&prm);

1037:   for (i=xs; i<xs+xm; i++) {
1038:     for (j=ys; j<ys+ym; j++) {
1039:       PrmNode pn[4];
1040:       QuadExtract(prm,i,j,pn);
1041:       for (k=0; k<zm-1; k++) {
1042:         Node n[8];
1043:         PetscReal zn[8],etabase = 0;
1044:         PetscScalar Ke[8*2][8*2];
1045:         PetscInt ls = 0;

1047:         PrmHexGetZ(pn,k,zm,zn);
1048:         HexExtract(x,i,j,k,n);
1049:         PetscMemzero(Ke,sizeof(Ke));
1050:         if (thi->no_slip && k == 0) {
1051:           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1052:           ls = 4;
1053:         }
1054:         for (q=0; q<8; q++) {
1055:           PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
1056:           PetscScalar du[3],dv[3],u,v;
1057:           HexGrad(HexQDeriv[q],zn,dz);
1058:           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1059:           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1060:           jw /= thi->rhog;      /* residuals are scaled by this factor */
1061:           if (q == 0) etabase = eta;
1062:           for (l=ls; l<8; l++) { /* test functions */
1063:             const PetscReal *restrict dp = dphi[l];
1064: #if USE_SSE2_KERNELS
1065:             /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1066:             __m128d
1067:               p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5),
1068:               p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)),
1069:               du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]),
1070:               dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]),
1071:               jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta),
1072:               dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]),
1073:               dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta),
1074:               p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */
1075:               p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */
1076:               pdu2dv2 = _mm_unpacklo_pd(du2,dv2),                             /* [du2, dv2] */
1077:               du1pdv0 = _mm_add_pd(du1,dv0),                                  /* du1 + dv0 */
1078:               t1 = _mm_mul_pd(dp0,p4du0p2dv1),                                /* dp0 (4 du0 + 2 dv1) */
1079:               t2 = _mm_mul_pd(dp1,p4dv1p2du0);                                /* dp1 (4 dv1 + 2 du0) */

1081: #endif
1082: #if defined COMPUTE_LOWER_TRIANGULAR  /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1083:             for (ll=ls; ll<8; ll++) { /* trial functions */
1084: #else
1085:             for (ll=l; ll<8; ll++) {
1086: #endif
1087:               const PetscReal *restrict dpl = dphi[ll];
1088:               if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */
1089: #if !USE_SSE2_KERNELS
1090:               /* The analytic Jacobian in nice, easy-to-read form */
1091:               {
1092:                 PetscScalar dgdu,dgdv;
1093:                 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1094:                 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1095:                 /* Picard part */
1096:                 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1097:                 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1098:                 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1099:                 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1100:                 /* extra Newton terms */
1101:                 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1102:                 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1103:                 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1104:                 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1105:               }
1106: #else
1107:               /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1108:               * benefit.  On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1109:               * by 25 to 30 percent. */
1110:               {
1111:                 __m128d
1112:                   keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]),
1113:                   kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]),
1114:                   dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]),
1115:                   t0,t3,pdgduv;
1116:                 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01),
1117:                                                 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10),
1118:                                                            _mm_mul_pd(dp2jweta,dpl2))));
1119:                 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01),
1120:                                                 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10),
1121:                                                            _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1))))));
1122:                 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)),
1123:                                                               _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))),
1124:                                                    _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10),
1125:                                                               _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1126:                 t0 = _mm_mul_pd(jwdeta,pdgduv);  /* jw deta [dgdu, dgdv] */
1127:                 t3 = _mm_mul_pd(t0,du1pdv0);     /* t0 (du1 + dv0) */
1128:                 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0),
1129:                                                                           _mm_add_pd(_mm_mul_pd(dp1,t3),
1130:                                                                                      _mm_mul_pd(t0,_mm_mul_pd(dp2,du2))))));
1131:                 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0),
1132:                                                                           _mm_add_pd(_mm_mul_pd(dp0,t3),
1133:                                                                                      _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2))))));
1134:               }
1135: #endif
1136:             }
1137:           }
1138:         }
1139:         if (k == 0) { /* on a bottom face */
1140:           if (thi->no_slip) {
1141:             const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1);
1142:             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1143:             Ke[0][0] = thi->dirichlet_scale*diagu;
1144:             Ke[1][1] = thi->dirichlet_scale*diagv;
1145:           } else {
1146:             for (q=0; q<4; q++) {
1147:               const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
1148:               PetscScalar u=0,v=0,rbeta2=0;
1149:               PetscReal beta2,dbeta2;
1150:               for (l=0; l<4; l++) {
1151:                 u     += phi[l]*n[l].u;
1152:                 v     += phi[l]*n[l].v;
1153:                 rbeta2 += phi[l]*pn[l].beta2;
1154:               }
1155:               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1156:               for (l=0; l<4; l++) {
1157:                 const PetscReal pp = phi[l];
1158:                 for (ll=0; ll<4; ll++) {
1159:                   const PetscReal ppl = phi[ll];
1160:                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1161:                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1162:                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1163:                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1164:                 }
1165:               }
1166:             }
1167:           }
1168:         }
1169:         {
1170:           const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}};
1171:           if (amode == THIASSEMBLY_TRIDIAGONAL) {
1172:             for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1173:               const PetscInt l4 = l+4;
1174:               const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}};
1175: #if defined COMPUTE_LOWER_TRIANGULAR
1176:               const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1177:                                              {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1178:                                              {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1179:                                              {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1180: #else
1181:               /* Same as above except for the lower-left block */
1182:               const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1183:                                              {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1184:                                              {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1185:                                              {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1186: #endif
1187:               MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);
1188:             }
1189:           } else {
1190: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1191:             for (l=0; l<8; l++) {
1192:               for (ll=l+1; ll<8; ll++) {
1193:                 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1194:                 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1195:                 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1196:                 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1197:               }
1198:             }
1199: #endif
1200:             MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);
1201:           }
1202:         }
1203:       }
1204:     }
1205:   }
1206:   THIDARestorePrm(info->da,&prm);

1208:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1209:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1210:   MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1211:   if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
1212:   return(0);
1213: }

1217: static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat B,THI thi)
1218: {

1222:   THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);
1223:   return(0);
1224: }

1228: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat B,THI thi)
1229: {

1233:   THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);
1234:   return(0);
1235: }

1239: static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[])
1240: {
1242:   THI thi;
1243:   PetscInt dim,M,N,m,n,s,dof;
1244:   DM dac,daf;
1245:   DMDAStencilType  st;
1246:   DM_DA *ddf,*ddc;

1249:   PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);
1250:   if (!thi) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance");
1251:   if (nlevels > 1) {
1252:     DMRefineHierarchy(dac0,nlevels-1,hierarchy);
1253:     dac = hierarchy[nlevels-2];
1254:   } else {
1255:     dac = dac0;
1256:   }
1257:   DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st);
1258:   if (dim != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs");
1259:   /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1260:   DMDACreate3d(((PetscObject)dac)->comm,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,PETSC_NULL,PETSC_NULL,PETSC_NULL,&daf);
1261:   daf->ops->getmatrix        = dac->ops->getmatrix;
1262:   daf->ops->getinterpolation = dac->ops->getinterpolation;
1263:   daf->ops->getcoloring      = dac->ops->getcoloring;
1264:   ddf = (DM_DA*)daf->data;
1265:   ddc = (DM_DA*)dac->data;
1266:   ddf->interptype            = ddc->interptype;

1268:   DMDASetFieldName(daf,0,"x-velocity");
1269:   DMDASetFieldName(daf,1,"y-velocity");
1270:   hierarchy[nlevels-1] = daf;
1271:   return(0);
1272: }

1276: static PetscErrorCode DMGetInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale)
1277: {
1279:   PetscInt       dim;

1286:   DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0);
1287:   if (dim  == 2) {
1288:     /* We are in the 2D problem and use normal DMDA interpolation */
1289:     DMGetInterpolation(dac,daf,A,scale);
1290:   } else {
1291:     PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart;
1292:     Mat B;

1294:     DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1295:     DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);
1296:     if (zs != 0) SETERRQ(PETSC_COMM_SELF,1,"unexpected");
1297:     MatCreate(((PetscObject)daf)->comm,&B);
1298:     MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);
1299: 
1300:     MatSetType(B,MATAIJ);
1301:     MatSeqAIJSetPreallocation(B,1,NULL);
1302:     MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);
1303:     MatGetOwnershipRange(B,&rstart,NULL);
1304:     MatGetOwnershipRangeColumn(B,&cstart,NULL);
1305:     for (i=xs; i<xs+xm; i++) {
1306:       for (j=ys; j<ys+ym; j++) {
1307:         for (k=zs; k<zs+zm; k++) {
1308:           PetscInt i2 = i*ym+j,i3 = i2*zm+k;
1309:           PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */
1310:           MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);
1311:         }
1312:       }
1313:     }
1314:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1315:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1316:     MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);
1317:     MatDestroy(&B);
1318:   }
1319:   return(0);
1320: }

1324: static PetscErrorCode DMGetMatrix_THI_Tridiagonal(DM da,const MatType mtype,Mat *J)
1325: {
1327:   Mat A;
1328:   PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3];
1329:   ISLocalToGlobalMapping ltog,ltogb;

1332:   DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0);
1333:   if (dim != 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D");
1334:   DMDAGetCorners(da,0,0,0,&zm,&ym,&xm);
1335:   DMGetLocalToGlobalMapping(da,&ltog);
1336:   DMGetLocalToGlobalMappingBlock(da,&ltogb);
1337:   MatCreate(((PetscObject)da)->comm,&A);
1338:   MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);
1339:   MatSetType(A,mtype);
1340:   MatSetFromOptions(A);
1341:   MatSeqAIJSetPreallocation(A,6,PETSC_NULL);
1342:   MatMPIAIJSetPreallocation(A,6,PETSC_NULL,0,PETSC_NULL);
1343:   MatSeqBAIJSetPreallocation(A,dof,3,PETSC_NULL);
1344:   MatMPIBAIJSetPreallocation(A,dof,3,PETSC_NULL,0,PETSC_NULL);
1345:   MatSeqSBAIJSetPreallocation(A,dof,2,PETSC_NULL);
1346:   MatMPISBAIJSetPreallocation(A,dof,2,PETSC_NULL,0,PETSC_NULL);
1347:   MatSetBlockSize(A,dof);
1348:   MatSetLocalToGlobalMapping(A,ltog,ltog);
1349:   MatSetLocalToGlobalMappingBlock(A,ltogb,ltogb);
1350:   DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
1351:   MatSetStencil(A,dim,dims,starts,dof);
1352:   *J = A;
1353:   return(0);
1354: }

1358: static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[])
1359: {
1360:   const PetscInt dof   = 2;
1361:   Units          units = thi->units;
1362:   MPI_Comm       comm;
1364:   PetscViewer    viewer;
1365:   PetscMPIInt    rank,size,tag,nn,nmax;
1366:   PetscInt       mx,my,mz,r,range[6];
1367:   PetscScalar    *x;

1370:   comm = ((PetscObject)thi)->comm;
1371:   DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);
1372:   MPI_Comm_size(comm,&size);
1373:   MPI_Comm_rank(comm,&rank);
1374:   PetscViewerASCIIOpen(comm,filename,&viewer);
1375:   PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1376:   PetscViewerASCIIPrintf(viewer,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,mz-1,0,my-1,0,mx-1);

1378:   DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);
1379:   nn = PetscMPIIntCast(range[3]*range[4]*range[5]*dof);
1380:   MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);
1381:   tag  = ((PetscObject) viewer)->tag;
1382:   VecGetArray(X,&x);
1383:   if (!rank) {
1384:     PetscScalar *array;
1385:     PetscMalloc(nmax*sizeof(PetscScalar),&array);
1386:     for (r=0; r<size; r++) {
1387:       PetscInt i,j,k,xs,xm,ys,ym,zs,zm;
1388:       PetscScalar *ptr;
1389:       MPI_Status status;
1390:       if (r) {
1391:         MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);
1392:       }
1393:       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1394:       if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,1,"should not happen");
1395:       if (r) {
1396:         MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);
1397:         MPI_Get_count(&status,MPIU_SCALAR,&nn);
1398:         if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,1,"should not happen");
1399:         ptr = array;
1400:       } else ptr = x;
1401:       PetscViewerASCIIPrintf(viewer,"    <Piece Extent=\"%d %d %d %d %d %d\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);

1403:       PetscViewerASCIIPrintf(viewer,"      <Points>\n");
1404:       PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1405:       for (i=xs; i<xs+xm; i++) {
1406:         for (j=ys; j<ys+ym; j++) {
1407:           for (k=zs; k<zs+zm; k++) {
1408:             PrmNode p;
1409:             PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz;
1410:             thi->initialize(thi,xx,yy,&p);
1411:             zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1);
1412:             PetscViewerASCIIPrintf(viewer,"%f %f %f\n",xx,yy,zz);
1413:           }
1414:         }
1415:       }
1416:       PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");
1417:       PetscViewerASCIIPrintf(viewer,"      </Points>\n");

1419:       PetscViewerASCIIPrintf(viewer,"      <PointData>\n");
1420:       PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1421:       for (i=0; i<nn; i+=dof) {
1422:         PetscViewerASCIIPrintf(viewer,"%f %f %f\n",PetscRealPart(ptr[i])*units->year/units->meter,PetscRealPart(ptr[i+1])*units->year/units->meter,0.0);
1423:       }
1424:       PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");

1426:       PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");
1427:       for (i=0; i<nn; i+=dof) {
1428:         PetscViewerASCIIPrintf(viewer,"%d\n",r);
1429:       }
1430:       PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");
1431:       PetscViewerASCIIPrintf(viewer,"      </PointData>\n");

1433:       PetscViewerASCIIPrintf(viewer,"    </Piece>\n");
1434:     }
1435:     PetscFree(array);
1436:   } else {
1437:     MPI_Send(range,6,MPIU_INT,0,tag,comm);
1438:     MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm);
1439:   }
1440:   VecRestoreArray(X,&x);
1441:   PetscViewerASCIIPrintf(viewer,"  </StructuredGrid>\n");
1442:   PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");
1443:   PetscViewerDestroy(&viewer);
1444:   return(0);
1445: }

1449: int main(int argc,char *argv[])
1450: {
1451:   MPI_Comm       comm;
1452:   DMMG           *dmmg;
1453:   THI            thi;
1454:   PetscInt       i;
1456:   PETSC_UNUSED PetscLogStage  stages[3];
1457:   PetscBool      repeat_fine_solve = PETSC_FALSE;

1459:   PetscInitialize(&argc,&argv,0,help);
1460:   comm = PETSC_COMM_WORLD;

1462:   /* We define two stages.  The first includes all setup costs and solves from a naive initial guess.  The second solve
1463:   * is more indicative of what might occur during time-stepping.  The initial guess is interpolated from the next
1464:   * coarser (as in the last step of grid sequencing), and so requires fewer Newton steps. */
1465:   PetscOptionsGetBool(NULL,"-repeat_fine_solve",&repeat_fine_solve,NULL);
1466:   PetscLogStageRegister("Full solve",&stages[0]);
1467:   if (repeat_fine_solve) {
1468:     PetscLogStageRegister("Fine-1 solve",&stages[1]);
1469:     PetscLogStageRegister("Fine-only solve",&stages[2]);
1470:   }

1472:   PetscLogStagePush(stages[0]);

1474:   THICreate(comm,&thi);
1475:   DMMGCreate(PETSC_COMM_WORLD,thi->nlevels,thi,&dmmg);
1476:   {
1477:     DM da;
1478:     PetscInt M = 3,N = 3,P = 2;
1479:     PetscOptionsBegin(comm,NULL,"Grid resolution options","");
1480:     {
1481:       PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);
1482:       N = M;
1483:       PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);
1484:       if (thi->coarse2d) {
1485:         PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);
1486:       } else {
1487:         PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);
1488:       }
1489:     }
1490:     PetscOptionsEnd();
1491:     if (thi->coarse2d) {
1492:       DMDACreate2d(comm,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);
1493:       da->ops->refinehierarchy  = DMRefineHierarchy_THI;
1494:       da->ops->getinterpolation = DMGetInterpolation_DA_THI;
1495:       PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);
1496:     } else {
1497:       DMDACreate3d(comm,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);
1498:     }
1499:     DMDASetFieldName(da,0,"x-velocity");
1500:     DMDASetFieldName(da,1,"y-velocity");
1501:     DMMGSetDM(dmmg,(DM)da);
1502:     DMDestroy(&da);
1503:   }
1504:   if (thi->tridiagonal) {
1505:     (DMMGGetDM(dmmg))->ops->getmatrix = DMGetMatrix_THI_Tridiagonal;
1506:   }
1507:   {
1508:     /* Use the user-defined matrix type on all but the coarse level */
1509:     DMMGSetMatType(dmmg,thi->mattype);
1510:     /* PCREDUNDANT only works with AIJ, and so do the third-party direct solvers.  So when running in parallel, we can't
1511:     * use the faster (S)BAIJ formats on the coarse level. */
1512:     PetscFree(dmmg[0]->mtype);
1513:     PetscStrallocpy(MATAIJ,&dmmg[0]->mtype);
1514:   }
1515:   PetscOptionsSetValue("-dmmg_form_function_ghost","1"); /* Spectacularly ugly API, our function evaluation provides ghost values */
1516:   DMMGSetSNESLocal(dmmg,THIFunctionLocal,THIJacobianLocal_3D_Full,0,0);
1517:   if (thi->tridiagonal) {
1518:     DMDASetLocalJacobian(DMMGGetDM(dmmg),(DMDALocalFunction1)THIJacobianLocal_3D_Tridiagonal);
1519:   }
1520:   if (thi->coarse2d) {
1521:     for (i=0; i<DMMGGetLevels(dmmg)-1; i++) {
1522:       DMDASetLocalJacobian(dmmg[i]->dm,(DMDALocalFunction1)THIJacobianLocal_2D);
1523:     }
1524:   }
1525:   for (i=0; i<DMMGGetLevels(dmmg); i++) {
1526:     /* This option is only valid for the SBAIJ format.  The matrices we assemble are symmetric, but the SBAIJ assembly
1527:     * functions will complain if we provide lower-triangular entries without setting this option. */
1528:     Mat B = dmmg[i]->B;
1529:     PetscBool  flg1,flg2;
1530:     PetscTypeCompare((PetscObject)B,MATSEQSBAIJ,&flg1);
1531:     PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);
1532:     if (flg1 || flg2) {
1533:       MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
1534:     }
1535:   }
1536:   MatSetOptionsPrefix(DMMGGetB(dmmg),"thi_");
1537:   DMMGSetFromOptions(dmmg);
1538:   THISetDMMG(thi,dmmg);

1540:   DMMGSetInitialGuess(dmmg,THIInitial);
1541:   DMMGSolve(dmmg);

1543:   PetscLogStagePop();
1544:   THISolveStatistics(thi,dmmg,0,"Full");
1545:   /* The first solve is complete */

1547:   if (repeat_fine_solve && DMMGGetLevels(dmmg) > 1) {
1548:     PetscInt nlevels = DMMGGetLevels(dmmg);
1549:     DMMG dmmgc = dmmg[nlevels-2],dmmgf = dmmg[nlevels-1];
1550:     Vec Xc = dmmgc->x,Xf = dmmgf->x;
1551:     MatRestrict(dmmgf->R,Xf,Xc);
1552:     VecPointwiseMult(Xc,Xc,dmmgf->Rscale);

1554:     /* Solve on the level with one coarsening, this is a more stringent test of latency */
1555:     PetscLogStagePush(stages[1]);
1556:     (*dmmgc->solve)(dmmg,nlevels-2);
1557:     PetscLogStagePop();
1558:     THISolveStatistics(thi,dmmg,1,"Fine-1");

1560:     MatInterpolate(dmmgf->R,Xc,Xf);

1562:     /* Solve again on the finest level, this is representative of what is needed in a time-stepping code */
1563:     PetscLogStagePush(stages[2]);
1564:     (*dmmgf->solve)(dmmg,nlevels-1);
1565:     PetscLogStagePop();
1566:     THISolveStatistics(thi,dmmg,0,"Fine");
1567:   }

1569:   {
1570:     PetscBool  flg;
1571:     char filename[PETSC_MAX_PATH_LEN] = "";
1572:     PetscOptionsGetString(PETSC_NULL,"-o",filename,sizeof(filename),&flg);
1573:     if (flg) {
1574:       THIDAVecView_VTK_XML(thi,DMMGGetDM(dmmg),DMMGGetx(dmmg),filename);
1575:     }
1576:   }

1578:   DMMGDestroy(dmmg);
1579:   THIDestroy(&thi);
1580:   PetscFinalize();
1581:   return 0;
1582: }