download the original source code.
  1 /*
  2    Example 10
  3 
  4    Interface:      Finite Element Interface (FEI)
  5 
  6    Compile with:   make ex10
  7 
  8    Sample run:     mpirun -np 4 ex10 -n 120 -solver 2
  9 
 10    To see options: ex10 -help
 11 
 12    Description:    This code solves a system corresponding to a discretization
 13                    of the Laplace equation -Delta u = 1 with zero boundary
 14                    conditions on the unit square.  The domain is split into
 15                    a n x n grid of quadrilateral elements and each processors
 16                    owns a horizontal strip of size m x n, where m = n/nprocs. We
 17                    use bilinear finite element discretization, so there are
 18                    nodes (vertices) that are shared between neighboring
 19                    processors. The Finite Element Interface is used to assemble
 20                    the matrix and solve the problem. Nine different solvers are
 21                    available.
 22 */
 23 
 24 #include <math.h>
 25 #include <iostream>
 26 #include <fstream>
 27 #include "_hypre_utilities.h"
 28 #include "LLNL_FEI_Impl.h"
 29 
 30 using namespace std;
 31 
 32 #include "vis.c"
 33 
 34 int main(int argc, char *argv[])
 35 {
 36    int i, j, k;
 37 
 38    int nprocs, mypid;
 39 
 40    int n, m, offset;
 41    double h;
 42 
 43    int solverID;
 44    int vis;
 45 
 46    // Initialize MPI
 47    MPI_Init(&argc, &argv);
 48    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
 49    MPI_Comm_rank(MPI_COMM_WORLD, &mypid);
 50 
 51    // Set default parameters
 52    n = 4*nprocs;
 53    solverID = 2;
 54    vis = 0;
 55 
 56    // Parse command line
 57    {
 58       int arg_index = 0;
 59       int print_usage = 0;
 60 
 61       while (arg_index < argc)
 62       {
 63          if ( strcmp(argv[arg_index], "-n") == 0 )
 64          {
 65             arg_index++;
 66             n = atoi(argv[arg_index++]);
 67          }
 68          else if ( strcmp(argv[arg_index], "-solver") == 0 )
 69          {
 70             arg_index++;
 71             solverID = atoi(argv[arg_index++]);
 72          }
 73          else if ( strcmp(argv[arg_index], "-vis") == 0 )
 74          {
 75             arg_index++;
 76             vis = 1;
 77          }
 78          else if ( strcmp(argv[arg_index], "-help") == 0 )
 79          {
 80             print_usage = 1;
 81             break;
 82          }
 83          else
 84          {
 85             arg_index++;
 86          }
 87       }
 88 
 89       if ((print_usage) && (mypid == 0))
 90       {
 91          printf("\n");
 92          printf("Usage: %s [<options>]\n", argv[0]);
 93          printf("\n");
 94          printf("  -n <n>              : problem size per processor (default: %d)\n", 4*nprocs);
 95          printf("  -solver <ID>        : solver ID\n");
 96          printf("                        0 - DS-PCG\n");
 97          printf("                        1 - ParaSails-PCG\n");
 98          printf("                        2 - AMG-PCG (default)\n");
 99          printf("                        3 - AMGSA-PCG\n");
100          printf("                        4 - Euclid-PCG\n");
101          printf("                        5 - DS-GMRES\n");
102          printf("                        6 - AMG-GMRES\n");
103          printf("                        7 - AMGSA-GMRES\n");
104          printf("                        8 - Euclid-GMRES\n");
105          printf("  -print_solution     : print the solution vector\n");
106          printf("\n");
107       }
108 
109       if (print_usage)
110       {
111          MPI_Finalize();
112          return (0);
113       }
114    }
115 
116    // Each processor owns a m x n grid of quadrilateral finite elements.
117    // The unknowns are located in the nodes (vertices of the mesh) and
118    // are numbered globally starting from the lower left corner and moving
119    // row-wise to the upper right corner.
120    m = n / nprocs;
121    offset = mypid*(m*(n+1));
122 
123    h = 1.0 / n; // mesh size
124 
125    // 1. FEI initialization phase
126 
127    // Instantiate the FEI object
128    LLNL_FEI_Impl *feiPtr = new LLNL_FEI_Impl(MPI_COMM_WORLD);
129 
130    // Set the matrix storage type to HYPRE
131    {
132       char **paramStrings = new char*[1];
133       paramStrings[0] = new char[100];
134       strcpy(paramStrings[0], "externalSolver HYPRE");
135       feiPtr->parameters(1, paramStrings);
136       delete paramStrings[0];
137       delete [] paramStrings;
138    }
139 
140    // The unknowns in FEI are called fields. Each field has an
141    // identifier (fieldID) and rank (fieldSize).
142    int nFields = 1;
143    int *fieldSizes = new int[nFields];  fieldSizes[0] = 1;
144    int *fieldIDs = new int[nFields];    fieldIDs[0] = 0;
145 
146    // Pass the field information to the FEI
147    feiPtr->initFields(nFields, fieldSizes, fieldIDs);
148 
149    // Elements are grouped into blocks (in this case one block), and we
150    // have to describe the number of elements in the block (nElems) as
151    // well as the fields (unknowns) per element.
152    int elemBlkID = 0;
153    int nElems = m*n;
154    int elemNNodes = 4; // number of (shared) nodes per element
155    int *nodeNFields = new int[elemNNodes]; // fields per node
156    int **nodeFieldIDs = new int*[elemNNodes]; // node-fields IDs
157    int elemNFields = 0; // number of (non-shared) fields per element
158    int *elemFieldIDs = NULL; // element-fields IDs
159    for (i = 0; i < elemNNodes; i++)
160    {
161       nodeNFields[i] = 1;
162       nodeFieldIDs[i] = new int[nodeNFields[i]];
163       nodeFieldIDs[i][0] = fieldIDs[0];
164    }
165 
166    // Pass the block information to the FEI. The interleave parameter
167    // controls how different fields are ordered in the element matrices.
168    int interleave = 0;
169    feiPtr->initElemBlock(elemBlkID, nElems, elemNNodes, nodeNFields,
170                          nodeFieldIDs, elemNFields, elemFieldIDs, interleave);
171 
172    // List the global indexes (IDs) of the nodes in each element
173    int **elemConn = new int*[nElems];
174    for (i = 0; i < m; i++)
175       for (j = 0; j < n; j++)
176       {
177          elemConn[i*n+j] = new int[elemNNodes]; // element with coordinates (i,j)
178          elemConn[i*n+j][0] = offset + i*(n+1)+j;     // node in the lower left
179          elemConn[i*n+j][1] = elemConn[i*n+j][0]+1;   // node in the lower right
180          elemConn[i*n+j][2] = elemConn[i*n+j][1]+n+1; // node in the upper right
181          elemConn[i*n+j][3] = elemConn[i*n+j][2]-1;   // node in the upper left
182       }
183 
184    // Pass the element topology information to the FEI
185    for (i = 0; i < nElems; i++)
186       feiPtr->initElem(elemBlkID, i, elemConn[i]);
187 
188    // List the global indexes of nodes that are shared between processors
189    int nShared, *SharedIDs, *SharedLengs, **SharedProcs;
190    if (mypid == 0)
191    {
192       // Nodes in the top row are shared
193       nShared = n+1;
194       SharedIDs = new int[nShared];
195       for (i = 0; i < nShared; i++)
196          SharedIDs[i] = offset + m*(n+1) + i;
197       SharedLengs = new int[nShared];
198       for (i = 0; i < nShared; i++)
199          SharedLengs[i] = 2;
200       SharedProcs = new int*[nShared];
201       for (i = 0; i < nShared; i++)
202       {
203          SharedProcs[i] = new int[SharedLengs[i]];
204          SharedProcs[i][0] = mypid;
205          SharedProcs[i][1] = mypid+1;
206       }
207    }
208    else if (mypid == nprocs-1)
209    {
210       // Nodes in the bottom row are shared
211       nShared = n+1;
212       SharedIDs = new int[nShared];
213       for (i = 0; i < nShared; i++)
214          SharedIDs[i] = offset + i;
215       SharedLengs = new int[nShared];
216       for (i = 0; i < nShared; i++)
217          SharedLengs[i] = 2;
218       SharedProcs = new int*[nShared];
219       for (i = 0; i < nShared; i++)
220       {
221          SharedProcs[i] = new int[SharedLengs[i]];
222          SharedProcs[i][0] = mypid-1;
223          SharedProcs[i][1] = mypid;
224       }
225    }
226    else
227    {
228       // Nodes in the top and bottom rows are shared
229       nShared = 2*(n+1);
230       SharedIDs = new int[nShared];
231       for (i = 0; i < n+1; i++)
232       {
233          SharedIDs[i] = offset + i;
234          SharedIDs[n+1+i] = offset + m*(n+1) + i;
235       }
236       SharedLengs = new int[nShared];
237       for (i = 0; i < nShared; i++)
238          SharedLengs[i] = 2;
239       SharedProcs = new int*[nShared];
240       for (i = 0; i < n+1; i++)
241       {
242          SharedProcs[i] = new int[SharedLengs[i]];
243          SharedProcs[i][0] = mypid-1;
244          SharedProcs[i][1] = mypid;
245 
246          SharedProcs[n+1+i] = new int[SharedLengs[n+1+i]];
247          SharedProcs[n+1+i][0] = mypid;
248          SharedProcs[n+1+i][1] = mypid+1;
249       }
250    }
251 
252    // Pass the shared nodes information to the FEI
253    if (nprocs != 1 && nShared > 0)
254       feiPtr->initSharedNodes(nShared, SharedIDs, SharedLengs, SharedProcs);
255 
256    // Finish the FEI initialization phase
257    feiPtr->initComplete();
258 
259    // 2. FEI load phase
260 
261    // Specify the boundary conditions
262    int nBCs, *BCEqn;
263    double **alpha, **beta, **gamma;
264    if (mypid == 0)
265    {
266       // Nodes in the bottom row and left and right columns
267       nBCs = n+1 + 2*m;
268       BCEqn = new int[nBCs];
269       for (i = 0; i < n+1; i++)
270          BCEqn[i] = offset + i;
271       for (i = 0; i < m; i++)
272       {
273          BCEqn[n+1+2*i] = offset + (i+1)*(n+1);
274          BCEqn[n+2+2*i] = offset + (i+1)*(n+1)+n;
275       }
276    }
277    else if (mypid == nprocs-1)
278    {
279       // Nodes in the top row and left and right columns
280       nBCs = n+1 + 2*m;
281       BCEqn = new int[nBCs];
282       for (i = 0; i < n+1; i++)
283          BCEqn[i] = offset + m*(n+1) + i;
284       for (i = 0; i < m; i++)
285       {
286          BCEqn[n+1+2*i] = offset + i*(n+1);
287          BCEqn[n+2+2*i] = offset + i*(n+1)+n;
288       }
289    }
290    else
291    {
292       // Nodes in the left and right columns
293       nBCs = 2*(m+1);
294       BCEqn = new int[nBCs];
295       for (i = 0; i < m+1; i++)
296       {
297          BCEqn[2*i]   = offset + i*(n+1);
298          BCEqn[2*i+1] = offset + i*(n+1)+n;
299       }
300    }
301 
302    // The arrays alpha, beta and gamma specify the type of boundary
303    // condition (essential, natural, mixed). The most general form
304    // for Laplace problems is alpha U + beta dU/dn = gamma. In this
305    // example we impose zero Dirichlet boundary conditions.
306    alpha = new double*[nBCs];
307    beta  = new double*[nBCs];
308    gamma = new double*[nBCs];
309    for (i = 0; i < nBCs; i++)
310    {
311       alpha[i] = new double[1];  alpha[i][0] = 1.0;
312       beta[i]  = new double[1];  beta[i][0]  = 0.0;
313       gamma[i] = new double[1];  gamma[i][0] = 0.0;
314    }
315 
316    // Pass the boundary condition information to the FEI
317    feiPtr->loadNodeBCs(nBCs, BCEqn, fieldIDs[0], alpha, beta, gamma);
318 
319    // Specify element stiffness matrices
320    double ***elemStiff = new double**[nElems];
321    for (i = 0; i < m; i++)
322       for (j = 0; j < n; j++)
323       {
324          // Element with coordinates (i,j)
325          elemStiff[i*n+j] = new double*[elemNNodes];
326          for (k = 0; k < elemNNodes; k++)
327             elemStiff[i*n+j][k] = new double[elemNNodes];
328 
329          // Stiffness matrix for the reference square
330          //                3 +---+ 2
331          //                  |   |
332          //                0 +---+ 1
333 
334          double **A = elemStiff[i*n+j];
335 
336          for (k = 0; k < 4; k++)
337             A[k][k] = 2/3.;
338 
339          A[0][1] = A[1][0] = -1/6.;
340          A[0][2] = A[2][0] = -1/3.;
341          A[0][3] = A[3][0] = -1/6.;
342          A[1][2] = A[2][1] = -1/6.;
343          A[1][3] = A[3][1] = -1/3.;
344          A[2][3] = A[3][2] = -1/6.;
345       }
346 
347    // Specify element load vectors
348    double *elemLoad = new double[nElems*elemNNodes];
349    for (i = 0; i < nElems*elemNNodes; i++)
350       elemLoad[i] = h*h/4;
351 
352    // Assemble the matrix. The elemFormat parameter describes
353    // the storage (symmetric/non-symmetric, row/column-wise)
354    // of the element stiffness matrices.
355    int elemFormat = 0;
356    for (i = 0; i < nElems; i++)
357       feiPtr->sumInElem(elemBlkID, i, elemConn[i], elemStiff[i],
358                         &(elemLoad[i*elemNNodes]), elemFormat);
359 
360    // Finish the FEI load phase
361    feiPtr->loadComplete();
362 
363    // Clean up
364    for (i = 0; i < nElems; i++) delete [] elemConn[i];
365    delete [] elemConn;
366    for (i = 0; i < nElems; i++)
367    {
368       for (j = 0; j < elemNNodes; j++) delete [] elemStiff[i][j];
369       delete [] elemStiff[i];
370    }
371    delete [] elemStiff;
372    delete [] elemLoad;
373 
374    delete [] BCEqn;
375    for (i = 0; i < nBCs; i++)
376    {
377       delete [] alpha[i];
378       delete [] beta[i];
379       delete [] gamma[i];
380    }
381    delete [] alpha;
382    delete [] beta;
383    delete [] gamma;
384 
385    if (nShared > 0)
386    {
387       delete [] SharedIDs;
388       delete [] SharedLengs;
389       for (i = 0; i < nShared; i++) delete [] SharedProcs[i];
390       delete [] SharedProcs;
391    }
392 
393    delete [] nodeNFields;
394    for (i = 0; i < elemNNodes; i++) delete [] nodeFieldIDs[i];
395    delete [] nodeFieldIDs;
396 
397    delete [] fieldSizes;
398    delete [] fieldIDs;
399 
400    // 3. Set up problem parameters and pass them to the FEI
401    {
402       int nParams = 19;
403       char **paramStrings = new char*[nParams];
404       for (i = 0; i < nParams; i++)
405          paramStrings[i] = new char[100];
406 
407       strcpy(paramStrings[0], "outputLevel 2");
408       switch(solverID)
409       {
410          case 0:
411             strcpy(paramStrings[1], "solver cg");
412             strcpy(paramStrings[2], "preconditioner diagonal");
413             break;
414          case 1:
415             strcpy(paramStrings[1], "solver cg");
416             strcpy(paramStrings[2], "preconditioner parasails");
417             break;
418          default:
419          case 2:
420             strcpy(paramStrings[1], "solver cg");
421             strcpy(paramStrings[2], "preconditioner boomeramg");
422             break;
423          case 3:
424             strcpy(paramStrings[1], "solver cg");
425             strcpy(paramStrings[2], "preconditioner mli");
426             break;
427          case 4:
428             strcpy(paramStrings[1], "solver cg");
429             strcpy(paramStrings[2], "preconditioner euclid");
430             break;
431          case 5:
432             strcpy(paramStrings[1], "solver gmres");
433             strcpy(paramStrings[2], "preconditioner diagonal");
434             break;
435          case 6:
436             strcpy(paramStrings[1], "solver gmres");
437             strcpy(paramStrings[2], "preconditioner boomeramg");
438             break;
439          case 7:
440             strcpy(paramStrings[1], "solver gmres");
441             strcpy(paramStrings[2], "preconditioner mli");
442             break;
443          case 8:
444             strcpy(paramStrings[1], "solver gmres");
445             strcpy(paramStrings[2], "preconditioner euclid");
446             break;
447       }
448       strcpy(paramStrings[3], "maxIterations 100");
449       strcpy(paramStrings[4], "tolerance 1e-6");
450       strcpy(paramStrings[5], "gmresDim 30");
451       strcpy(paramStrings[6], "amgNumSweeps 1");
452       strcpy(paramStrings[7], "amgCoarsenType falgout");
453       strcpy(paramStrings[8], "amgRelaxType hybridsym");
454       strcpy(paramStrings[9], "amgSystemSize 1");
455       strcpy(paramStrings[10], "amgStrongThreshold 0.25");
456       strcpy(paramStrings[11], "MLI smoother HSGS");
457       strcpy(paramStrings[12], "MLI numSweeps 1");
458       strcpy(paramStrings[13], "MLI smootherWeight 1.0");
459       strcpy(paramStrings[14], "MLI nodeDOF 1");
460       strcpy(paramStrings[15], "MLI nullSpaceDim 1");
461       strcpy(paramStrings[16], "MLI minCoarseSize 50");
462       strcpy(paramStrings[17], "MLI outputLevel 0");
463       strcpy(paramStrings[18], "parasailsSymmetric outputLevel 0");
464 
465       feiPtr->parameters(nParams, paramStrings);
466 
467       for (i = 0; i < nParams; i++)
468          delete [] paramStrings[i];
469       delete [] paramStrings;
470    }
471 
472    // 4. Solve the system
473    int status;
474    feiPtr->solve(&status);
475 
476    // 5. Save the solution for GLVis visualization, see vis/glvis-ex10.sh
477    if (vis)
478    {
479       int numNodes, *nodeIDList, *solnOffsets;
480       double *solnValues;
481 
482       // Get the number of nodes in the element block
483       feiPtr->getNumBlockActNodes(elemBlkID, &numNodes);
484 
485       // Get their global IDs
486       nodeIDList = new int[numNodes];
487       feiPtr->getBlockNodeIDList(elemBlkID, numNodes, nodeIDList);
488 
489       // Get the values corresponding to nodeIDList
490       solnOffsets = new int[numNodes];
491       solnValues = new double[numNodes];
492       feiPtr->getBlockNodeSolution(elemBlkID, numNodes, nodeIDList,
493                                    solnOffsets, solnValues);
494 
495       // Find the location of the ith local node
496       for (i = 0; i < numNodes; i++)
497          solnOffsets[nodeIDList[i]-offset] = i;
498 
499       // Save the ordered nodal values to a file
500       char sol_out[20];
501       sprintf(sol_out, "%s.%06d", "vis/ex10.sol", mypid);
502       ofstream sol(sol_out);
503       sol << "FiniteElementSpace\n"
504           << "FiniteElementCollection: H1_2D_P1\n"
505           << "VDim: 1\n"
506           << "Ordering: 0\n\n";
507       for (i = 0; i < numNodes; i++)
508          sol << solnValues[solnOffsets[i]] << endl;
509 
510       // Save local finite element mesh
511       GLVis_PrintLocalSquareMesh("vis/ex10.mesh", n, m, h, 0, mypid*h*m, mypid);
512 
513       // additional visualization data
514       if (mypid == 0)
515       {
516          char data_out[20];
517          sprintf(data_out, "%s", "vis/ex10.data");
518          ofstream data(data_out);
519          data << "np " << nprocs << endl;
520       }
521 
522       // Clean up
523       delete [] solnValues;
524       delete [] solnOffsets;
525       delete [] nodeIDList;
526    }
527    delete feiPtr;
528 
529    // Finalize MPI
530    MPI_Finalize();
531 
532    return (0);
533 }


syntax highlighted by Code2HTML, v. 0.9.1