Zoltan2
Zoltan2_AlgScotch.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Zoltan2: A package of combinatorial algorithms for scientific computing
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Karen Devine (kddevin@sandia.gov)
39 // Erik Boman (egboman@sandia.gov)
40 // Siva Rajamanickam (srajama@sandia.gov)
41 //
42 // ***********************************************************************
43 //
44 // @HEADER
45 #ifndef _ZOLTAN2_ALGSCOTCH_HPP_
46 #define _ZOLTAN2_ALGSCOTCH_HPP_
47 
48 #include <Zoltan2_GraphModel.hpp>
49 #include <Zoltan2_Algorithm.hpp>
51 #include <Zoltan2_Util.hpp>
52 #include <Zoltan2_TPLTraits.hpp>
53 
57 
59 #ifndef HAVE_ZOLTAN2_SCOTCH
60 
61 namespace Zoltan2 {
62 // Error handling for when Scotch is requested
63 // but Zoltan2 not built with Scotch.
64 
65 template <typename Adapter>
66 class AlgPTScotch : public Algorithm<Adapter>
67 {
68 public:
69  AlgPTScotch(const RCP<const Environment> &env,
70  const RCP<const Comm<int> > &problemComm,
72  )
73  {
74  throw std::runtime_error(
75  "BUILD ERROR: Scotch requested but not compiled into Zoltan2.\n"
76  "Please set CMake flag Zoltan2_ENABLE_Scotch:BOOL=ON.");
77  }
78 };
79 }
80 #endif
81 
84 
85 #ifdef HAVE_ZOLTAN2_SCOTCH
86 
87 namespace Zoltan2 {
88 
89 // stdint.h for int64_t in scotch header
90 #include <stdint.h>
91 extern "C" {
92 #ifndef HAVE_ZOLTAN2_MPI
93 #include "scotch.h"
94 #else
95 #include "ptscotch.h"
96 #endif
97 }
98 
99 #ifdef SHOW_ZOLTAN2_SCOTCH_MEMORY
100 //
101 // Scotch keeps track of memory high water mark, but doesn't
102 // provide a way to get that number. So add this function:
103 // "size_t SCOTCH_getMemoryMax() { return memorymax;}"
104 // to src/libscotch/common_memory.c
105 //
106 // and this macro:
107 // "#define HAVE_SCOTCH_GETMEMORYMAX
108 // to include/ptscotch.h
109 //
110 // and compile scotch with -DCOMMON_MEMORY_TRACE
111 //
112 #ifdef HAVE_SCOTCH_GETMEMORYMAX
113  extern "C"{
114  extern size_t SCOTCH_getMemoryMax();
115  }
116 #else
117 #error "Either turn off ZOLTAN2_ENABLE_SCOTCH_MEMORY_REPORT in cmake configure, or see SHOW_ZOLTAN2_SCOTCH_MEMORY comment in Zoltan2_AlgScotch.hpp"
118 #endif // HAVE_SCOTCH_GETMEMORYMAX
119 #endif // SHOW_ZOLTAN2_SCOTCH_MEMORY
120 
121 template <typename Adapter>
122 class AlgPTScotch : public Algorithm<Adapter>
123 {
124 public:
125 
127  typedef typename Adapter::lno_t lno_t;
128  typedef typename Adapter::gno_t gno_t;
129  typedef typename Adapter::scalar_t scalar_t;
130  typedef typename Adapter::part_t part_t;
131 
142  AlgPTScotch(const RCP<const Environment> &env__,
143  const RCP<const Comm<int> > &problemComm__,
144  const RCP<graphModel_t> &model__) :
145  env(env__), problemComm(problemComm__),
146 #ifdef HAVE_ZOLTAN2_MPI
147  mpicomm(Teuchos::getRawMpiComm(*problemComm__)),
148 #endif
149  model(model__)
150  { }
151 
152  void partition(const RCP<PartitioningSolution<Adapter> > &solution);
153 
154 private:
155 
156  const RCP<const Environment> env;
157  const RCP<const Comm<int> > problemComm;
158 #ifdef HAVE_ZOLTAN2_MPI
159  const MPI_Comm mpicomm;
160 #endif
161  const RCP<GraphModel<typename Adapter::base_adapter_t> > model;
162 
163  void scale_weights(size_t n, StridedData<lno_t, scalar_t> &fwgts,
164  SCOTCH_Num *iwgts);
165 };
166 
167 
169 template <typename Adapter>
171  const RCP<PartitioningSolution<Adapter> > &solution
172 )
173 {
174  HELLO;
175 
176  size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();
177 
178  SCOTCH_Num partnbr=0;
179  TPL_Traits<SCOTCH_Num, size_t>::ASSIGN_TPL_T(partnbr, numGlobalParts);
180 
181 #ifdef HAVE_ZOLTAN2_MPI
182  int ierr = 0;
183  int me = problemComm->getRank();
184 
185  const SCOTCH_Num baseval = 0; // Base value for array indexing.
186  // GraphModel returns GNOs from base 0.
187 
188  SCOTCH_Strat stratstr; // Strategy string
189  // TODO: Set from parameters
190  SCOTCH_stratInit(&stratstr);
191 
192  // Allocate and initialize PTScotch Graph data structure.
193  SCOTCH_Dgraph *gr = SCOTCH_dgraphAlloc(); // Scotch distributed graph
194  ierr = SCOTCH_dgraphInit(gr, mpicomm);
195 
196  env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphInit",
197  !ierr, BASIC_ASSERTION, problemComm);
198 
199  // Get vertex info
200  ArrayView<const gno_t> vtxID;
201  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
202  size_t nVtx = model->getVertexList(vtxID, vwgts);
203  SCOTCH_Num vertlocnbr=0;
205  SCOTCH_Num vertlocmax = vertlocnbr; // Assumes no holes in global nums.
206 
207  // Get edge info
208  ArrayView<const gno_t> edgeIds;
209  ArrayView<const lno_t> offsets;
210  ArrayView<StridedData<lno_t, scalar_t> > ewgts;
211 
212  size_t nEdge = model->getEdgeList(edgeIds, offsets, ewgts);
213 
214  SCOTCH_Num edgelocnbr=0;
216  const SCOTCH_Num edgelocsize = edgelocnbr; // Assumes adj array is compact.
217 
218  SCOTCH_Num *vertloctab; // starting adj/vtx
220 
221  SCOTCH_Num *edgeloctab; // adjacencies
223 
224  // We don't use these arrays, but we need them as arguments to Scotch.
225  SCOTCH_Num *vendloctab = NULL; // Assume consecutive storage for adj
226  SCOTCH_Num *vlblloctab = NULL; // Vertex label array
227  SCOTCH_Num *edgegsttab = NULL; // Array for ghost vertices
228 
229  // Get weight info.
230  SCOTCH_Num *velotab = NULL; // Vertex weights
231  SCOTCH_Num *edlotab = NULL; // Edge weights
232 
233  int nVwgts = model->getNumWeightsPerVertex();
234  int nEwgts = model->getNumWeightsPerEdge();
235  if (nVwgts > 1 && me == 0) {
236  std::cerr << "Warning: NumWeightsPerVertex is " << nVwgts
237  << " but Scotch allows only one weight. "
238  << " Zoltan2 will use only the first weight per vertex."
239  << std::endl;
240  }
241  if (nEwgts > 1 && me == 0) {
242  std::cerr << "Warning: NumWeightsPerEdge is " << nEwgts
243  << " but Scotch allows only one weight. "
244  << " Zoltan2 will use only the first weight per edge."
245  << std::endl;
246  }
247 
248  if (nVwgts) {
249  velotab = new SCOTCH_Num[nVtx+1]; // +1 since Scotch wants all procs
250  // to have non-NULL arrays
251  scale_weights(nVtx, vwgts[0], velotab);
252  }
253 
254  if (nEwgts) {
255  edlotab = new SCOTCH_Num[nEdge+1]; // +1 since Scotch wants all procs
256  // to have non-NULL arrays
257  scale_weights(nEdge, ewgts[0], edlotab);
258  }
259 
260  // Build PTScotch distributed data structure
261  ierr = SCOTCH_dgraphBuild(gr, baseval, vertlocnbr, vertlocmax,
262  vertloctab, vendloctab, velotab, vlblloctab,
263  edgelocnbr, edgelocsize,
264  edgeloctab, edgegsttab, edlotab);
265 
266  env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphBuild",
267  !ierr, BASIC_ASSERTION, problemComm);
268 
269  // Create array for Scotch to return results in.
270  ArrayRCP<part_t> partList(new part_t[nVtx], 0, nVtx,true);
271  SCOTCH_Num *partloctab = NULL;
272  if (nVtx && (sizeof(SCOTCH_Num) == sizeof(part_t))) {
273  // Can write directly into the solution's memory
274  partloctab = (SCOTCH_Num *) partList.getRawPtr();
275  }
276  else {
277  // Can't use solution memory directly; will have to copy later.
278  // Note: Scotch does not like NULL arrays, so add 1 to always have non-null.
279  // ParMETIS has this same "feature." See Zoltan bug 4299.
280  partloctab = new SCOTCH_Num[nVtx+1];
281  }
282 
283  // Get target part sizes
284  float *partsizes = new float[numGlobalParts];
285  if (!solution->criteriaHasUniformPartSizes(0))
286  for (size_t i=0; i<numGlobalParts; i++)
287  partsizes[i] = solution->getCriteriaPartSize(0, i);
288  else
289  for (size_t i=0; i<numGlobalParts; i++)
290  partsizes[i] = 1.0 / float(numGlobalParts);
291 
292  // Allocate and initialize PTScotch target architecture data structure
293  SCOTCH_Arch archdat;
294  SCOTCH_archInit(&archdat);
295 
296  SCOTCH_Num velosum = 0;
297  SCOTCH_dgraphSize (gr, &velosum, NULL, NULL, NULL);
298  SCOTCH_Num *goalsizes = new SCOTCH_Num[partnbr];
299  // TODO: The goalsizes are set as in Zoltan; not sure it is correct there
300  // or here.
301  // It appears velosum is global NUMBER of vertices, not global total
302  // vertex weight. I think we should use the latter.
303  // Fix this when we add vertex weights.
304  for (SCOTCH_Num i = 0; i < partnbr; i++)
305  goalsizes[i] = SCOTCH_Num(ceil(velosum * partsizes[i]));
306  delete [] partsizes;
307 
308  SCOTCH_archCmpltw(&archdat, partnbr, goalsizes);
309 
310  // Call partitioning; result returned in partloctab.
311  ierr = SCOTCH_dgraphMap(gr, &archdat, &stratstr, partloctab);
312 
313  env->globalInputAssertion(__FILE__, __LINE__, "SCOTCH_dgraphMap",
314  !ierr, BASIC_ASSERTION, problemComm);
315 
316  SCOTCH_archExit(&archdat);
317  delete [] goalsizes;
318 
319  // TODO - metrics
320 
321 #ifdef SHOW_ZOLTAN2_SCOTCH_MEMORY
322  int me = env->comm_->getRank();
323 #endif
324 
325 #ifdef HAVE_SCOTCH_ZOLTAN2_GETMEMORYMAX
326  if (me == 0){
327  size_t scotchBytes = SCOTCH_getMemoryMax();
328  std::cout << "Rank " << me << ": Maximum bytes used by Scotch: ";
329  std::cout << scotchBytes << std::endl;
330  }
331 #endif
332 
333  // Clean up PTScotch
334  SCOTCH_dgraphExit(gr);
335  free(gr);
336  SCOTCH_stratExit(&stratstr);
337 
338  // Load answer into the solution.
339 
340  if ((sizeof(SCOTCH_Num) != sizeof(part_t)) || (nVtx == 0)) {
341  for (size_t i = 0; i < nVtx; i++) partList[i] = partloctab[i];
342  delete [] partloctab;
343  }
344 
345  solution->setParts(partList);
346 
347  env->memory("Zoltan2-Scotch: After creating solution");
348 
349  // Clean up copies made due to differing data sizes.
352 
353  if (nVwgts) delete [] velotab;
354  if (nEwgts) delete [] edlotab;
355 
356 #else // DO NOT HAVE MPI
357 
358  // TODO: Handle serial case with calls to Scotch.
359  // TODO: For now, assign everything to rank 0 and assume only one part.
360  // TODO: Can probably use the code above for loading solution,
361  // TODO: instead of duplicating it here.
362  // TODO
363  // TODO: Actual logic should call Scotch when number of processes == 1.
364  ArrayView<const gno_t> vtxID;
365  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
366  size_t nVtx = model->getVertexList(vtxID, vwgts);
367 
368  ArrayRCP<part_t> partList(new part_t[nVtx], 0, nVtx, true);
369  for (size_t i = 0; i < nVtx; i++) partList[i] = 0;
370 
371  solution->setParts(partList);
372 
373 #endif // DO NOT HAVE MPI
374 }
375 
377 // Scale and round scalar_t weights (typically float or double) to
378 // SCOTCH_Num (typically int or long).
379 // subject to sum(weights) <= max_wgt_sum.
380 // Only scale if deemed necessary.
381 //
382 // Note that we use ceil() instead of round() to avoid
383 // rounding to zero weights.
384 // Based on Zoltan's scale_round_weights, mode 1.
385 
386 template <typename Adapter>
388  size_t n,
390  SCOTCH_Num *iwgts
391 )
392 {
393  const double INT_EPSILON = 1e-5;
394 
395  SCOTCH_Num nonint, nonint_local = 0;
396  double sum_wgt, sum_wgt_local = 0.;
397  double max_wgt, max_wgt_local = 0.;
398 
399  // Compute local sums of the weights
400  // Check whether all weights are integers
401  for (size_t i = 0; i < n; i++) {
402  double fw = double(fwgts[i]);
403  if (!nonint_local){
404  SCOTCH_Num tmp = (SCOTCH_Num) floor(fw + .5); /* Nearest int */
405  if (fabs((double)tmp-fw) > INT_EPSILON) {
406  nonint_local = 1;
407  }
408  }
409  sum_wgt_local += fw;
410  if (fw > max_wgt_local) max_wgt_local = fw;
411  }
412 
413  Teuchos::reduceAll<int,SCOTCH_Num>(*problemComm, Teuchos::REDUCE_MAX, 1,
414  &nonint_local, &nonint);
415  Teuchos::reduceAll<int,double>(*problemComm, Teuchos::REDUCE_SUM, 1,
416  &sum_wgt_local, &sum_wgt);
417  Teuchos::reduceAll<int,double>(*problemComm, Teuchos::REDUCE_MAX, 1,
418  &max_wgt_local, &max_wgt);
419 
420  double scale = 1.;
421  const double max_wgt_sum = double(SCOTCH_NUMMAX/8);
422 
423  // Scaling needed if weights are not integers or weights'
424  // range is not sufficient
425  if (nonint || (max_wgt <= INT_EPSILON) || (sum_wgt > max_wgt_sum)) {
426  /* Calculate scale factor */
427  if (sum_wgt != 0.) scale = max_wgt_sum/sum_wgt;
428  }
429 
430  /* Convert weights to positive integers using the computed scale factor */
431  for (size_t i = 0; i < n; i++)
432  iwgts[i] = (SCOTCH_Num) ceil(double(fwgts[i])*scale);
433 
434 }
435 
436 } // namespace Zoltan2
437 
438 #endif // HAVE_ZOLTAN2_SCOTCH
439 
441 
442 
443 #endif
#define HELLO
static void DELETE_TPL_T_ARRAY(tpl_t **a)
fast typical checks for valid arguments
Defines the PartitioningSolution class.
AlgPTScotch(const RCP< const Environment > &env, const RCP< const Comm< int > > &problemComm, const RCP< GraphModel< typename Adapter::base_adapter_t > > &model)
Adapter::scalar_t scalar_t
A PartitioningSolution is a solution to a partitioning problem.
Adapter::part_t part_t
The StridedData class manages lists of weights or coordinates.
Algorithm defines the base class for all algorithms.
static void ASSIGN_TPL_T_ARRAY(tpl_t **a, ArrayView< zno_t > &b)
virtual void partition(const RCP< PartitioningSolution< Adapter > > &solution)
Partitioning method.
Traits class to handle conversions between gno_t/lno_t and TPL data types (e.g., ParMETIS&#39;s idx_t...
GraphModel defines the interface required for graph models.
static void ASSIGN_TPL_T(tpl_t &a, zno_t b)
Defines the GraphModel interface.
A gathering of useful namespace methods.