Zoltan2_TaskMapping.hpp
#ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
#define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_

#include <fstream>
#include <ctime>
#include <vector>
#include "Zoltan2_AlgMultiJagged.hpp"
#include "Teuchos_ArrayViewDecl.hpp"
#include "Zoltan2_PartitionMapping.hpp"
#include "Zoltan2_MachineRepresentation.hpp"
#include "Teuchos_ReductionOp.hpp"

#include "Zoltan2_XpetraMultiVectorAdapter.hpp"

#include "Teuchos_Comm.hpp"
#ifdef HAVE_ZOLTAN2_MPI
#  include "Teuchos_DefaultMpiComm.hpp"
#else
#  include "Teuchos_DefaultSerialComm.hpp"
#endif // HAVE_ZOLTAN2_MPI

//#define gnuPlot
namespace Teuchos{

/*! \brief Zoltan2_ReduceBestMapping Class, reduces the minimum cost
 *  mapping; ties are broken with the minimum proc id.
 */
template <typename Ordinal, typename T>
class Zoltan2_ReduceBestMapping : public ValueTypeReductionOp<Ordinal,T>
{
private:
  T _EPSILON;

public:
  /*! \brief Default Constructor.
   */
  Zoltan2_ReduceBestMapping(): _EPSILON(std::numeric_limits<T>::epsilon()){}

  /*! \brief Implement Teuchos::ValueTypeReductionOp interface.
   */
  void reduce(const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
  {
    for (Ordinal i = 0; i < count; i++){
      if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      } else if (inBuffer[0] - inoutBuffer[0] < _EPSILON &&
                 inBuffer[1] - inoutBuffer[1] < _EPSILON){
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      }
    }
  }
};
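
// Usage sketch (illustrative; mirrors getBestMapping() further below). The
// buffers hold a (cost, rank) pair: element 0 is the mapping cost, element 1
// the rank that produced it. Every rank receives the globally best pair:
//
//   double localCost[2]  = {myCost, double(comm->getRank())};
//   double globalCost[2] = {std::numeric_limits<double>::max(),
//                           std::numeric_limits<double>::max()};
//   Teuchos::Zoltan2_ReduceBestMapping<int,double> reduceBest;
//   Teuchos::reduceAll<int,double>(*comm, reduceBest, 2, localCost, globalCost);
//   // globalCost[0] is the minimum cost, globalCost[1] the winning rank.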
} // namespace Teuchos


namespace Zoltan2{

template <typename it>
inline it z2Fact(it x) {
  return (x == 1 ? x : x * z2Fact<it>(x - 1));
}

template <typename gno_t, typename part_t>
class GNO_LNO_PAIR{
public:
  gno_t gno;
  part_t part;
};
71 
72 //returns the ith permutation indices.
73 template <typename IT>
74 void ithPermutation(const IT n, IT i, IT *perm)
75 {
76  IT j, k = 0;
77  IT *fact = new IT[n];
78 
79 
80  // compute factorial numbers
81  fact[k] = 1;
82  while (++k < n)
83  fact[k] = fact[k - 1] * k;
84 
85  // compute factorial code
86  for (k = 0; k < n; ++k)
87  {
88  perm[k] = i / fact[n - 1 - k];
89  i = i % fact[n - 1 - k];
90  }
91 
92  // readjust values to obtain the permutation
93  // start from the end and check if preceding values are lower
94  for (k = n - 1; k > 0; --k)
95  for (j = k - 1; j >= 0; --j)
96  if (perm[j] <= perm[k])
97  perm[k]++;
98 
99  delete [] fact;
100 }
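
// Example (illustrative): with n = 3 the permutations are indexed
// lexicographically, so i = 4 yields the fifth one:
//
//   int perm[3];
//   ithPermutation<int>(3, 4, perm);  // perm = {2, 0, 1}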

template <typename part_t>
void getGridCommunicationGraph(part_t taskCount, part_t *&task_comm_xadj, part_t *&task_comm_adj, std::vector<int> grid_dims){
  int dim = grid_dims.size();
  int neighborCount = 2 * dim;
  task_comm_xadj = allocMemory<part_t>(taskCount+1);
  task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);

  part_t neighBorIndex = 0;
  task_comm_xadj[0] = 0;
  for (part_t i = 0; i < taskCount; ++i){
    part_t prevDimMul = 1;
    for (int j = 0; j < dim; ++j){
      part_t lNeighbor = i - prevDimMul;
      part_t rNeighbor = i + prevDimMul;
      prevDimMul *= grid_dims[j];
      if (lNeighbor >= 0 && lNeighbor / prevDimMul == i / prevDimMul && lNeighbor < taskCount){
        task_comm_adj[neighBorIndex++] = lNeighbor;
      }
      if (rNeighbor >= 0 && rNeighbor / prevDimMul == i / prevDimMul && rNeighbor < taskCount){
        task_comm_adj[neighBorIndex++] = rNeighbor;
      }
    }
    task_comm_xadj[i+1] = neighBorIndex;
  }
}
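
// Example (illustrative): a 2x2 grid (grid_dims = {2,2}) yields the 4-cycle
// 0-1-3-2, in CSR form:
//
//   task_comm_xadj = {0, 2, 4, 6, 8}
//   task_comm_adj  = {1,2,  0,3,  3,0,  2,1}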
//returns the center of the parts.
template <typename Adapter, typename scalar_t, typename part_t>
void getSolutionCenterCoordinates(
    const Environment *envConst,
    const Teuchos::Comm<int> *comm,
    const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *coords,
    const Zoltan2::PartitioningSolution<Adapter> *soln_,
    int coordDim,
    part_t ntasks,
    scalar_t **partCenters){

  typedef typename Adapter::lno_t lno_t;
  typedef typename Adapter::gno_t gno_t;

  typedef StridedData<lno_t, scalar_t> input_t;
  ArrayView<const gno_t> gnos;
  ArrayView<input_t> xyz;
  ArrayView<input_t> wgts;
  coords->getCoordinates(gnos, xyz, wgts);

  //local and global num coordinates.
  lno_t numLocalCoords = coords->getLocalNumCoordinates();
  //gno_t numGlobalCoords = coords->getGlobalNumCoordinates();

  //local number of points in each part.
  gno_t *point_counts = allocMemory<gno_t>(ntasks);
  memset(point_counts, 0, sizeof(gno_t) * ntasks);

  //global number of points in each part.
  gno_t *global_point_counts = allocMemory<gno_t>(ntasks);

  scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);

  for (int dim = 0; dim < coordDim; dim++){
    ArrayRCP<const scalar_t> ar;
    xyz[dim].getInputArray(ar);
    //multiJagged coordinate values assignment
    multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
    memset(partCenters[dim], 0, sizeof(scalar_t) * ntasks);
  }

  //get parts with parallel gnos.
  const part_t *parts = soln_->getPartListView();
  /*
  for (lno_t i=0; i < numLocalCoords; i++){
    cout << "me:" << comm->getRank() << " gno:" << soln_gnos[i] << " tmp.part :" << parts[i] << endl;
  }
  */

  envConst->timerStart(MACRO_TIMERS, "Mapping - Hashing Creation");
  //hash vector
  std::vector< std::vector<GNO_LNO_PAIR<gno_t, part_t> > > hash(numLocalCoords);

  //insert each point in solution to hash.
  for (lno_t i = 0; i < numLocalCoords; i++){
    GNO_LNO_PAIR<gno_t, part_t> tmp;
    tmp.gno = gnos[i];
    tmp.part = parts[i];
    //cout << "gno:" << tmp.gno << " tmp.part :" << tmp.part << endl;
    //count the local number of points in each part.
    ++point_counts[tmp.part];
    lno_t hash_index = tmp.gno % numLocalCoords;
    hash[hash_index].push_back(tmp);
  }

  envConst->timerStop(MACRO_TIMERS, "Mapping - Hashing Creation");
  //get global number of points in each part.
  reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
      ntasks, point_counts, global_point_counts);

  envConst->timerStart(MACRO_TIMERS, "Mapping - Hashing Search");
  //add up all coordinates in each part.
  for (lno_t i = 0; i < numLocalCoords; i++){
    gno_t g = gnos[i];
    lno_t hash_index = g % numLocalCoords;
    lno_t hash_size = hash[hash_index].size();
    part_t p = -1;
    for (lno_t j = 0; j < hash_size; ++j){
      if (hash[hash_index][j].gno == g){
        p = hash[hash_index][j].part;
        break;
      }
    }
    if (p == -1) {
      std::cerr << "ERROR AT HASHING FOR GNO:" << g << " LNO:" << i << std::endl;
    }
    //add up all coordinates in each part.
    for (int j = 0; j < coordDim; ++j){
      scalar_t c = multiJagged_coordinates[j][i];
      partCenters[j][p] += c;
    }
  }
  envConst->timerStop(MACRO_TIMERS, "Mapping - Hashing Search");

  for (int j = 0; j < coordDim; ++j){
    for (part_t i = 0; i < ntasks; ++i){
      partCenters[j][i] /= global_point_counts[i];
    }
  }

  scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
  for (int j = 0; j < coordDim; ++j){
    reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
        ntasks, partCenters[j], tmpCoords);

    scalar_t *tmp = partCenters[j];
    partCenters[j] = tmpCoords;
    tmpCoords = tmp;
  }

  freeArray<gno_t>(point_counts);
  freeArray<gno_t>(global_point_counts);

  freeArray<scalar_t>(tmpCoords);
  freeArray<scalar_t *>(multiJagged_coordinates);
}
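
// In effect each entry partCenters[d][p] is the arithmetic mean of
// coordinate dimension d over all global points assigned to part p:
//
//   partCenters[d][p] = (sum of coordinate d over points in part p)
//                       / global_point_counts[p]
//
// with both the sums and the counts accumulated across all ranks.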

/*! \brief KmeansHeap Class, max heap, but holds the minimum values.
 */
template <class IT, class WT>
class KmeansHeap{
  IT heapSize;
  IT *indices;
  WT *values;
  WT _EPSILON;

public:
  void setHeapsize(IT heapsize_){
    this->heapSize = heapsize_;
    this->indices = allocMemory<IT>(heapsize_);
    this->values = allocMemory<WT>(heapsize_);
    this->_EPSILON = std::numeric_limits<WT>::epsilon();
  }

  ~KmeansHeap(){
    freeArray<IT>(this->indices);
    freeArray<WT>(this->values);
  }

  void addPoint(IT index, WT distance){
    WT maxVal = this->values[0];
    //add only if the distance is smaller than the current maximum distance.
    //cout << "indeX:" << index << "distance:" << distance << " maxVal:" << maxVal << endl;
    if (distance >= maxVal) return;
    else {
      this->values[0] = distance;
      this->indices[0] = index;
      this->push_down(0);
    }
  }

  //heap push down operation
  void push_down(IT index_on_heap){
    IT child_index1 = 2 * index_on_heap + 1;
    IT child_index2 = 2 * index_on_heap + 2;

    IT biggerIndex = -1;
    if (child_index1 < this->heapSize && child_index2 < this->heapSize){

      if (this->values[child_index1] < this->values[child_index2]){
        biggerIndex = child_index2;
      }
      else {
        biggerIndex = child_index1;
      }
    }
    else if (child_index1 < this->heapSize){
      biggerIndex = child_index1;
    }
    else if (child_index2 < this->heapSize){
      biggerIndex = child_index2;
    }
    if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){
      WT tmpVal = this->values[biggerIndex];
      this->values[biggerIndex] = this->values[index_on_heap];
      this->values[index_on_heap] = tmpVal;

      IT tmpIndex = this->indices[biggerIndex];
      this->indices[biggerIndex] = this->indices[index_on_heap];
      this->indices[index_on_heap] = tmpIndex;
      this->push_down(biggerIndex);
    }
  }

  void initValues(){
    WT MAXVAL = std::numeric_limits<WT>::max();
    for (IT i = 0; i < this->heapSize; ++i){
      this->values[i] = MAXVAL;
      this->indices[i] = -1;
    }
  }

  //returns the total distance to center in the cluster.
  WT getTotalDistance(){
    WT nc = 0;
    for (IT j = 0; j < this->heapSize; ++j){
      nc += this->values[j];
      //cout << "index:" << this->indices[j] << " distance:" << this->values[j] << endl;
    }
    return nc;
  }

  //returns the new center of the cluster.
  bool getNewCenters(WT *center, WT **coords, int dimension){
    bool moved = false;
    for (int i = 0; i < dimension; ++i){
      WT nc = 0;
      for (IT j = 0; j < this->heapSize; ++j){
        IT k = this->indices[j];
        //cout << "i:" << i << " dim:" << dimension << " k:" << k << " heapSize:" << heapSize << endl;
        nc += coords[i][k];
      }
      nc /= this->heapSize;
      moved = (ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved);
      center[i] = nc;
    }
    return moved;
  }

  void copyCoordinates(IT *permutation){
    for (IT i = 0; i < this->heapSize; ++i){
      permutation[i] = this->indices[i];
    }
  }
};
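
// Usage sketch (illustrative): the max-heap is keyed on distance, so after
// streaming points through addPoint() it retains the heapSize points
// closest to a center; the root (values[0]) is evicted first whenever a
// closer point arrives.
//
//   KmeansHeap<int,double> h;
//   h.setHeapsize(3);
//   h.initValues();
//   h.addPoint(7, 2.5); h.addPoint(1, 0.5); h.addPoint(4, 1.0); h.addPoint(9, 0.7);
//   // heap now holds indices 4, 9, 1 (distances 1.0, 0.7, 0.5);
//   // index 7 (distance 2.5) was evicted.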

/*! \brief KMeansCluster Class.
 */
template <class IT, class WT>
class KMeansCluster{

  int dimension;
  KmeansHeap<IT,WT> closestPoints;

public:
  WT *center;

  ~KMeansCluster(){
    freeArray<WT>(center);
  }

  void setParams(int dimension_, int heapsize){
    this->dimension = dimension_;
    this->center = allocMemory<WT>(dimension_);
    this->closestPoints.setHeapsize(heapsize);
  }

  void clearHeap(){
    this->closestPoints.initValues();
  }

  bool getNewCenters(WT **coords){
    return this->closestPoints.getNewCenters(center, coords, dimension);
  }

  //returns the distance of the coordinate to the center.
  //also adds it to the heap.
  WT getDistance(IT index, WT **elementCoords){
    WT distance = 0;
    for (int i = 0; i < this->dimension; ++i){
      WT d = (center[i] - elementCoords[i][index]);
      distance += d * d;
    }
    distance = pow(distance, WT(1.0 / this->dimension));
    closestPoints.addPoint(index, distance);
    return distance;
  }

  WT getDistanceToCenter(){
    return closestPoints.getTotalDistance();
  }

  void copyCoordinates(IT *permutation){
    closestPoints.copyCoordinates(permutation);
  }
};

/*! \brief KMeansAlgorithm Class that performs clustering of the
 *  coordinates, and returns the closest set of coordinates.
 */
template <class IT, class WT>
class KMeansAlgorithm{

  int dim;
  IT numElements;
  WT **elementCoords;
  IT numClusters;
  IT required_elements;
  KMeansCluster<IT,WT> *clusters;
  WT *maxCoordinates;
  WT *minCoordinates;
public:
  ~KMeansAlgorithm(){
    freeArray<KMeansCluster<IT,WT> >(clusters);
    freeArray<WT>(maxCoordinates);
    freeArray<WT>(minCoordinates);
  }

  /*! \brief KMeansAlgorithm Constructor.
   */
  KMeansAlgorithm(
      int dim_,
      IT numElements_,
      WT **elementCoords_,
      IT required_elements_):
    dim(dim_),
    numElements(numElements_),
    elementCoords(elementCoords_),
    numClusters((1 << dim_) + 1),
    required_elements(required_elements_)
  {
    this->clusters = allocMemory<KMeansCluster<IT,WT> >(this->numClusters);
    //set dimension and the number of required elements for all clusters.
    for (int i = 0; i < numClusters; ++i){
      this->clusters[i].setParams(this->dim, this->required_elements);
    }

    this->maxCoordinates = allocMemory<WT>(this->dim);
    this->minCoordinates = allocMemory<WT>(this->dim);

    //obtain the min and max coordinates for each dimension.
    for (int j = 0; j < dim; ++j){
      this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
      for (IT i = 1; i < numElements; ++i){
        WT t = this->elementCoords[j][i];
        if (t > this->maxCoordinates[j]){
          this->maxCoordinates[j] = t;
        }
        if (t < minCoordinates[j]){
          this->minCoordinates[j] = t;
        }
      }
    }

    //assign initial cluster centers.
    for (int j = 0; j < dim; ++j){
      int mod = (1 << (j+1));
      for (int i = 0; i < numClusters - 1; ++i){
        WT c = 0;
        if ((i % mod) < mod / 2){
          c = this->maxCoordinates[j];
          //cout << "i:" << i << " j:" << j << " setting max:" << c << endl;
        }
        else {
          c = this->minCoordinates[j];
        }
        this->clusters[i].center[j] = c;
      }
    }

    //last cluster center is placed to middle.
    for (int j = 0; j < dim; ++j){
      this->clusters[numClusters - 1].center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
    }

    /*
    for (int i = 0; i < numClusters; ++i){
      //cout << endl << "cluster:" << i << endl << "\t";
      for (int j = 0; j < dim; ++j){
        cout << this->clusters[i].center[j] << " ";
      }
    }
    */
  }

  //performs kmeans clustering of coordinates.
  void kmeans(){
    for (int it = 0; it < 10; ++it){
      //cout << "it:" << it << endl;
      for (IT j = 0; j < this->numClusters; ++j){
        this->clusters[j].clearHeap();
      }
      for (IT i = 0; i < this->numElements; ++i){
        //cout << "i:" << i << " numEl:" << this->numElements << endl;
        for (IT j = 0; j < this->numClusters; ++j){
          //cout << "j:" << j << " numClusters:" << this->numClusters << endl;
          this->clusters[j].getDistance(i, this->elementCoords);
        }
      }
      bool moved = false;
      for (IT j = 0; j < this->numClusters; ++j){
        moved = (this->clusters[j].getNewCenters(this->elementCoords) || moved);
      }
      if (!moved){
        break;
      }
    }
  }

  //finds the cluster in which the coordinates are the closest to each other.
  void getMinDistanceCluster(IT *procPermutation){

    WT minDistance = this->clusters[0].getDistanceToCenter();
    IT minCluster = 0;
    //cout << "j:" << 0 << " minDistance:" << minDistance << " minTmpDistance:" << minDistance << " minCluster:" << minCluster << endl;
    for (IT j = 1; j < this->numClusters; ++j){
      WT minTmpDistance = this->clusters[j].getDistanceToCenter();
      //cout << "j:" << j << " minDistance:" << minDistance << " minTmpDistance:" << minTmpDistance << " minCluster:" << minCluster << endl;
      if (minTmpDistance < minDistance){
        minDistance = minTmpDistance;
        minCluster = j;
      }
    }

    //cout << "minCluster:" << minCluster << endl;
    this->clusters[minCluster].copyCoordinates(procPermutation);
  }
};
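
// Usage sketch (illustrative; mirrors getClosestSubset() further below):
// pick the 4 processors, out of 8 given as 3-D coordinates pcoords[3][8],
// that lie closest together.
//
//   KMeansAlgorithm<int,double> kma(3, 8, pcoords, 4);
//   kma.kmeans();
//   int chosen[4];
//   kma.getMinDistanceCluster(chosen);  // indices of the tightest cluster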


#define MINOF(a,b) (((a)<(b))?(a):(b))

/*! \brief fillContinousArray function
 *  \param arr the array to be filled.
 *  \param arrSize the size of the array.
 *  \param val pointer to the fill value; if NULL, the array is filled
 *  with the values 0 .. arrSize - 1.
 */
template <typename T>
void fillContinousArray(T *arr, size_t arrSize, T *val){
  if (val == NULL){

#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i){
      arr[i] = i;
    }

  }
  else {
    T v = *val;
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i){
      //cout << "writing to i:" << i << " arr:" << arrSize << endl;
      arr[i] = v;
    }
  }
}
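
// Example (illustrative):
//
//   part_t ids[4];
//   fillContinousArray<part_t>(ids, 4, NULL);   // ids = {0, 1, 2, 3}
//   part_t zero = 0;
//   fillContinousArray<part_t>(ids, 4, &zero);  // ids = {0, 0, 0, 0}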

/*! \brief CommunicationModel Base Class that performs mapping between
 *  the coordinate partitioning result and the processors.
 */
template <typename part_t, typename pcoord_t>
class CommunicationModel{
protected:
  double commCost;
public:

  part_t no_procs; //the number of processors
  part_t no_tasks; //the number of tasks.
  CommunicationModel(): commCost(), no_procs(0), no_tasks(0){}
  CommunicationModel(part_t no_procs_, part_t no_tasks_):
    commCost(),
    no_procs(no_procs_),
    no_tasks(no_tasks_){}
  virtual ~CommunicationModel(){}
  part_t getNProcs() const{
    return this->no_procs;
  }
  part_t getNTasks() const{
    return this->no_tasks;
  }

  void calculateCommunicationCost(
      part_t *task_to_proc,
      part_t *task_communication_xadj,
      part_t *task_communication_adj,
      pcoord_t *task_communication_edge_weight){

    double totalCost = 0;

    part_t commCount = 0;
    for (part_t task = 0; task < this->no_tasks; ++task){
      int assigned_proc = task_to_proc[task];
      //cout << "task:" << task << endl;
      part_t task_adj_begin = task_communication_xadj[task];
      part_t task_adj_end = task_communication_xadj[task+1];

      commCount += task_adj_end - task_adj_begin;
      //cout << "task:" << task << " proc:" << assigned_proc << endl;
      for (part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){

        //cout << "task2:" << task2 << endl;
        part_t neighborTask = task_communication_adj[task2];
        //cout << "neighborTask :" << neighborTask << endl;
        int neighborProc = task_to_proc[neighborTask];
        double distance = getProcDistance(assigned_proc, neighborProc);
        //cout << "assigned_proc:" << assigned_proc << " neighborProc:" << neighborProc << " d:" << distance << endl;

        if (task_communication_edge_weight == NULL){
          totalCost += distance;
        }
        else {
          totalCost += distance * task_communication_edge_weight[task2];
        }
      }
    }

    this->commCost = totalCost;// commCount;
  }

  double getCommunicationCostMetric(){
    return this->commCost;
  }

  virtual double getProcDistance(int procId1, int procId2) const = 0;

  /*! \brief Performs the mapping between the tasks and the processors,
   *  filling the proc-to-task and task-to-proc arrays.
   */
  virtual void getMapping(
      int myRank,
      const RCP<const Environment> &env,
      ArrayRCP<part_t> &proc_to_task_xadj, //holds the beginning index of the tasks on proc_to_task_adj for each processor.
      ArrayRCP<part_t> &proc_to_task_adj, //holds the task ids, grouped by processor.
      ArrayRCP<part_t> &task_to_proc //holds the processor assigned to each task.
      ) const = 0;
};

/*! \brief CoordinateModelInput Class that performs mapping between the
 *  coordinate partitioning result and mpi ranks, based on the coordinate
 *  results and mpi physical coordinates.
 */
template <typename pcoord_t, typename tcoord_t, typename part_t>
class CoordinateCommunicationModel : public CommunicationModel<part_t, pcoord_t> {
public:
  //private:
  int proc_coord_dim; //dimension of the processors
  pcoord_t **proc_coords; //the processor coordinates. allocated outside of the class.
  int task_coord_dim; //dimension of the tasks coordinates.
  tcoord_t **task_coords; //the task coordinates allocated outside of the class.
  int partArraySize;
  part_t *partNoArray;

  //public:
  CoordinateCommunicationModel():
    CommunicationModel<part_t, pcoord_t>(),
    proc_coord_dim(0),
    proc_coords(0),
    task_coord_dim(0),
    task_coords(0),
    partArraySize(-1),
    partNoArray(NULL){}

  virtual ~CoordinateCommunicationModel(){}

  /*! \brief Class Constructor:
   *  \param pcoord_dim_ the dimension of the processors
   *  \param pcoords_ the processor coordinates.
   *  \param tcoord_dim_ the dimension of the tasks
   *  \param tcoords_ the task coordinates.
   *  \param no_procs_ the number of processors
   *  \param no_tasks_ the number of tasks.
   */
  CoordinateCommunicationModel(
      int pcoord_dim_,
      pcoord_t **pcoords_,
      int tcoord_dim_,
      tcoord_t **tcoords_,
      part_t no_procs_,
      part_t no_tasks_
      ):
    CommunicationModel<part_t, pcoord_t>(no_procs_, no_tasks_),
    proc_coord_dim(pcoord_dim_), proc_coords(pcoords_),
    task_coord_dim(tcoord_dim_), task_coords(tcoords_),
    partArraySize(std::min(tcoord_dim_, pcoord_dim_)),
    partNoArray(NULL){
  }

  void setPartArraySize(int psize){
    this->partArraySize = psize;
  }
  void setPartArray(part_t *pNo){
    this->partNoArray = pNo;
  }

  /*! \brief Function is called whenever nprocs > no_task.
   * Function returns only the subset of processors that are closest to each other.
   *  \param proc_permutation holds the indices of the processors that are chosen.
   *  \param nprocs the number of processors.
   *  \param ntasks the number of tasks.
   */
  void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const{
    //uses kmeans to find the closest subset; an older random-subset
    //version is kept commented out below.

    part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);
    KMeansAlgorithm<part_t, pcoord_t> kma(
        minCoordDim, nprocs,
        this->proc_coords, ntasks);

    kma.kmeans();
    kma.getMinDistanceCluster(proc_permutation);

    for (int i = ntasks; i < nprocs; ++i){
      proc_permutation[i] = -1;
    }
    /*
    //fill array.
    fillContinousArray<part_t>(proc_permutation, nprocs, NULL);
    int _u_umpa_seed = 847449649;
    srand (time(NULL));
    int a = rand() % 1000 + 1;
    _u_umpa_seed -= a;
    //permute array randomly.
    update_visit_order(proc_permutation, nprocs, _u_umpa_seed, 1);
    */
  }

  //temporary, necessary for random permutation.
  static part_t umpa_uRandom(part_t l, int &_u_umpa_seed)
  {
    int a = 16807;
    int m = 2147483647;
    int q = 127773;
    int r = 2836;
    int lo, hi, test;
    double d;

    lo = _u_umpa_seed % q;
    hi = _u_umpa_seed / q;
    test = (a * lo) - (r * hi);
    if (test > 0)
      _u_umpa_seed = test;
    else
      _u_umpa_seed = test + m;
    d = (double) ((double) _u_umpa_seed / (double) m);
    return (part_t) (d * (double)l);
  }
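
  // This is the Lehmer / Park-Miller "minimal standard" generator
  // (a = 16807, m = 2^31 - 1), using Schrage's decomposition to avoid
  // intermediate overflow; the result is scaled to an integer in [0, l).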

  virtual double getProcDistance(int procId1, int procId2) const{
    double distance = 0;
    for (int i = 0; i < this->proc_coord_dim; ++i){
      distance += ZOLTAN2_ABS(proc_coords[i][procId1] - proc_coords[i][procId2]);
    }
    return distance;
  }
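
  // The distance is the L1 (Manhattan) distance between the two processors'
  // machine coordinates; e.g. procs at (0,0,0) and (2,1,3) are 6 hops apart.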


  //temporary, does random permutation.
  void update_visit_order(part_t *visitOrder, part_t n, int &_u_umpa_seed, part_t rndm) {
    part_t *a = visitOrder;

    if (rndm){
      part_t i, u, v, tmp;

      if (n <= 4)
        return;

      //srand(time(NULL));

      //_u_umpa_seed = _u_umpa_seed1 - (rand()%100);
      for (i = 0; i < n; i += 16)
      {
        u = umpa_uRandom(n-4, _u_umpa_seed);
        v = umpa_uRandom(n-4, _u_umpa_seed);

        // FIXME (mfh 30 Sep 2015) This requires including Zoltan2_AlgMultiJagged.hpp.

        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v], a[u], tmp);
        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v+1], a[u+1], tmp);
        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v+2], a[u+2], tmp);
        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v+3], a[u+3], tmp);
      }
    }
    else {
      part_t i, end = n / 4;

      for (i = 1; i < end; i++)
      {
        part_t j = umpa_uRandom(n-i, _u_umpa_seed);
        part_t t = a[j];
        a[j] = a[n-i];
        a[n-i] = t;
      }
    }
    //PermuteInPlace(visitOrder, n);
  }


  /*! \brief Performs the processor-to-task mapping.
   *  \param myRank the rank of the calling processor; used to pick the dimension permutation tried by this rank.
   *  \param env environment object, used for timing.
   *  \param rcp_proc_to_task_xadj holds, for each processor, the beginning index of its tasks in rcp_proc_to_task_adj.
   *  \param rcp_proc_to_task_adj holds the task ids, grouped by processor.
   *  \param rcp_task_to_proc holds the processor assigned to each task.
   */
  virtual void getMapping(
      int myRank,
      const RCP<const Environment> &env,
      ArrayRCP<part_t> &rcp_proc_to_task_xadj,
      ArrayRCP<part_t> &rcp_proc_to_task_adj,
      ArrayRCP<part_t> &rcp_task_to_proc
      ) const{

    rcp_proc_to_task_xadj = ArrayRCP<part_t>(this->no_procs+1);
    rcp_proc_to_task_adj = ArrayRCP<part_t>(this->no_tasks);
    rcp_task_to_proc = ArrayRCP<part_t>(this->no_tasks);

    part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr(); //holds the pointer to the task array
    part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr(); //holds the indices of tasks wrt to proc_to_task_xadj array.
    part_t *task_to_proc = rcp_task_to_proc.getRawPtr(); //holds the processors mapped to tasks.

    part_t invalid = 0;
    fillContinousArray<part_t>(proc_to_task_xadj, this->no_procs+1, &invalid);

    //obtain the number of parts that should be divided.
    part_t num_parts = MINOF(this->no_procs, this->no_tasks);
    //obtain the min coordinate dim.
    part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);

    int recursion_depth = partArraySize;
    if (partArraySize < minCoordDim) recursion_depth = minCoordDim;

    int taskPerm = z2Fact<int>(this->task_coord_dim); //number of different permutations for task dimension ordering
    int procPerm = z2Fact<int>(this->proc_coord_dim); //number of different permutations for proc dimension ordering
    int permutations = taskPerm * procPerm; //total number of permutations

    //holds the pointers to proc_adjList
    part_t *proc_xadj = allocMemory<part_t>(num_parts+1);
    //holds the processors in parts according to the result of partitioning algorithm.
    //the processors assigned to part x are at proc_adjList[ proc_xadj[x] : proc_xadj[x+1] ]
    part_t *proc_adjList = allocMemory<part_t>(this->no_procs);

    part_t used_num_procs = this->no_procs;
    if (this->no_procs > this->no_tasks){
      //obtain the subset of the processors that are closest to each other.
      this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks);
      used_num_procs = this->no_tasks;
    }
    else {
      fillContinousArray<part_t>(proc_adjList, this->no_procs, NULL);
    }

    int myPermutation = myRank % permutations; //the index of the permutation

    int myProcPerm = myPermutation % procPerm; //the index of the proc permutation
    int myTaskPerm = myPermutation / procPerm; //the index of the task permutation

    int *permutation = allocMemory<int>((this->proc_coord_dim > this->task_coord_dim)
        ? this->proc_coord_dim : this->task_coord_dim);

    //get the permutation order from the proc permutation index.
    ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);
    //reorder the coordinate dimensions.
    pcoord_t **pcoords = allocMemory<pcoord_t *>(this->proc_coord_dim);
    for (int i = 0; i < this->proc_coord_dim; ++i){
      pcoords[i] = this->proc_coords[permutation[i]];
      //cout << permutation[i] << " ";
    }

    //do the partitioning and renumber the parts.
    env->timerStart(MACRO_TIMERS, "Mapping - Proc Partitioning");

    AlgMJ<pcoord_t, part_t, part_t, part_t> mj_partitioner;
    mj_partitioner.sequential_task_partitioning(
        env,
        this->no_procs,
        used_num_procs,
        num_parts,
        minCoordDim,
        pcoords, //this->proc_coords,
        proc_adjList,
        proc_xadj,
        recursion_depth,
        partNoArray
        //,"proc_partitioning"
        );
    env->timerStop(MACRO_TIMERS, "Mapping - Proc Partitioning");
    freeArray<pcoord_t *>(pcoords);

    part_t *task_xadj = allocMemory<part_t>(num_parts+1);
    part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
    //fill task_adjList st: task_adjList[i] <- i.
    fillContinousArray<part_t>(task_adjList, this->no_tasks, NULL);

    //get the permutation order from the task permutation index.
    ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);

    //reorder task coordinate dimensions.
    tcoord_t **tcoords = allocMemory<tcoord_t *>(this->task_coord_dim);
    for (int i = 0; i < this->task_coord_dim; ++i){
      tcoords[i] = this->task_coords[permutation[i]];
    }

    env->timerStart(MACRO_TIMERS, "Mapping - Task Partitioning");
    //partitioning of tasks
    mj_partitioner.sequential_task_partitioning(
        env,
        this->no_tasks,
        this->no_tasks,
        num_parts,
        minCoordDim,
        tcoords, //this->task_coords,
        task_adjList,
        task_xadj,
        recursion_depth,
        partNoArray
        //,"task_partitioning"
        );
    env->timerStop(MACRO_TIMERS, "Mapping - Task Partitioning");
    freeArray<tcoord_t *>(tcoords);
    freeArray<int>(permutation);

    //filling proc_to_task_xadj, proc_to_task_adj, task_to_proc arrays.
    for (part_t i = 0; i < num_parts; ++i){

      part_t proc_index_begin = proc_xadj[i];
      part_t task_begin_index = task_xadj[i];
      part_t proc_index_end = proc_xadj[i+1];
      part_t task_end_index = task_xadj[i+1];

      if (proc_index_end - proc_index_begin != 1){
        std::cerr << "Error at partitioning of processors" << std::endl;
        std::cerr << "PART:" << i << " is assigned to " << proc_index_end - proc_index_begin << " processors." << std::endl;
        exit(1);
      }
      part_t assigned_proc = proc_adjList[proc_index_begin];
      proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
    }

    //holds the pointer to the task array
    //convert proc_to_task_xadj to CSR index array
    part_t *proc_to_task_xadj_work = allocMemory<part_t>(this->no_procs);
    part_t sum = 0;
    for (part_t i = 0; i < this->no_procs; ++i){
      part_t tmp = proc_to_task_xadj[i];
      proc_to_task_xadj[i] = sum;
      sum += tmp;
      proc_to_task_xadj_work[i] = sum;
    }
    proc_to_task_xadj[this->no_procs] = sum;

    for (part_t i = 0; i < num_parts; ++i){

      part_t proc_index_begin = proc_xadj[i];
      part_t task_begin_index = task_xadj[i];
      part_t task_end_index = task_xadj[i+1];

      part_t assigned_proc = proc_adjList[proc_index_begin];

      for (part_t j = task_begin_index; j < task_end_index; ++j){
        part_t taskId = task_adjList[j];

        task_to_proc[taskId] = assigned_proc;

        proc_to_task_adj[ --proc_to_task_xadj_work[assigned_proc] ] = taskId;
      }
    }

    freeArray<part_t>(proc_to_task_xadj_work);
    freeArray<part_t>(task_xadj);
    freeArray<part_t>(task_adjList);
    freeArray<part_t>(proc_xadj);
    freeArray<part_t>(proc_adjList);
  }

};

template <typename Adapter, typename part_t>
class CoordinateTaskMapper : public PartitionMapping<Adapter>{
protected:

#ifndef DOXYGEN_SHOULD_SKIP_THIS

  typedef typename Adapter::scalar_t pcoord_t;
  typedef typename Adapter::scalar_t tcoord_t;

#endif

  //RCP<const Environment> env;
  ArrayRCP<part_t> proc_to_task_xadj; //holds the beginning index of the tasks on proc_to_task_adj for each processor.
  ArrayRCP<part_t> proc_to_task_adj; //holds the task ids, grouped by processor.
  ArrayRCP<part_t> task_to_proc; //holds the processor assigned to each task.
  bool isOwnerofModel;
  CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> *proc_task_comm;
  part_t nprocs;
  part_t ntasks;
  ArrayRCP<part_t> task_communication_xadj;
  ArrayRCP<part_t> task_communication_adj;


  /*! \brief doMapping function, calls getMapping function of
   *  communicationModel object.
   */
  void doMapping(int myRank){

    if (this->proc_task_comm){
      this->proc_task_comm->getMapping(
          myRank,
          Teuchos::RCP<const Environment>(this->env, false),
          this->proc_to_task_xadj, //holds the beginning index of the tasks on proc_to_task_adj for each processor.
          this->proc_to_task_adj, //holds the task ids, grouped by processor.
          this->task_to_proc //holds the processor assigned to each task.
          );
    }
    else {
      std::cerr << "communicationModel is not specified in the Mapper" << std::endl;
      exit(1);
    }
  }


  /*! \brief creates and returns the subcommunicator for the processor group.
   */
  RCP<Comm<int> > create_subCommunicator(){
    int procDim = this->proc_task_comm->proc_coord_dim;
    int taskDim = this->proc_task_comm->task_coord_dim;

    int taskPerm = z2Fact<int>(procDim); //number of permutations for the processor dimension ordering
    int procPerm = z2Fact<int>(taskDim); //number of permutations for the task dimension ordering
    int idealGroupSize = taskPerm * procPerm; //total number of permutations

    int myRank = this->comm->getRank();
    int commSize = this->comm->getSize();

    int myGroupIndex = myRank / idealGroupSize;

    int prevGroupBegin = (myGroupIndex - 1) * idealGroupSize;
    if (prevGroupBegin < 0) prevGroupBegin = 0;
    int myGroupBegin = myGroupIndex * idealGroupSize;
    int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
    int nextGroupEnd = (myGroupIndex + 2) * idealGroupSize;

    if (myGroupEnd > commSize){
      myGroupBegin = prevGroupBegin;
      myGroupEnd = commSize;
    }
    if (nextGroupEnd > commSize){
      myGroupEnd = commSize;
    }
    int myGroupSize = myGroupEnd - myGroupBegin;

    part_t *myGroup = allocMemory<part_t>(myGroupSize);
    for (int i = 0; i < myGroupSize; ++i){
      myGroup[i] = myGroupBegin + i;
    }
    //cout << "me:" << myRank << " myGroupBegin:" << myGroupBegin << " myGroupEnd:" << myGroupEnd << endl;

    ArrayView<const part_t> myGroupView(myGroup, myGroupSize);

    RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
    freeArray<part_t>(myGroup);
    return subComm;
  }
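
  // Example (illustrative): with procDim = taskDim = 2, idealGroupSize is
  // 2! * 2! = 4, so ranks {0,1,2,3} form one group, {4,5,6,7} the next,
  // and a trailing partial group is merged into the previous one.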


  /*! \brief finds the lowest cost mapping and broadcasts solution to everyone.
   */
  void getBestMapping(){
    //create the sub group.
    RCP<Comm<int> > subComm = this->create_subCommunicator();
    //calculate cost.
    double myCost = this->proc_task_comm->getCommunicationCostMetric();
    //cout << "me:" << this->comm->getRank() << " myCost:" << myCost << endl;
    double localCost[2], globalCost[2];

    localCost[0] = myCost;
    localCost[1] = double(subComm->getRank());

    globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
    Teuchos::Zoltan2_ReduceBestMapping<int,double> reduceBest;
    reduceAll<int, double>(*subComm, reduceBest,
        2, localCost, globalCost);

    int sender = int(globalCost[1]);

    //cout << "me:" << localCost[1] << " localcost:" << localCost[0] << " bestcost:" << globalCost[0] << endl;
    //cout << "me:" << localCost[1] << " proc:" << globalCost[1] << endl;
    broadcast(*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
    broadcast(*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
    broadcast(*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
  }


  //write mapping to gnuPlot code to visualize.
  void writeMapping(){
    std::ofstream gnuPlotCode("gnuPlot.plot", std::ofstream::out);

    int mindim = MINOF(proc_task_comm->proc_coord_dim, proc_task_comm->task_coord_dim);
    std::string ss = "";
    for (part_t i = 0; i < this->nprocs; ++i){

      std::string procFile = toString<int>(i) + "_mapping.txt";
      if (i == 0){
        gnuPlotCode << "plot \"" << procFile << "\"\n";
      }
      else {
        gnuPlotCode << "replot \"" << procFile << "\"\n";
      }

      std::ofstream inpFile(procFile.c_str(), std::ofstream::out);

      std::string gnuPlotArrow = "set arrow from ";
      for (int j = 0; j < mindim; ++j){
        if (j == mindim - 1){
          inpFile << proc_task_comm->proc_coords[j][i];
          gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]);
        }
        else {
          inpFile << proc_task_comm->proc_coords[j][i] << " ";
          gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]) + ",";
        }
      }
      gnuPlotArrow += " to ";

      inpFile << std::endl;
      ArrayView<part_t> a = this->getAssignedTaksForProc(i);
      for (int k = 0; k < a.size(); ++k){
        int j = a[k];
        //cout << "i:" << i << " j:"
        std::string gnuPlotArrow2 = gnuPlotArrow;
        for (int z = 0; z < mindim; ++z){
          if (z == mindim - 1){
            //cout << "z:" << z << " j:" << j << " " << proc_task_comm->task_coords[z][j] << endl;
            inpFile << proc_task_comm->task_coords[z][j];
            gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]);
          }
          else {
            inpFile << proc_task_comm->task_coords[z][j] << " ";
            gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]) + ",";
          }
        }
        ss += gnuPlotArrow2 + "\n";
        inpFile << std::endl;
      }
      inpFile.close();
    }
    gnuPlotCode << ss;
    gnuPlotCode << "\nreplot\n pause -1 \n";
    gnuPlotCode.close();
  }


  //write mapping to gnuPlot code to visualize.
  void writeMapping2(int myRank){

    std::string rankStr = toString<int>(myRank);
    std::string gnuPlots = "gnuPlot", extentionS = ".plot";
    std::string outF = gnuPlots + rankStr + extentionS;
    std::ofstream gnuPlotCode(outF.c_str(), std::ofstream::out);

    CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> *tmpproc_task_comm =
        static_cast<CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> *>(proc_task_comm);
    int mindim = MINOF(tmpproc_task_comm->proc_coord_dim, tmpproc_task_comm->task_coord_dim);
    std::string ss = "";
    std::string procs = "", parts = "";
    for (part_t i = 0; i < this->nprocs; ++i){

      ArrayView<part_t> a = this->getAssignedTaksForProc(i);
      if (a.size() == 0){
        continue;
      }

      std::string gnuPlotArrow = "set arrow from ";
      for (int j = 0; j < mindim; ++j){
        if (j == mindim - 1){
          gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
          procs += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
        }
        else {
          gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]) + ",";
          procs += toString<float>(tmpproc_task_comm->proc_coords[j][i]) + " ";
        }
      }
      procs += "\n";

      gnuPlotArrow += " to ";

      for (int k = 0; k < a.size(); ++k){
        int j = a[k];
        //cout << "i:" << i << " j:"
        std::string gnuPlotArrow2 = gnuPlotArrow;
        for (int z = 0; z < mindim; ++z){
          if (z == mindim - 1){
            //cout << "z:" << z << " j:" << j << " " << proc_task_comm->task_coords[z][j] << endl;
            gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]);
            parts += toString<float>(tmpproc_task_comm->task_coords[z][j]);
          }
          else {
            gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]) + ",";
            parts += toString<float>(tmpproc_task_comm->task_coords[z][j]) + " ";
          }
        }
        parts += "\n";
        ss += gnuPlotArrow2 + " nohead\n";
      }
    }

    std::ofstream procFile("procPlot.plot", std::ofstream::out);
    procFile << procs << "\n";
    procFile.close();

    std::ofstream partFile("partPlot.plot", std::ofstream::out);
    partFile << parts << "\n";
    partFile.close();

    std::ofstream extraProcFile("allProc.plot", std::ofstream::out);

    for (part_t j = 0; j < this->nprocs; ++j){
      for (int i = 0; i < mindim; ++i){
        extraProcFile << tmpproc_task_comm->proc_coords[i][j] << " ";
      }
      extraProcFile << std::endl;
    }

    extraProcFile.close();

    gnuPlotCode << ss;
    if (mindim == 2){
      gnuPlotCode << "plot \"procPlot.plot\" with points pointsize 3\n";
    } else {
      gnuPlotCode << "splot \"procPlot.plot\" with points pointsize 3\n";
    }
    gnuPlotCode << "replot \"partPlot.plot\" with points pointsize 3\n";
    gnuPlotCode << "replot \"allProc.plot\" with points pointsize 0.65\n";
    gnuPlotCode << "\nreplot\n pause -1 \n";
    gnuPlotCode.close();
  }


// KDD Need to provide access to algorithm for getPartBoxes
#ifdef gnuPlot
  void writeGnuPlot(
      const Teuchos::Comm<int> *comm_,
      const Zoltan2::PartitioningSolution<Adapter> *soln_,
      int coordDim,
      tcoord_t **partCenters
      ){
    std::string file = "gggnuPlot";
    std::string exten = ".plot";
    std::ofstream mm("2d.txt");
    file += toString<int>(comm_->getRank()) + exten;
    std::ofstream ff(file.c_str());
    //ff.seekg(0, ff.end);
    std::vector<Zoltan2::coordinateModelPartBox<tcoord_t, part_t> > outPartBoxes =
        ((Zoltan2::PartitioningSolution<Adapter> *)soln_)->getPartBoxesView();

    for (part_t i = 0; i < this->ntasks; ++i){
      outPartBoxes[i].writeGnuPlot(ff, mm);
    }
    if (coordDim == 2){
      ff << "plot \"2d.txt\"" << std::endl;
      //ff << "\n pause -1" << endl;
    }
    else {
      ff << "splot \"2d.txt\"" << std::endl;
      //ff << "\n pause -1" << endl;
    }
    mm.close();

    ff << "set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
    for (part_t i = 0; i < this->ntasks; ++i){
      part_t pb = task_communication_xadj[i];
      part_t pe = task_communication_xadj[i+1];
      for (part_t p = pb; p < pe; ++p){
        part_t n = task_communication_adj[p];

        //cout << "i:" << i << " n:" << n << endl;
        std::string arrowline = "set arrow from ";
        for (int j = 0; j < coordDim - 1; ++j){
          arrowline += toString<tcoord_t>(partCenters[j][n]) + ",";
        }
        arrowline += toString<tcoord_t>(partCenters[coordDim - 1][n]) + " to ";

        for (int j = 0; j < coordDim - 1; ++j){
          arrowline += toString<tcoord_t>(partCenters[j][i]) + ",";
        }
        arrowline += toString<tcoord_t>(partCenters[coordDim - 1][i]) + " as 5\n";

        //cout << "arrow:" << arrowline << endl;
        ff << arrowline;
      }
    }

    ff << "replot\n pause -1" << std::endl;
    ff.close();
  }
#endif // gnuPlot

public:

  void getProcTask(part_t* &proc_to_task_xadj_, part_t* &proc_to_task_adj_){
    proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
    proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
  }

  virtual ~CoordinateTaskMapper(){
    //freeArray<part_t>(proc_to_task_xadj);
    //freeArray<part_t>(proc_to_task_adj);
    //freeArray<part_t>(task_to_proc);
    if (this->isOwnerofModel){
      delete this->proc_task_comm;
    }
  }


  /*! \brief Constructor.
   * When this constructor is called, in order to calculate the communication
   * metric, the task adjacency graph is computed from the coordinate model
   * input and the partitioning solution.
   *  \param comm_ is the communication object.
   *  \param machine_ is the MachineRepresentation object; stores the coordinates of the machines.
   *  \param model_ is the input model.
   *  \param soln_ is the solution object; holds the assignment of points.
   *  \param envConst is the environment object.
   */
  CoordinateTaskMapper(
      const Teuchos::Comm<int> *comm_,
      const MachineRepresentation<pcoord_t> *machine_,
      const Zoltan2::Model<typename Adapter::base_adapter_t> *model_,
      const Zoltan2::PartitioningSolution<Adapter> *soln_,
      const Environment *envConst):
    PartitionMapping<Adapter>(comm_, machine_, model_, soln_, envConst),
    proc_to_task_xadj(0),
    proc_to_task_adj(0),
    task_to_proc(0),
    isOwnerofModel(true),
    proc_task_comm(0),
    task_communication_xadj(0),
    task_communication_adj(0){

    pcoord_t *task_communication_edge_weight_ = NULL;
    //if mapping type is 0 then it is coordinate mapping
    int procDim = machine_->getProcDim();
    this->nprocs = machine_->getNumProcs();
    //get processor coordinates.
    pcoord_t **procCoordinates = machine_->getProcCoords();

    int coordDim = ((Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *)model_)->getCoordinateDim();
    this->ntasks = soln_->getActualGlobalNumberOfParts();
    if (part_t(soln_->getTargetGlobalNumberOfParts()) > this->ntasks){
      this->ntasks = soln_->getTargetGlobalNumberOfParts();
    }
    //cout << "actual: " << this->ntasks << endl;

    //alloc memory for part centers.
    tcoord_t **partCenters = NULL;
    partCenters = allocMemory<tcoord_t *>(coordDim);
    for (int i = 0; i < coordDim; ++i){
      partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
    }

    envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");
    //get centers for the parts.
    getSolutionCenterCoordinates<Adapter, typename Adapter::scalar_t, part_t>(
        envConst,
        comm_,
        ((Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *)model_),
        this->soln,
        coordDim,
        ntasks,
        partCenters);

    envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");

    //create coordinate communication model.
    this->proc_task_comm =
        new CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
            procDim,
            procCoordinates,
            coordDim,
            partCenters,
            this->nprocs,
            this->ntasks);

    int myRank = comm_->getRank();

    envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
    this->doMapping(myRank);
    envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");

    envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
    soln_->getCommunicationGraph(task_communication_xadj,
        task_communication_adj);

    envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");
#ifdef gnuPlot
    if (comm_->getRank() == 0){

      part_t taskCommCount = task_communication_xadj.size();
      std::cout << " TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
      part_t maxN = task_communication_xadj[0];
      for (part_t i = 1; i <= taskCommCount; ++i){
        part_t nc = task_communication_xadj[i] - task_communication_xadj[i-1];
        if (maxN < nc) maxN = nc;
      }
      std::cout << " maxNeighbor:" << maxN << std::endl;
    }

    this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
#endif

    envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
    this->proc_task_comm->calculateCommunicationCost(
        task_to_proc.getRawPtr(),
        task_communication_xadj.getRawPtr(),
        task_communication_adj.getRawPtr(),
        task_communication_edge_weight_);

    envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");

    //processors are divided into groups of size procDim! * taskDim!
    //each processor in the group obtains a mapping with a different rotation
    //and the best one is broadcasted to all processors.
    this->getBestMapping();
#ifdef gnuPlot
    this->writeMapping2(comm_->getRank());
#endif

    for (int i = 0; i < coordDim; ++i){
      freeArray<tcoord_t>(partCenters[i]);
    }
    freeArray<tcoord_t *>(partCenters);
  }


  /*! \brief Constructor. The mapping constructor which will also perform
   *  the mapping operation. The result mapping can be obtained by
   *  --getAssignedProcForTask function: returns the assigned processor id for the given task,
   *  --getPartsForProc: returns the assigned tasks with the number of tasks.
   *
   *  \param env_const_ the environment object.
   *  \param problemComm is the communication object.
   *  \param proc_dim dimension of the processor coordinates.
   *  \param num_processors is the number of processors.
   *  \param machine_coords is the coordinates of the processors.
   *  \param task_dim is the dimension of the tasks.
   *  \param num_tasks is the number of tasks.
   *  \param task_coords is the coordinates of the tasks.
   *  \param task_comm_xadj is the task communication graph's xadj array.
   *  \param task_comm_adj is the task communication graph's adjacency array.
   *  \param task_communication_edge_weight_ is the weight of the communication edges.
   *  \param recursion_depth is the recursion depth that will be applied to partitioning.
   *  \param part_no_array if provided, the partitioning algorithm is forced to use this array for the number of parts at each level.
   *  \param machine_dimensions holds the sizes of the machine network; if provided, the largest gap along each dimension is treated as the wrap-around link.
   */
  CoordinateTaskMapper(
      const Environment *env_const_,
      const Teuchos::Comm<int> *problemComm,
      int proc_dim,
      int num_processors,
      pcoord_t **machine_coords,

      int task_dim,
      part_t num_tasks,
      tcoord_t **task_coords,
      ArrayRCP<part_t> task_comm_xadj,
      ArrayRCP<part_t> task_comm_adj,
      pcoord_t *task_communication_edge_weight_,
      int recursion_depth,
      part_t *part_no_array,
      const part_t *machine_dimensions
      ): PartitionMapping<Adapter>(problemComm, NULL, NULL, NULL, env_const_),
    proc_to_task_xadj(0),
    proc_to_task_adj(0),
    task_to_proc(0),
    isOwnerofModel(true),
    proc_task_comm(0),
    task_communication_xadj(task_comm_xadj),
    task_communication_adj(task_comm_adj){

    //if mapping type is 0 then it is coordinate mapping
    pcoord_t **virtual_machine_coordinates = machine_coords;
    if (machine_dimensions){
      virtual_machine_coordinates =
          this->shiftMachineCoordinates(
              proc_dim,
              machine_dimensions,
              num_processors,
              machine_coords);
    }

    this->nprocs = num_processors;

    int coordDim = task_dim;
    this->ntasks = num_tasks;

    //alloc memory for part centers.
    tcoord_t **partCenters = task_coords;

    //create coordinate communication model.
    this->proc_task_comm =
        new CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
            proc_dim,
            virtual_machine_coordinates,
            coordDim,
            partCenters,
            this->nprocs,
            this->ntasks);
    this->proc_task_comm->setPartArraySize(recursion_depth);
    this->proc_task_comm->setPartArray(part_no_array);

    int myRank = problemComm->getRank();

    this->doMapping(myRank);
#ifdef gnuPlot
    this->writeMapping2(myRank);
#endif

    if (task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
      this->proc_task_comm->calculateCommunicationCost(
          task_to_proc.getRawPtr(),
          task_communication_xadj.getRawPtr(),
          task_communication_adj.getRawPtr(),
          task_communication_edge_weight_);

      this->getBestMapping();

/*
      if (myRank == 0){
        this->proc_task_comm->calculateCommunicationCost(
            task_to_proc.getRawPtr(),
            task_communication_xadj.getRawPtr(),
            task_communication_adj.getRawPtr(),
            task_communication_edge_weight_);
        cout << "me: " << problemComm->getRank() << " cost:" << this->proc_task_comm->getCommunicationCostMetric() << endl;
      }
*/
    }

    if (machine_dimensions){
      for (int i = 0; i < proc_dim; ++i){
        delete [] virtual_machine_coordinates[i];
      }
      delete [] virtual_machine_coordinates;
    }
#ifdef gnuPlot
    if (problemComm->getRank() == 0)
      this->writeMapping2(-1);
#endif
  }


  double getCommunicationCostMetric(){
    return this->proc_task_comm->getCommunicationCostMetric();
  }

  /*! \brief Returns the number of parts to be assigned to this process.
   */
  virtual size_t getLocalNumberOfParts() const{
    return 0;
  }

  /*! \brief Using the machine dimensions provided, create virtual machine
   *  coordinates by assigning the largest gap along each dimension to be
   *  the wrap-around link.
   *  \param machine_dim the number of dimensions in the machine network.
   *  \param machine_dimensions the size of the machine network along each dimension.
   *  \param numProcs the number of allocated processors.
   *  \param mCoords the allocated machine coordinates.
   */
  pcoord_t **shiftMachineCoordinates(
      int machine_dim,
      const part_t *machine_dimensions,
      part_t numProcs,
      pcoord_t **mCoords){
    pcoord_t **result_machine_coords = NULL;
    result_machine_coords = new pcoord_t*[machine_dim];
    for (int i = 0; i < machine_dim; ++i){
      result_machine_coords[i] = new pcoord_t[numProcs];
    }

    for (int i = 0; i < machine_dim; ++i){
      part_t numMachinesAlongDim = machine_dimensions[i];
      part_t *machineCounts = new part_t[numMachinesAlongDim];
      memset(machineCounts, 0, sizeof(part_t) * numMachinesAlongDim);

      int *filledCoordinates = new int[numMachinesAlongDim];

      pcoord_t *coords = mCoords[i];
      for (part_t j = 0; j < numProcs; ++j){
        part_t mc = (part_t) coords[j];
        ++machineCounts[mc];
      }

      part_t filledCoordinateCount = 0;
      for (part_t j = 0; j < numMachinesAlongDim; ++j){
        if (machineCounts[j] > 0){
          filledCoordinates[filledCoordinateCount++] = j;
        }
      }

      part_t firstProcCoord = filledCoordinates[0];
      part_t firstProcCount = machineCounts[firstProcCoord];

      part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
      part_t lastProcCount = machineCounts[lastProcCoord];

      part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
      part_t firstLastGapProc = lastProcCount + firstProcCount;

      part_t leftSideProcCoord = firstProcCoord;
      part_t leftSideProcCount = firstProcCount;
      part_t biggestGap = 0;
      part_t biggestGapProc = numProcs;

      part_t shiftBorderCoordinate = -1;
      for (part_t j = 1; j < filledCoordinateCount; ++j){
        part_t rightSideProcCoord = filledCoordinates[j];
        part_t rightSideProcCount = machineCounts[rightSideProcCoord];

        part_t gap = rightSideProcCoord - leftSideProcCoord;
        part_t gapProc = rightSideProcCount + leftSideProcCount;

        /* Pick the largest gap in this dimension. Use fewer processes on either side
           of the largest gap to break the tie. An easy addition to this would
           be to weight the gap by the number of processes. */
        if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
          shiftBorderCoordinate = rightSideProcCoord;
          biggestGapProc = gapProc;
          biggestGap = gap;
        }
        leftSideProcCoord = rightSideProcCoord;
        leftSideProcCount = rightSideProcCount;
      }

      if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
        shiftBorderCoordinate = -1;
      }

      for (part_t j = 0; j < numProcs; ++j){

        if (coords[j] < shiftBorderCoordinate){
          result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
        }
        else {
          result_machine_coords[i][j] = coords[j];
        }
        //cout << "I:" << i << "j:" << j << " coord:" << coords[j] << " now:" << result_machine_coords[i][j] << endl;
      }
      delete [] machineCounts;
      delete [] filledCoordinates;
    }

    return result_machine_coords;
  }

  /*! \brief getProcsForPart function, returns the processors assigned
   *  to the given task, with the number of processors.
   *  \param taskId the task id.
   *  \param numProcs (output) the number of processors assigned to the task.
   *  \param procs (output) the list of assigned processors.
   */
  virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *procs) const{
    numProcs = 1;
    procs = this->task_to_proc.getRawPtr() + taskId;
  }

  /*! \brief getAssignedProcForTask function, returns the assigned
   *  processor id for the given task.
   *  \param taskId the task id.
   */
  inline part_t getAssignedProcForTask(part_t taskId){
    return this->task_to_proc[taskId];
  }

  /*! \brief getPartsForProc function, returns the tasks assigned to
   *  the given processor, with the number of tasks.
   *  \param procId the processor id.
   *  \param numParts (output) the number of assigned tasks.
   *  \param parts (output) the list of assigned tasks.
   */
  virtual void getPartsForProc(int procId, part_t &numParts, part_t *parts) const{

    part_t task_begin = this->proc_to_task_xadj[procId];
    part_t taskend = this->proc_to_task_xadj[procId+1];
    parts = this->proc_to_task_adj.getRawPtr() + task_begin;
    numParts = taskend - task_begin;
  }

  ArrayView<part_t> getAssignedTaksForProc(part_t procId){
    part_t task_begin = this->proc_to_task_xadj[procId];
    part_t taskend = this->proc_to_task_xadj[procId+1];

    /*
    cout << "part_t:" << procId << " taskCount:" << taskend - task_begin << endl;
    for (part_t i = task_begin; i < taskend; ++i){
      cout << "part_t:" << procId << " task:" << proc_to_task_adj[i] << endl;
    }
    */
    if (taskend - task_begin > 0){
      ArrayView<part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
      return assignedParts;
    }
    else {
      ArrayView<part_t> assignedParts;
      return assignedParts;
    }
  }

};


/*! \brief The interface function that calls CoordinateTaskMapper, which will
 *  also perform the mapping operation. The result mapping is copied into the
 *  output arrays.
 *  \param problemComm is the communication object.
 *  \param proc_dim dimension of the processor coordinates.
 *  \param num_processors is the number of processors.
 *  \param machine_coords is the coordinates of the processors.
 *  \param task_dim is the dimension of the tasks.
 *  \param num_tasks is the number of tasks.
 *  \param task_coords is the coordinates of the tasks.
 *  \param task_comm_xadj is the task communication graph's xadj array.
 *  \param task_comm_adj is the task communication graph's adjacency array.
 *  \param task_communication_edge_weight_ float-like, same size as task_comm_adj; weight of the corresponding edge.
 *  \param proc_to_task_xadj is the output CSR pointer array of the processor-to-task mapping.
 *  \param proc_to_task_adj is the output array holding the task ids, grouped by processor.
 *  \param recursion_depth is the recursion depth that will be applied to partitioning.
 *  \param part_no_array if provided, the partitioning algorithm is forced to use this array for the number of parts at each level.
 *  \param machine_dimensions holds the sizes of the machine network, used for wrap-around links.
 */
template <typename part_t, typename pcoord_t, typename tcoord_t>
void coordinateTaskMapperInterface(
    RCP<const Teuchos::Comm<int> > problemComm,
    int proc_dim,
    int num_processors,
    pcoord_t **machine_coords,
    int task_dim,
    part_t num_tasks,
    tcoord_t **task_coords,
    part_t *task_comm_xadj,
    part_t *task_comm_adj,
    pcoord_t *task_communication_edge_weight_,
    part_t *proc_to_task_xadj, /*output*/
    part_t *proc_to_task_adj, /*output*/
    int recursion_depth,
    part_t *part_no_array,
    const part_t *machine_dimensions
)
{

  const Environment *envConst_ = new Environment();

  // mfh 03 Mar 2015: It's OK to omit the Node template
  // parameter in Tpetra, if you're just going to use the
  // default Node.
  typedef Tpetra::MultiVector<tcoord_t, part_t, part_t> tMVector_t;

  Teuchos::ArrayRCP<part_t> task_communication_xadj(task_comm_xadj, 0, num_tasks+1, false);

  Teuchos::ArrayRCP<part_t> task_communication_adj;
  if (task_comm_xadj){
    Teuchos::ArrayRCP<part_t> tmp_task_communication_adj(task_comm_adj, 0, task_comm_xadj[num_tasks], false);
    task_communication_adj = tmp_task_communication_adj;
  }

  CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t> *ctm =
      new CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t>(
          envConst_,
          problemComm.getRawPtr(),
          proc_dim,
          num_processors,
          machine_coords,

          task_dim,
          num_tasks,
          task_coords,

          task_communication_xadj,
          task_communication_adj,
          task_communication_edge_weight_,
          recursion_depth,
          part_no_array,
          machine_dimensions);

  part_t *proc_to_task_xadj_;
  part_t *proc_to_task_adj_;

  ctm->getProcTask(proc_to_task_xadj_, proc_to_task_adj_);

  for (part_t i = 0; i <= num_processors; ++i){
    proc_to_task_xadj[i] = proc_to_task_xadj_[i];
  }
  for (part_t i = 0; i < num_tasks; ++i){
    proc_to_task_adj[i] = proc_to_task_adj_[i];
  }
  delete ctm;
  delete envConst_;
}
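
// Usage sketch (illustrative; variable names are placeholders): map 4 tasks
// onto 4 processors, both given as 2-D coordinate arrays, with no edge
// weights, no forced part counts, and no wrap-around machine dimensions.
//
//   part_t proc_to_task_xadj[5], proc_to_task_adj[4];
//   Zoltan2::coordinateTaskMapperInterface<part_t, pcoord_t, tcoord_t>(
//       problemComm, 2, 4, machine_coords, 2, 4, task_coords,
//       task_comm_xadj, task_comm_adj, NULL,
//       proc_to_task_xadj, proc_to_task_adj,
//       2 /*recursion_depth*/, NULL, NULL);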


} // namespace Zoltan2

#endif