Zoltan2
Zoltan2_PartitioningSolution.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Zoltan2: A package of combinatorial algorithms for scientific computing
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Karen Devine (kddevin@sandia.gov)
39 // Erik Boman (egboman@sandia.gov)
40 // Siva Rajamanickam (srajama@sandia.gov)
41 //
42 // ***********************************************************************
43 //
44 // @HEADER
45 
50 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
51 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
52 
53 namespace Zoltan2 {
54 template <typename Adapter>
56 }
57 
58 #include <Zoltan2_Environment.hpp>
59 #include <Zoltan2_Solution.hpp>
60 #include <Zoltan2_GreedyMWM.hpp>
61 #include <Zoltan2_Algorithm.hpp>
63 #include <cmath>
64 #include <algorithm>
65 #include <vector>
66 #include <limits>
67 
68 #ifdef _MSC_VER
69 #define NOMINMAX
70 #include <windows.h>
71 #endif
72 
73 
74 namespace Zoltan2 {
75 
90 template <typename Adapter>
91  class PartitioningSolution : public Solution
92 {
93 public:
94 
95 #ifndef DOXYGEN_SHOULD_SKIP_THIS
96  typedef typename Adapter::gno_t gno_t;
97  typedef typename Adapter::scalar_t scalar_t;
98  typedef typename Adapter::lno_t lno_t;
99  typedef typename Adapter::part_t part_t;
100  typedef typename Adapter::user_t user_t;
101 #endif
102 
120  PartitioningSolution( const RCP<const Environment> &env,
121  RCP<const Comm<int> > &comm,
122  int nUserWeights,
123  const RCP<Algorithm<Adapter> > &algorithm = Teuchos::null);
124 
154  PartitioningSolution(const RCP<const Environment> &env,
155  RCP<const Comm<int> > &comm,
156  int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
157  ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
158  const RCP<Algorithm<Adapter> > &algorithm = Teuchos::null);
159 
161  // Information that the algorithm may wish to query.
162 
165  size_t getTargetGlobalNumberOfParts() const { return nGlobalParts_; }
166 
169  size_t getActualGlobalNumberOfParts() const { return nGlobalPartsSolution_; }
170 
173  size_t getLocalNumberOfParts() const { return nLocalParts_; }
174 
182  scalar_t getLocalFractionOfPart() const { return localFraction_; }
183 
193  bool oneToOnePartDistribution() const { return onePartPerProc_; }
194 
214  const int *getPartDistribution() const {
215  if (partDist_.size() > 0) return &partDist_[0];
216  else return NULL;
217  }
218 
235  const part_t *getProcDistribution() const {
236  if (procDist_.size() > 0) return &procDist_[0];
237  else return NULL;
238  }
239 
243  int getNumberOfCriteria() const { return nWeightsPerObj_; }
244 
245 
252  bool criteriaHasUniformPartSizes(int idx) const { return pSizeUniform_[idx];}
253 
266  scalar_t getCriteriaPartSize(int idx, part_t part) const {
267  if (pSizeUniform_[idx])
268  return 1.0 / nGlobalParts_;
269  else if (pCompactIndex_[idx].size())
270  return pSize_[idx][pCompactIndex_[idx][part]];
271  else
272  return pSize_[idx][part];
273  }
274 
288  bool criteriaHaveSamePartSizes(int c1, int c2) const;
289 
291  // Method used by the algorithm to set results.
292 
319  void setParts(ArrayRCP<part_t> &partList);
320 
322 
334  void RemapParts();
335 
337  /* Return the weight of objects staying with a given remap.
338  * If remap is NULL, compute weight of objects staying with given partition
339  */
340  long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt,
341  part_t nrhs, part_t nlhs)
342  {
343  long staying = 0;
344  for (part_t i = 0; i < nrhs; i++) {
345  part_t k = (remap ? remap[i] : i);
346  for (part_t j = idx[k]; j < idx[k+1]; j++) {
347  if (i == (adj[j]-nlhs)) {
348  staying += wgt[j];
349  break;
350  }
351  }
352  }
353  return staying;
354  }
355 
357  // Results that may be queried by the user, by migration methods,
358  // or by metric calculation methods.
359  // We return raw pointers so users don't have to learn about our
360  // pointer wrappers.
361 
364  inline const RCP<const Comm<int> > &getCommunicator() const { return comm_;}
365 
368  inline const RCP<const Environment> &getEnvironment() const { return env_;}
369 
372  const part_t *getPartListView() const {
373  if (parts_.size() > 0) return parts_.getRawPtr();
374  else return NULL;
375  }
376 
381  const int *getProcListView() const {
382  if (procs_.size() > 0) return procs_.getRawPtr();
383  else return NULL;
384  }
385 
386 
389  std::vector<Zoltan2::coordinateModelPartBox<scalar_t, part_t> > &
391  {
392  return this->algorithm_->getPartBoxesView();
393  }
394 
396  // when a point lies on a part boundary, the lowest part
397  // number on that boundary is returned.
398  // Note that not all partitioning algorithms will support
399  // this method.
400  //
401  // \param dim : the number of dimensions specified for the point in space
402  // \param point : the coordinates of the point in space; array of size dim
403  // \return the part number of a part overlapping the given point
404  part_t pointAssign(int dim, scalar_t *point) const
405  {
406  part_t p;
407  try {
408  if (this->algorithm_ == Teuchos::null)
409  throw std::logic_error("no partitioning algorithm has been run yet");
410 
411  p = this->algorithm_->pointAssign(dim, point);
412  }
414  return p;
415  }
416 
418  // This method allocates memory for the return argument, but does not
419  // control that memory. The user is responsible for freeing the
420  // memory.
421  //
422  // \param dim : (in) the number of dimensions specified for the box
423  // \param lower : (in) the coordinates of the lower corner of the box;
424  // array of size dim
425  // \param upper : (in) the coordinates of the upper corner of the box;
426  // array of size dim
427  // \param nPartsFound : (out) the number of parts overlapping the box
428  // \param partsFound : (out) array of parts overlapping the box
429  void boxAssign(int dim, scalar_t *lower, scalar_t *upper,
430  size_t &nPartsFound, part_t **partsFound) const
431  {
432  try {
433  if (this->algorithm_ == Teuchos::null)
434  throw std::logic_error("no partitioning algorithm has been run yet");
435 
436  this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound);
437  }
439  }
440 
441 
444  void getCommunicationGraph(ArrayRCP <part_t> &comXAdj,
445  ArrayRCP <part_t> &comAdj) const
446  {
447  try {
448  if (this->algorithm_ == Teuchos::null)
449  throw std::logic_error("no partitioning algorithm has been run yet");
450 
451  this->algorithm_->getCommunicationGraph(this, comXAdj, comAdj);
452  }
454  }
455 
473  void getPartsForProc(int procId, double &numParts, part_t &partMin,
474  part_t &partMax) const
475  {
476  env_->localInputAssertion(__FILE__, __LINE__, "invalid process id",
477  procId >= 0 && procId < comm_->getSize(), BASIC_ASSERTION);
478 
479  procToPartsMap(procId, numParts, partMin, partMax);
480  }
481 
493  void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
494  {
495  env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
496  partId >= 0 && partId < nGlobalParts_, BASIC_ASSERTION);
497 
498  partToProcsMap(partId, procMin, procMax);
499  }
500 
501 private:
502  void partToProc(bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
503  int numLocalParts, int numGlobalParts);
504 
505  void procToPartsMap(int procId, double &numParts, part_t &partMin,
506  part_t &partMax) const;
507 
508  void partToProcsMap(part_t partId, int &procMin, int &procMax) const;
509 
510  void setPartDistribution();
511 
512  void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
513  ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
514 
515  void computePartSizes(int widx, ArrayView<part_t> ids,
516  ArrayView<scalar_t> sizes);
517 
518  void broadcastPartSizes(int widx);
519 
520 
521  RCP<const Environment> env_; // has application communicator
522  RCP<const Comm<int> > comm_; // the problem communicator
523 
524  //part box boundaries as a result of geometric partitioning algorithm.
525  RCP < std::vector <Zoltan2::coordinateModelPartBox <scalar_t, part_t> > > partBoxes;
526 
527  part_t nGlobalParts_;// target global number of parts
528  part_t nLocalParts_; // number of parts to be on this process
529 
530  scalar_t localFraction_; // approx fraction of a part on this process
531  int nWeightsPerObj_; // if user has no weights, this is 1 TODO: WHY???
532 
533  // If process p is to be assigned part p for all p, then onePartPerProc_
534  // is true. Otherwise it is false, and either procDist_ or partDist_
535  // describes the allocation of parts to processes.
536  //
537  // If parts are never split across processes, then procDist_ is defined
538  // as follows:
539  //
540  // partId = procDist_[procId]
541  // partIdNext = procDist_[procId+1]
542  // globalNumberOfParts = procDist_[numProcs]
543  //
544  // meaning that the parts assigned to process procId range from
545  // [partId, partIdNext). If partIdNext is the same as partId, then
546  // process procId has no parts.
547  //
548  // If the number parts is less than the number of processes, and the
549  // user did not specify "num_local_parts" for each of the processes, then
550  // parts are split across processes, and partDist_ is defined rather than
551  // procDist_.
552  //
553  // procId = partDist_[partId]
554  // procIdNext = partDist_[partId+1]
555  // globalNumberOfProcs = partDist_[numParts]
556  //
557  // which implies that the part partId is shared by processes in the
558  // the range [procId, procIdNext).
559  //
560  // We use std::vector so we can use upper_bound algorithm
561 
562  bool onePartPerProc_; // either this is true...
563  std::vector<int> partDist_; // or this is defined ...
564  std::vector<part_t> procDist_; // or this is defined.
565  bool procDistEquallySpread_; // if procDist_ is used and
566  // #parts > #procs and
567  // num_local_parts is not specified,
568  // parts are evenly distributed to procs
569 
570  // In order to minimize the storage required for part sizes, we
571  // have three different representations.
572  //
573  // If the part sizes for weight index w are all the same, then:
574  // pSizeUniform_[w] = true
575  // pCompactIndex_[w].size() = 0
576  // pSize_[w].size() = 0
577  //
578  // and the size for part p is 1.0 / nparts.
579  //
580  // If part sizes differ for each part in weight index w, but there
581  // are no more than 64 distinct sizes:
582  // pSizeUniform_[w] = false
583  // pCompactIndex_[w].size() = number of parts
584  // pSize_[w].size() = number of different sizes
585  //
586  // and the size for part p is pSize_[pCompactIndex_[p]].
587  //
588  // If part sizes differ for each part in weight index w, and there
589  // are more than 64 distinct sizes:
590  // pSizeUniform_[w] = false
591  // pCompactIndex_[w].size() = 0
592  // pSize_[w].size() = nparts
593  //
594  // and the size for part p is pSize_[p].
595  //
596  // NOTE: If we expect to have similar cases, i.e. a long list of scalars
597  // where it is highly possible that just a few unique values appear,
598  // then we may want to make this a class. The advantage is that we
599  // save a long list of 1-byte indices instead of a long list of scalars.
600 
601  ArrayRCP<bool> pSizeUniform_;
602  ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
603  ArrayRCP<ArrayRCP<scalar_t> > pSize_;
604 
606  // The algorithm sets these values upon completion.
607 
608  ArrayRCP<part_t> parts_; // part number assigned to localid[i]
609 
610  bool haveSolution_;
611 
612  part_t nGlobalPartsSolution_; // global number of parts in solution
613 
615  // The solution calculates this from the part assignments,
616  // unless onePartPerProc_.
617 
618  ArrayRCP<int> procs_; // process rank assigned to localid[i]
619 
621  // Algorithm used to compute the solution;
622  // needed for post-processing with pointAssign or getCommunicationGraph
623  const RCP<Algorithm<Adapter> > algorithm_; //
624 };
625 
627 // Definitions
629 
630 template <typename Adapter>
632  const RCP<const Environment> &env,
633  RCP<const Comm<int> > &comm,
634  int nUserWeights,
635  const RCP<Algorithm<Adapter> > &algorithm)
636  : env_(env), comm_(comm),
637  partBoxes(),
638  nGlobalParts_(0), nLocalParts_(0),
639  localFraction_(0), nWeightsPerObj_(),
640  onePartPerProc_(false), partDist_(), procDist_(),
641  procDistEquallySpread_(false),
642  pSizeUniform_(), pCompactIndex_(), pSize_(),
643  parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
644  procs_(), algorithm_(algorithm)
645 {
646  nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO?
647 
648  setPartDistribution();
649 
650  // We must call setPartSizes() because part sizes may have
651  // been provided by the user on other processes.
652 
653  ArrayRCP<part_t> *noIds = new ArrayRCP<part_t> [nWeightsPerObj_];
654  ArrayRCP<scalar_t> *noSizes = new ArrayRCP<scalar_t> [nWeightsPerObj_];
655  ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_, true);
656  ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, true);
657 
658  setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
659 
660  env_->memory("After construction of solution");
661 }
662 
663 template <typename Adapter>
665  const RCP<const Environment> &env,
666  RCP<const Comm<int> > &comm,
667  int nUserWeights,
668  ArrayView<ArrayRCP<part_t> > reqPartIds,
669  ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
670  const RCP<Algorithm<Adapter> > &algorithm)
671  : env_(env), comm_(comm),
672  partBoxes(),
673  nGlobalParts_(0), nLocalParts_(0),
674  localFraction_(0), nWeightsPerObj_(),
675  onePartPerProc_(false), partDist_(), procDist_(),
676  procDistEquallySpread_(false),
677  pSizeUniform_(), pCompactIndex_(), pSize_(),
678  parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
679  procs_(), algorithm_(algorithm)
680 {
681  nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1); // TODO: WHY?? WHY NOT ZERO?
682 
683  setPartDistribution();
684 
685  setPartSizes(reqPartIds, reqPartSizes);
686 
687  env_->memory("After construction of solution");
688 }
689 
690 template <typename Adapter>
692 {
693  // Did the caller define num_global_parts and/or num_local_parts?
694 
695  const ParameterList &pl = env_->getParameters();
696  size_t haveGlobalNumParts=0, haveLocalNumParts=0;
697  int numLocal=0, numGlobal=0;
698  double val;
699 
700  const Teuchos::ParameterEntry *pe = pl.getEntryPtr("num_global_parts");
701 
702  if (pe){
703  val = pe->getValue<double>(&val); // TODO: KDD Skip this double get
704  haveGlobalNumParts = 1; // TODO: KDD Should be unnecessary once
705  numGlobal = static_cast<int>(val); // TODO: KDD paramlist handles long long.
706  nGlobalParts_ = part_t(numGlobal); // TODO: KDD also do below.
707  }
708 
709  pe = pl.getEntryPtr("num_local_parts");
710 
711  if (pe){
712  val = pe->getValue<double>(&val);
713  haveLocalNumParts = 1;
714  numLocal = static_cast<int>(val);
715  nLocalParts_ = part_t(numLocal);
716  }
717 
718  try{
719  // Sets onePartPerProc_, partDist_, and procDist_
720 
721  partToProc(true, haveLocalNumParts, haveGlobalNumParts,
722  numLocal, numGlobal);
723  }
725 
726  int nprocs = comm_->getSize();
727  int rank = comm_->getRank();
728 
729  if (onePartPerProc_){
730  nGlobalParts_ = nprocs;
731  nLocalParts_ = 1;
732  }
733  else if (partDist_.size() > 0){ // more procs than parts
734  nGlobalParts_ = partDist_.size() - 1;
735  int pstart = partDist_[0];
736  for (part_t i=1; i <= nGlobalParts_; i++){
737  int pend = partDist_[i];
738  if (rank >= pstart && rank < pend){
739  int numOwners = pend - pstart;
740  nLocalParts_ = 1;
741  localFraction_ = 1.0 / numOwners;
742  break;
743  }
744  pstart = pend;
745  }
746  }
747  else if (procDist_.size() > 0){ // more parts than procs
748  nGlobalParts_ = procDist_[nprocs];
749  nLocalParts_ = procDist_[rank+1] - procDist_[rank];
750  }
751  else {
752  throw std::logic_error("partToProc error");
753  }
754 
755 }
756 
757 template <typename Adapter>
759  ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
760 {
761  int widx = nWeightsPerObj_;
762  bool fail=false;
763 
764  size_t *countBuf = new size_t [widx*2];
765  ArrayRCP<size_t> counts(countBuf, 0, widx*2, true);
766 
767  fail = ((ids.size() != widx) || (sizes.size() != widx));
768 
769  for (int w=0; !fail && w < widx; w++){
770  counts[w] = ids[w].size();
771  if (ids[w].size() != sizes[w].size()) fail=true;
772  }
773 
774  env_->globalBugAssertion(__FILE__, __LINE__, "bad argument arrays", fail==0,
775  COMPLEX_ASSERTION, comm_);
776 
777  // Are all part sizes the same? This is the common case.
778 
779  ArrayRCP<scalar_t> *emptySizes= new ArrayRCP<scalar_t> [widx];
780  pSize_ = arcp(emptySizes, 0, widx);
781 
782  ArrayRCP<unsigned char> *emptyIndices= new ArrayRCP<unsigned char> [widx];
783  pCompactIndex_ = arcp(emptyIndices, 0, widx);
784 
785  bool *info = new bool [widx];
786  pSizeUniform_ = arcp(info, 0, widx);
787  for (int w=0; w < widx; w++)
788  pSizeUniform_[w] = true;
789 
790  if (nGlobalParts_ == 1){
791  return; // there's only one part in the whole problem
792  }
793 
794  size_t *ptr1 = counts.getRawPtr();
795  size_t *ptr2 = counts.getRawPtr() + widx;
796 
797  try{
798  reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
799  }
800  Z2_THROW_OUTSIDE_ERROR(*env_);
801 
802  bool zero = true;
803 
804  for (int w=0; w < widx; w++)
805  if (counts[widx+w] > 0){
806  zero = false;
807  pSizeUniform_[w] = false;
808  }
809 
810  if (zero) // Part sizes for all criteria are uniform.
811  return;
812 
813  // Compute the part sizes for criteria for which part sizes were
814  // supplied. Normalize for each criteria so part sizes sum to one.
815 
816  int nprocs = comm_->getSize();
817  int rank = comm_->getRank();
818 
819  for (int w=0; w < widx; w++){
820  if (pSizeUniform_[w]) continue;
821 
822  // Send all ids and sizes to one process.
823  // (There is no simple gather method in Teuchos.)
824 
825  part_t length = ids[w].size();
826  part_t *allLength = new part_t [nprocs];
827  Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
828  nprocs, allLength);
829 
830  if (rank == 0){
831  int total = 0;
832  for (int i=0; i < nprocs; i++)
833  total += allLength[i];
834 
835  part_t *partNums = new part_t [total];
836  scalar_t *partSizes = new scalar_t [total];
837 
838  ArrayView<part_t> idArray(partNums, total);
839  ArrayView<scalar_t> sizeArray(partSizes, total);
840 
841  if (length > 0){
842  for (int i=0; i < length; i++){
843  *partNums++ = ids[w][i];
844  *partSizes++ = sizes[w][i];
845  }
846  }
847 
848  for (int p=1; p < nprocs; p++){
849  if (allLength[p] > 0){
850  Teuchos::receive<int, part_t>(*comm_, p,
851  allLength[p], partNums);
852  Teuchos::receive<int, scalar_t>(*comm_, p,
853  allLength[p], partSizes);
854  partNums += allLength[p];
855  partSizes += allLength[p];
856  }
857  }
858 
859  delete [] allLength;
860 
861  try{
862  computePartSizes(w, idArray, sizeArray);
863  }
865 
866  delete [] idArray.getRawPtr();
867  delete [] sizeArray.getRawPtr();
868  }
869  else{
870  delete [] allLength;
871  if (length > 0){
872  Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
873  Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
874  }
875  }
876 
877  broadcastPartSizes(w);
878  }
879 }
880 
881 template <typename Adapter>
883 {
884  env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
885  pSize_.size()>widx &&
886  pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
888 
889  int rank = comm_->getRank();
890  int nprocs = comm_->getSize();
891  part_t nparts = nGlobalParts_;
892 
893  if (nprocs < 2)
894  return;
895 
896  char flag=0;
897 
898  if (rank == 0){
899  if (pSizeUniform_[widx] == true)
900  flag = 1;
901  else if (pCompactIndex_[widx].size() > 0)
902  flag = 2;
903  else
904  flag = 3;
905  }
906 
907  try{
908  Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
909  }
910  Z2_THROW_OUTSIDE_ERROR(*env_);
911 
912  if (flag == 1){
913  if (rank > 0)
914  pSizeUniform_[widx] = true;
915 
916  return;
917  }
918 
919  if (flag == 2){
920 
921  // broadcast the indices into the size list
922 
923  unsigned char *idxbuf = NULL;
924 
925  if (rank > 0){
926  idxbuf = new unsigned char [nparts];
927  env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
928  }
929  else{
930  env_->localBugAssertion(__FILE__, __LINE__, "index list size",
931  pCompactIndex_[widx].size() == nparts, COMPLEX_ASSERTION);
932  idxbuf = pCompactIndex_[widx].getRawPtr();
933  }
934 
935  try{
936  // broadcast of unsigned char is not supported
937  Teuchos::broadcast<int, char>(*comm_, 0, nparts,
938  reinterpret_cast<char *>(idxbuf));
939  }
940  Z2_THROW_OUTSIDE_ERROR(*env_);
941 
942  if (rank > 0)
943  pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, true);
944 
945  // broadcast the list of different part sizes
946 
947  unsigned char maxIdx=0;
948  for (part_t p=0; p < nparts; p++)
949  if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
950 
951  int numSizes = maxIdx + 1;
952 
953  scalar_t *sizeList = NULL;
954 
955  if (rank > 0){
956  sizeList = new scalar_t [numSizes];
957  env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
958  }
959  else{
960  env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
961  numSizes == pSize_[widx].size(), COMPLEX_ASSERTION);
962 
963  sizeList = pSize_[widx].getRawPtr();
964  }
965 
966  try{
967  Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
968  }
969  Z2_THROW_OUTSIDE_ERROR(*env_);
970 
971  if (rank > 0)
972  pSize_[widx] = arcp(sizeList, 0, numSizes, true);
973 
974  return;
975  }
976 
977  if (flag == 3){
978 
979  // broadcast the size of each part
980 
981  scalar_t *sizeList = NULL;
982 
983  if (rank > 0){
984  sizeList = new scalar_t [nparts];
985  env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
986  }
987  else{
988  env_->localBugAssertion(__FILE__, __LINE__, "wrong number of sizes",
989  nparts == pSize_[widx].size(), COMPLEX_ASSERTION);
990 
991  sizeList = pSize_[widx].getRawPtr();
992  }
993 
994  try{
995  Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
996  }
997  Z2_THROW_OUTSIDE_ERROR(*env_);
998 
999  if (rank > 0)
1000  pSize_[widx] = arcp(sizeList, 0, nparts);
1001 
1002  return;
1003  }
1004 }
1005 
1006 template <typename Adapter>
1008  ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
1009 {
1010  int len = ids.size();
1011 
1012  if (len == 0){
1013  pSizeUniform_[widx] = true;
1014  return;
1015  }
1016 
1017  env_->localBugAssertion(__FILE__, __LINE__, "bad array sizes",
1018  len>0 && sizes.size()==len, COMPLEX_ASSERTION);
1019 
1020  env_->localBugAssertion(__FILE__, __LINE__, "bad index",
1021  widx>=0 && widx<nWeightsPerObj_, COMPLEX_ASSERTION);
1022 
1023  env_->localBugAssertion(__FILE__, __LINE__, "preallocations",
1024  pSize_.size()>widx &&
1025  pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
1027 
1028  // Check ids and sizes and find min, max and average sizes.
1029  // If sizes are very close to uniform, call them uniform parts.
1030 
1031  part_t nparts = nGlobalParts_;
1032  unsigned char *buf = new unsigned char [nparts];
1033  env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
1034  memset(buf, 0, nparts);
1035  ArrayRCP<unsigned char> partIdx(buf, 0, nparts, true);
1036 
1037  scalar_t epsilon = 10e-5 / nparts;
1038  scalar_t min=sizes[0], max=sizes[0], sum=0;
1039 
1040  for (int i=0; i < len; i++){
1041  part_t id = ids[i];
1042  scalar_t size = sizes[i];
1043 
1044  env_->localInputAssertion(__FILE__, __LINE__, "invalid part id",
1045  id>=0 && id<nparts, BASIC_ASSERTION);
1046 
1047  env_->localInputAssertion(__FILE__, __LINE__, "invalid part size", size>=0,
1048  BASIC_ASSERTION);
1049 
1050  // TODO: we could allow users to specify multiple sizes for the same
1051  // part if we add a parameter that says what we are to do with them:
1052  // add them or take the max.
1053 
1054  env_->localInputAssertion(__FILE__, __LINE__,
1055  "multiple sizes provided for one part", partIdx[id]==0, BASIC_ASSERTION);
1056 
1057  partIdx[id] = 1; // mark that we have a size for this part
1058 
1059  if (size < min) min = size;
1060  if (size > max) max = size;
1061  sum += size;
1062  }
1063 
1064  if (sum == 0){
1065 
1066  // User has given us a list of parts of size 0, we'll set
1067  // the rest to them to equally sized parts.
1068 
1069  scalar_t *allSizes = new scalar_t [2];
1070  env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
1071 
1072  ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, true);
1073 
1074  int numNonZero = nparts - len;
1075 
1076  allSizes[0] = 0.0;
1077  allSizes[1] = 1.0 / numNonZero;
1078 
1079  for (part_t p=0; p < nparts; p++)
1080  buf[p] = 1; // index to default part size
1081 
1082  for (int i=0; i < len; i++)
1083  buf[ids[i]] = 0; // index to part size zero
1084 
1085  pSize_[widx] = sizeArray;
1086  pCompactIndex_[widx] = partIdx;
1087 
1088  return;
1089  }
1090 
1091  if (max - min <= epsilon){
1092  pSizeUniform_[widx] = true;
1093  return;
1094  }
1095 
1096  // A size for parts that were not specified:
1097  scalar_t avg = sum / nparts;
1098 
1099  // We are going to merge part sizes that are very close. This takes
1100  // computation time now, but can save considerably in the storage of
1101  // all part sizes on each process. For example, a common case may
1102  // be some parts are size 1 and all the rest are size 2.
1103 
1104  scalar_t *tmp = new scalar_t [len];
1105  env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
1106  memcpy(tmp, sizes.getRawPtr(), sizeof(scalar_t) * len);
1107  ArrayRCP<scalar_t> partSizes(tmp, 0, len, true);
1108 
1109  std::sort(partSizes.begin(), partSizes.end());
1110 
1111  // create a list of sizes that are unique within epsilon
1112 
1113  Array<scalar_t> nextUniqueSize;
1114  nextUniqueSize.push_back(partSizes[len-1]); // largest
1115  scalar_t curr = partSizes[len-1];
1116  int avgIndex = len;
1117  bool haveAvg = false;
1118  if (curr - avg <= epsilon)
1119  avgIndex = 0;
1120 
1121  for (int i=len-2; i >= 0; i--){
1122  scalar_t val = partSizes[i];
1123  if (curr - val > epsilon){
1124  nextUniqueSize.push_back(val); // the highest in the group
1125  curr = val;
1126  if (avgIndex==len && val > avg && val - avg <= epsilon){
1127  // the average would be in this group
1128  avgIndex = nextUniqueSize.size() - 1;
1129  haveAvg = true;
1130  }
1131  }
1132  }
1133 
1134  partSizes.clear();
1135 
1136  size_t numSizes = nextUniqueSize.size();
1137  int sizeArrayLen = numSizes;
1138 
1139  if (numSizes < 64){
1140  // We can store the size for every part in a compact way.
1141 
1142  // Create a list of all sizes in increasing order
1143 
1144  if (!haveAvg) sizeArrayLen++; // need to include average
1145 
1146  scalar_t *allSizes = new scalar_t [sizeArrayLen];
1147  env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
1148  ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, true);
1149 
1150  int newAvgIndex = sizeArrayLen;
1151 
1152  for (int i=numSizes-1, idx=0; i >= 0; i--){
1153 
1154  if (newAvgIndex == sizeArrayLen){
1155 
1156  if (haveAvg && i==avgIndex)
1157  newAvgIndex = idx;
1158 
1159  else if (!haveAvg && avg < nextUniqueSize[i]){
1160  newAvgIndex = idx;
1161  allSizes[idx++] = avg;
1162  }
1163  }
1164 
1165  allSizes[idx++] = nextUniqueSize[i];
1166  }
1167 
1168  env_->localBugAssertion(__FILE__, __LINE__, "finding average in list",
1169  newAvgIndex < sizeArrayLen, COMPLEX_ASSERTION);
1170 
1171  for (int i=0; i < nparts; i++){
1172  buf[i] = newAvgIndex; // index to default part size
1173  }
1174 
1175  sum = (nparts - len) * allSizes[newAvgIndex];
1176 
1177  for (int i=0; i < len; i++){
1178  int id = ids[i];
1179  scalar_t size = sizes[i];
1180  int index;
1181 
1182  // Find the first size greater than or equal to this size.
1183 
1184  if (size < avg && avg - size <= epsilon)
1185  index = newAvgIndex;
1186  else{
1187  typename ArrayRCP<scalar_t>::iterator found =
1188  std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
1189 
1190  env_->localBugAssertion(__FILE__, __LINE__, "size array",
1191  found != sizeArray.end(), COMPLEX_ASSERTION);
1192 
1193  index = found - sizeArray.begin();
1194  }
1195 
1196  buf[id] = index;
1197  sum += allSizes[index];
1198  }
1199 
1200  for (int i=0; i < sizeArrayLen; i++){
1201  sizeArray[i] /= sum;
1202  }
1203 
1204  pCompactIndex_[widx] = partIdx;
1205  pSize_[widx] = sizeArray;
1206  }
1207  else{
1208  // To have access to part sizes, we must store nparts scalar_ts on
1209  // every process. We expect this is a rare case.
1210 
1211  tmp = new scalar_t [nparts];
1212  env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
1213 
1214  sum += ((nparts - len) * avg);
1215 
1216  for (int i=0; i < nparts; i++){
1217  tmp[i] = avg/sum;
1218  }
1219 
1220  for (int i=0; i < len; i++){
1221  tmp[ids[i]] = sizes[i]/sum;
1222  }
1223 
1224  pSize_[widx] = arcp(tmp, 0, nparts);
1225  }
1226 }
1227 
1228 template <typename Adapter>
1229  void PartitioningSolution<Adapter>::setParts(ArrayRCP<part_t> &partList)
1230 {
1231  env_->debug(DETAILED_STATUS, "Entering setParts");
1232 
1233  size_t len = partList.size();
1234 
1235  // Find the actual number of parts in the solution, which can
1236  // be more or less than the nGlobalParts_ target.
1237  // (We may want to compute the imbalance of a given solution with
1238  // respect to a desired solution. This solution may have more or
1239  // fewer parts that the desired solution.)
1240 
1241  part_t lMax = std::numeric_limits<part_t>::min();
1242  part_t lMin = std::numeric_limits<part_t>::max();
1243  part_t gMax, gMin;
1244 
1245  for (size_t i = 0; i < len; i++) {
1246  if (partList[i] < lMin) lMin = partList[i];
1247  if (partList[i] > lMax) lMax = partList[i];
1248  }
1249  Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
1250  Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
1251 
1252  nGlobalPartsSolution_ = gMax - gMin + 1;
1253  parts_ = partList;
1254 
1255  // Now determine which process gets each object, if not one-to-one.
1256 
1257  if (!onePartPerProc_){
1258 
1259  int *procs = new int [len];
1260  env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
1261  procs_ = arcp<int>(procs, 0, len);
1262 
1263  part_t *parts = partList.getRawPtr();
1264 
1265  if (procDist_.size() > 0){ // parts are not split across procs
1266 
1267  int procId;
1268  for (size_t i=0; i < len; i++){
1269  partToProcsMap(parts[i], procs[i], procId);
1270  }
1271  }
1272  else{ // harder - we need to split the parts across multiple procs
1273 
1274  lno_t *partCounter = new lno_t [nGlobalPartsSolution_];
1275  env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
1276  partCounter);
1277 
1278  int numProcs = comm_->getSize();
1279 
1280  //MD NOTE: there was no initialization for partCounter.
1281  //I added the line below, correct me if I am wrong.
1282  memset(partCounter, 0, sizeof(lno_t) * nGlobalPartsSolution_);
1283 
1284  for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
1285  partCounter[parts[i]]++;
1286 
1287  lno_t *procCounter = new lno_t [numProcs];
1288  env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
1289 
1290  int proc1;
1291  int proc2 = partDist_[0];
1292 
1293  for (part_t part=1; part < nGlobalParts_; part++){
1294  proc1 = proc2;
1295  proc2 = partDist_[part+1];
1296  int numprocs = proc2 - proc1;
1297 
1298  double dNum = partCounter[part];
1299  double dProcs = numprocs;
1300 
1301  //cout << "dNum:" << dNum << " dProcs:" << dProcs << endl;
1302  double each = floor(dNum/dProcs);
1303  double extra = fmod(dNum,dProcs);
1304 
1305  for (int proc=proc1, i=0; proc<proc2; proc++, i++){
1306  if (i < extra)
1307  procCounter[proc] = lno_t(each) + 1;
1308  else
1309  procCounter[proc] = lno_t(each);
1310  }
1311  }
1312 
1313  delete [] partCounter;
1314 
1315  for (typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
1316  if (partList[i] >= nGlobalParts_){
1317  // Solution has more parts that targeted. These
1318  // objects just remain on this process.
1319  procs[i] = comm_->getRank();
1320  continue;
1321  }
1322  part_t partNum = parts[i];
1323  proc1 = partDist_[partNum];
1324  proc2 = partDist_[partNum + 1];
1325 
1326  int proc;
1327  for (proc=proc1; proc < proc2; proc++){
1328  if (procCounter[proc] > 0){
1329  procs[i] = proc;
1330  procCounter[proc]--;
1331  break;
1332  }
1333  }
1334  env_->localBugAssertion(__FILE__, __LINE__, "part to proc",
1335  proc < proc2, COMPLEX_ASSERTION);
1336  }
1337 
1338  delete [] procCounter;
1339  }
1340  }
1341 
1342  // Now that parts_ info is back on home process, remap the parts.
1343  // TODO: The parts will be inconsistent with the proc assignments after
1344  // TODO: remapping. This problem will go away after we separate process
1345  // TODO: mapping from setParts. But for MueLu's use case, the part
1346  // TODO: remapping is all that matters; they do not use the process mapping.
1347  int doRemap = 0;
1348  const Teuchos::ParameterEntry *pe =
1349  env_->getParameters().getEntryPtr("remap_parts");
1350  if (pe) doRemap = pe->getValue(&doRemap);
1351  if (doRemap) RemapParts();
1352 
1353  haveSolution_ = true;
1354 
1355  env_->memory("After Solution has processed algorithm's answer");
1356  env_->debug(DETAILED_STATUS, "Exiting setParts");
1357 }
1358 
1359 
1360 template <typename Adapter>
1362  double &numParts, part_t &partMin, part_t &partMax) const
1363 {
1364  if (onePartPerProc_){
1365  numParts = 1.0;
1366  partMin = partMax = procId;
1367  }
1368  else if (procDist_.size() > 0){
1369  partMin = procDist_[procId];
1370  partMax = procDist_[procId+1] - 1;
1371  numParts = procDist_[procId+1] - partMin;
1372  }
1373  else{
1374  // find the first p such that partDist_[p] > procId
1375 
1376  std::vector<int>::const_iterator entry;
1377  entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
1378 
1379  size_t partIdx = entry - partDist_.begin();
1380  int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
1381  partMin = partMax = int(partIdx) - 1;
1382  numParts = 1.0 / numProcs;
1383  }
1384 }
1385 
1386 template <typename Adapter>
1388  int &procMin, int &procMax) const
1389 {
1390  if (partId >= nGlobalParts_){
1391  // setParts() may be given an initial solution which uses a
1392  // different number of parts than the desired solution. It is
1393  // still a solution. We keep it on this process.
1394  procMin = procMax = comm_->getRank();
1395  }
1396  else if (onePartPerProc_){
1397  procMin = procMax = int(partId);
1398  }
1399  else if (procDist_.size() > 0){
1400  if (procDistEquallySpread_) {
1401  // Avoid binary search.
1402  double fProcs = comm_->getSize();
1403  double fParts = nGlobalParts_;
1404  double each = fParts / fProcs;
1405  procMin = int(partId / each);
1406  while (procDist_[procMin] > partId) procMin--;
1407  while (procDist_[procMin+1] <= partId) procMin++;
1408  procMax = procMin;
1409  }
1410  else {
1411  // find the first p such that procDist_[p] > partId.
1412  // For now, do a binary search.
1413 
1414  typename std::vector<part_t>::const_iterator entry;
1415  entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
1416 
1417  size_t procIdx = entry - procDist_.begin();
1418  procMin = procMax = int(procIdx) - 1;
1419  }
1420  }
1421  else{
1422  procMin = partDist_[partId];
1423  procMax = partDist_[partId+1] - 1;
1424  }
1425 }
1426 
1427 template <typename Adapter>
1429  int c1, int c2) const
1430 {
1431  if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
1432  throw std::logic_error("criteriaHaveSamePartSizes error");
1433 
1434  bool theSame = false;
1435 
1436  if (c1 == c2)
1437  theSame = true;
1438 
1439  else if (pSizeUniform_[c1] == true && pSizeUniform_[c2] == true)
1440  theSame = true;
1441 
1442  else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
1443  theSame = true;
1444  bool useIndex = pCompactIndex_[c1].size() > 0;
1445  if (useIndex){
1446  for (part_t p=0; theSame && p < nGlobalParts_; p++)
1447  if (pSize_[c1][pCompactIndex_[c1][p]] !=
1448  pSize_[c2][pCompactIndex_[c2][p]])
1449  theSame = false;
1450  }
1451  else{
1452  for (part_t p=0; theSame && p < nGlobalParts_; p++)
1453  if (pSize_[c1][p] != pSize_[c2][p])
1454  theSame = false;
1455  }
1456  }
1457 
1458  return theSame;
1459 }
1460 
1476 template <typename Adapter>
1478  bool doCheck, bool haveNumLocalParts, bool haveNumGlobalParts,
1479  int numLocalParts, int numGlobalParts)
1480 {
1481 #ifdef _MSC_VER
1482  typedef SSIZE_T ssize_t;
1483 #endif
1484  int nprocs = comm_->getSize();
1485  ssize_t reducevals[4];
1486  ssize_t sumHaveGlobal=0, sumHaveLocal=0;
1487  ssize_t sumGlobal=0, sumLocal=0;
1488  ssize_t maxGlobal=0, maxLocal=0;
1489  ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
1490  numGlobalParts, numLocalParts};
1491 
1492  partDist_.clear();
1493  procDist_.clear();
1494 
1495  if (doCheck){
1496 
1497  try{
1498  reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
1499  }
1500  Z2_THROW_OUTSIDE_ERROR(*env_);
1501 
1502  sumHaveGlobal = reducevals[0];
1503  sumHaveLocal = reducevals[1];
1504  sumGlobal = reducevals[2];
1505  sumLocal = reducevals[3];
1506 
1507  env_->localInputAssertion(__FILE__, __LINE__,
1508  "Either all procs specify num_global/local_parts or none do",
1509  (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
1510  (sumHaveLocal == 0 || sumHaveLocal == nprocs),
1511  BASIC_ASSERTION);
1512  }
1513  else{
1514  if (haveNumLocalParts)
1515  sumLocal = numLocalParts * nprocs;
1516  if (haveNumGlobalParts)
1517  sumGlobal = numGlobalParts * nprocs;
1518 
1519  sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
1520  sumHaveLocal = haveNumLocalParts ? nprocs : 0;
1521 
1522  maxLocal = numLocalParts;
1523  maxGlobal = numGlobalParts;
1524  }
1525 
1526  if (!haveNumLocalParts && !haveNumGlobalParts){
1527  onePartPerProc_ = true; // default if user did not specify
1528  return;
1529  }
1530 
1531  if (haveNumGlobalParts){
1532  if (doCheck){
1533  vals[0] = numGlobalParts;
1534  vals[1] = numLocalParts;
1535  try{
1536  reduceAll<int, ssize_t>(
1537  *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
1538  }
1539  Z2_THROW_OUTSIDE_ERROR(*env_);
1540 
1541  maxGlobal = reducevals[0];
1542  maxLocal = reducevals[1];
1543 
1544  env_->localInputAssertion(__FILE__, __LINE__,
1545  "Value for num_global_parts is different on different processes.",
1546  maxGlobal * nprocs == sumGlobal, BASIC_ASSERTION);
1547  }
1548 
1549  if (sumLocal){
1550  env_->localInputAssertion(__FILE__, __LINE__,
1551  "Sum of num_local_parts does not equal requested num_global_parts",
1552  sumLocal == numGlobalParts, BASIC_ASSERTION);
1553 
1554  if (sumLocal == nprocs && maxLocal == 1){
1555  onePartPerProc_ = true; // user specified one part per proc
1556  return;
1557  }
1558  }
1559  else{
1560  if (maxGlobal == nprocs){
1561  onePartPerProc_ = true; // user specified num parts is num procs
1562  return;
1563  }
1564  }
1565  }
1566 
1567  // If we are here, we do not have #parts == #procs.
1568 
1569  if (sumHaveLocal == nprocs){
1570  //
1571  // We will go by the number of local parts specified.
1572  //
1573 
1574  try{
1575  procDist_.resize(nprocs+1);
1576  }
1577  catch (std::exception &e){
1578  throw(std::bad_alloc());
1579  }
1580 
1581  part_t *procArray = &procDist_[0];
1582 
1583  try{
1584  part_t tmp = part_t(numLocalParts);
1585  gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
1586  }
1587  Z2_THROW_OUTSIDE_ERROR(*env_);
1588 
1589  procArray[0] = 0;
1590 
1591  for (int proc=0; proc < nprocs; proc++)
1592  procArray[proc+1] += procArray[proc];
1593  }
1594  else{
1595  //
1596  // We will allocate global number of parts to the processes.
1597  //
1598  double fParts = numGlobalParts;
1599  double fProcs = nprocs;
1600 
1601  if (fParts < fProcs){
1602 
1603  try{
1604  partDist_.resize(size_t(fParts+1));
1605  }
1606  catch (std::exception &e){
1607  throw(std::bad_alloc());
1608  }
1609 
1610  int *partArray = &partDist_[0];
1611 
1612  double each = floor(fProcs / fParts);
1613  double extra = fmod(fProcs, fParts);
1614  partDist_[0] = 0;
1615 
1616  for (part_t part=0; part < numGlobalParts; part++){
1617  int numOwners = int(each + ((part<extra) ? 1 : 0));
1618  partArray[part+1] = partArray[part] + numOwners;
1619  }
1620 
1621  env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
1622  partDist_[numGlobalParts] == nprocs, COMPLEX_ASSERTION, comm_);
1623  }
1624  else if (fParts > fProcs){
1625 
1626  // User did not specify local number of parts per proc;
1627  // Distribute the parts evenly among the procs.
1628 
1629  procDistEquallySpread_ = true;
1630 
1631  try{
1632  procDist_.resize(size_t(fProcs+1));
1633  }
1634  catch (std::exception &e){
1635  throw(std::bad_alloc());
1636  }
1637 
1638  part_t *procArray = &procDist_[0];
1639 
1640  double each = floor(fParts / fProcs);
1641  double extra = fmod(fParts, fProcs);
1642  procArray[0] = 0;
1643 
1644  for (int proc=0; proc < nprocs; proc++){
1645  part_t numParts = part_t(each + ((proc<extra) ? 1 : 0));
1646  procArray[proc+1] = procArray[proc] + numParts;
1647  }
1648 
1649  env_->globalBugAssertion(__FILE__, __LINE__, "#parts != #procs",
1650  procDist_[nprocs] == numGlobalParts, COMPLEX_ASSERTION, comm_);
1651  }
1652  else{
1653  env_->globalBugAssertion(__FILE__, __LINE__,
1654  "should never get here", 1, COMPLEX_ASSERTION, comm_);
1655  }
1656  }
1657 }
1658 
1660 // Remap a new part assignment vector for maximum overlap with an input
1661 // part assignment.
1662 //
1663 // Assumptions for this version:
1664 // input part assignment == processor rank for every local object.
1665 // assuming nGlobalParts_ <= num ranks
1666 // TODO: Write a version that takes the input part number as input;
1667 // this change requires input parts in adapters to be provided in
1668 // the Adapter.
1669 // TODO: For repartitioning, compare to old remapping results; see Zoltan1.
1670 
1671 template <typename Adapter>
1673 {
1674  size_t len = parts_.size();
1675 
1676  part_t me = comm_->getRank();
1677  int np = comm_->getSize();
1678 
1679  if (np < nGlobalParts_) {
1680  if (me == 0)
1681  std::cout << "Remapping not yet supported for "
1682  << "num_global_parts " << nGlobalParts_
1683  << " > num procs " << np << std::endl;
1684  return;
1685  }
1686  // Build edges of a bipartite graph with np + nGlobalParts_ vertices,
1687  // and edges between a process vtx and any parts to which that process'
1688  // objects are assigned.
1689  // Weight edge[parts_[i]] by the number of objects that are going from
1690  // this rank to parts_[i].
1691  // We use a std::map, assuming the number of unique parts in the parts_ array
1692  // is small to keep the binary search efficient.
1693  // TODO We use the count of objects to move; should change to SIZE of objects
1694  // to move; need SIZE function in Adapter.
1695 
1696  std::map<part_t, long> edges;
1697  long lstaying = 0; // Total num of local objects staying if we keep the
1698  // current mapping. TODO: change to SIZE of local objs
1699  long gstaying = 0; // Total num of objects staying in the current partition
1700 
1701  for (size_t i = 0; i < len; i++) {
1702  edges[parts_[i]]++; // TODO Use obj size instead of count
1703  if (parts_[i] == me) lstaying++; // TODO Use obj size instead of count
1704  }
1705 
1706  Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
1707  &lstaying, &gstaying);
1708 //TODO if (gstaying == Adapter::getGlobalNumObjs()) return; // Nothing to do
1709 
1710  part_t *remap = NULL;
1711 
1712  int nedges = edges.size();
1713 
1714  // Gather the graph to rank 0.
1715  part_t tnVtx = np + nGlobalParts_; // total # vertices
1716  int *idx = NULL; // Pointer index into graph adjacencies
1717  int *sizes = NULL; // nedges per rank
1718  if (me == 0) {
1719  idx = new int[tnVtx+1];
1720  sizes = new int[np];
1721  sizes[0] = nedges;
1722  }
1723  if (np > 1)
1724  Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
1725 
1726  // prefix sum to build the idx array
1727  if (me == 0) {
1728  idx[0] = 0;
1729  for (int i = 0; i < np; i++)
1730  idx[i+1] = idx[i] + sizes[i];
1731  }
1732 
1733  // prepare to send edges
1734  int cnt = 0;
1735  part_t *bufv = NULL;
1736  long *bufw = NULL;
1737  if (nedges) {
1738  bufv = new part_t[nedges];
1739  bufw = new long[nedges];
1740  // Create buffer with edges (me, part[i]) and weight edges[parts_[i]].
1741  for (typename std::map<part_t, long>::iterator it = edges.begin();
1742  it != edges.end(); it++) {
1743  bufv[cnt] = it->first; // target part
1744  bufw[cnt] = it->second; // weight
1745  cnt++;
1746  }
1747  }
1748 
1749  // Prepare to receive edges on rank 0
1750  part_t *adj = NULL;
1751  long *wgt = NULL;
1752  if (me == 0) {
1753 //SYM adj = new part_t[2*idx[np]]; // need 2x space to symmetrize later
1754 //SYM wgt = new long[2*idx[np]]; // need 2x space to symmetrize later
1755  adj = new part_t[idx[np]];
1756  wgt = new long[idx[np]];
1757  }
1758 
1759  Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
1760  Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
1761  delete [] bufv;
1762  delete [] bufw;
1763 
1764  // Now have constructed graph on rank 0.
1765  // Call the matching algorithm
1766 
1767  int doRemap;
1768  if (me == 0) {
1769  // We have the "LHS" vertices of the bipartite graph; need to create
1770  // "RHS" vertices.
1771  for (int i = 0; i < idx[np]; i++) {
1772  adj[i] += np; // New RHS vertex number; offset by num LHS vertices
1773  }
1774 
1775  // Build idx for RHS vertices
1776  for (part_t i = np; i < tnVtx; i++) {
1777  idx[i+1] = idx[i]; // No edges for RHS vertices
1778  }
1779 
1780 #ifdef KDDKDD_DEBUG
1781  cout << "IDX ";
1782  for (part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << " ";
1783  std::cout << std::endl;
1784 
1785  std::cout << "ADJ ";
1786  for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << " ";
1787  std::cout << std::endl;
1788 
1789  std::cout << "WGT ";
1790  for (part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << " ";
1791  std::cout << std::endl;
1792 #endif
1793 
1794  // Perform matching on the graph
1795  part_t *match = new part_t[tnVtx];
1796  for (part_t i = 0; i < tnVtx; i++) match[i] = i;
1797  part_t nmatches =
1798  Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
1799 
1800 #ifdef KDDKDD_DEBUG
1801  std::cout << "After matching: " << nmatches << " found" << std::endl;
1802  for (part_t i = 0; i < tnVtx; i++)
1803  std::cout << "match[" << i << "] = " << match[i]
1804  << ((match[i] != i &&
1805  (i < np && match[i] != i+np))
1806  ? " *" : " ")
1807  << std::endl;
1808 #endif
1809 
1810  // See whether there were nontrivial changes in the matching.
1811  bool nontrivial = false;
1812  if (nmatches) {
1813  for (part_t i = 0; i < np; i++) {
1814  if ((match[i] != i) && (match[i] != (i+np))) {
1815  nontrivial = true;
1816  break;
1817  }
1818  }
1819  }
1820 
1821  // Process the matches
1822  if (nontrivial) {
1823  remap = new part_t[nGlobalParts_];
1824  for (part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
1825 
1826  bool *used = new bool[np];
1827  for (part_t i = 0; i < np; i++) used[i] = false;
1828 
1829  // First, process all matched parts
1830  for (part_t i = 0; i < nGlobalParts_; i++) {
1831  part_t tmp = i + np;
1832  if (match[tmp] != tmp) {
1833  remap[i] = match[tmp];
1834  used[match[tmp]] = true;
1835  }
1836  }
1837 
1838  // Second, process unmatched parts; keep same part number if possible
1839  for (part_t i = 0; i < nGlobalParts_; i++) {
1840  if (remap[i] > -1) continue;
1841  if (!used[i]) {
1842  remap[i] = i;
1843  used[i] = true;
1844  }
1845  }
1846 
1847  // Third, process unmatched parts; give them the next unused part
1848  for (part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
1849  if (remap[i] > -1) continue;
1850  while (used[uidx]) uidx++;
1851  remap[i] = uidx;
1852  used[uidx] = true;
1853  }
1854  delete [] used;
1855  }
1856  delete [] match;
1857 
1858 #ifdef KDDKDD_DEBUG
1859  cout << "Remap vector: ";
1860  for (part_t i = 0; i < nGlobalParts_; i++) cout << remap[i] << " ";
1861  std::cout << std::endl;
1862 #endif
1863 
1864  long newgstaying = measure_stays(remap, idx, adj, wgt,
1865  nGlobalParts_, np);
1866  doRemap = (newgstaying > gstaying);
1867  std::cout << "gstaying " << gstaying << " measure(input) "
1868  << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
1869  << " newgstaying " << newgstaying
1870  << " nontrivial " << nontrivial
1871  << " doRemap " << doRemap << std::endl;
1872  }
1873  delete [] idx;
1874  delete [] sizes;
1875  delete [] adj;
1876  delete [] wgt;
1877 
1878  Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
1879 
1880  if (doRemap) {
1881  if (me != 0) remap = new part_t[nGlobalParts_];
1882  Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
1883  for (size_t i = 0; i < len; i++) {
1884  parts_[i] = remap[parts_[i]];
1885  }
1886  }
1887 
1888  delete [] remap; // TODO May want to keep for repartitioning as in Zoltan
1889 }
1890 
1891 
1892 } // namespace Zoltan2
1893 
1894 #endif
const int * getProcListView() const
Returns the process list corresponding to the global ID list.
scalar_t getCriteriaPartSize(int idx, part_t part) const
Get the size for a given weight index and a given part.
Defines the Solution base class.
fast typical checks for valid arguments
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack.
part_t pointAssign(int dim, scalar_t *point) const
Return the part overlapping a given point in space;.
void RemapParts()
Remap a new partition for maximum overlap with an input partition.
Just a placeholder for now.
PartitioningSolution(const RCP< const Environment > &env, RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied.
more involved, like validate a graph
size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const
Get the parts belonging to a process.
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
Get the processes containing a part.
sub-steps, each method&#39;s entry and exit
std::vector< Zoltan2::coordinateModelPartBox< scalar_t, part_t > > & getPartBoxesView() const
returns the part box boundary list.
bool oneToOnePartDistribution() const
Is the part-to-process distribution is one-to-one.
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
scalar_t getLocalFractionOfPart() const
If parts are divided across processes, return the fraction of a part on this process.
dictionary vals
Definition: xml2dox.py:186
A PartitioningSolution is a solution to a partitioning problem.
const RCP< const Comm< int > > & getCommunicator() const
Return the communicator associated with the solution.
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const
Return an array of all parts overlapping a given box in space.
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
Algorithm defines the base class for all algorithms.
Greedy Maximal Weight Matching.
static const std::string fail
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
Defines the Environment class.
const part_t * getProcDistribution() const
Return a distribution by process.
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
int getNumberOfCriteria() const
Get the number of criteria (object weights)
const int * getPartDistribution() const
Return a distribution by part.
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library.
#define epsilon
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
bool criteriaHasUniformPartSizes(int idx) const
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes...
bool criteriaHaveSamePartSizes(int c1, int c2) const
Return true if the two weight indices have the same part size information.
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
const RCP< const Environment > & getEnvironment() const
Return the environment associated with the solution.