#ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
#define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_

#include <fstream>
#include <vector>
#include <limits>
#include <cmath>
#include <cstring>

#include "Zoltan2_AlgMultiJagged.hpp"
#include "Zoltan2_XpetraMultiVectorAdapter.hpp"
#include "Teuchos_ArrayViewDecl.hpp"
#include "Teuchos_ReductionOp.hpp"
#include "Teuchos_Comm.hpp"
#ifdef HAVE_ZOLTAN2_MPI
#  include "Teuchos_DefaultMpiComm.hpp"
#else
#  include "Teuchos_DefaultSerialComm.hpp"
#endif // HAVE_ZOLTAN2_MPI

namespace Zoltan2{

/*! \brief Zoltan2_ReduceBestMapping Class, reduces the minimum cost
 *  mapping; ties are broken with the minimum processor id.
 */
template <typename Ordinal, typename T>
class Zoltan2_ReduceBestMapping :
    public Teuchos::ValueTypeReductionOp<Ordinal,T>
{
private:
    T _EPSILON;

public:
    /*! \brief Default Constructor.
     */
    Zoltan2_ReduceBestMapping():
        _EPSILON(std::numeric_limits<T>::epsilon()){}

    /*! \brief Implement Teuchos::ValueTypeReductionOp interface.
     */
    void reduce(
            const Ordinal count,
            const T inBuffer[],
            T inoutBuffer[]) const
    {
        for (Ordinal i = 0; i < count; i++){
            //each buffer holds a (cost, rank) pair in its first two
            //entries; keep the pair with the smaller cost, and on a
            //cost tie, the smaller rank.
            if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
                inoutBuffer[0] = inBuffer[0];
                inoutBuffer[1] = inBuffer[1];
            }
            else if (inBuffer[0] - inoutBuffer[0] < _EPSILON &&
                     inBuffer[1] - inoutBuffer[1] < _EPSILON){
                inoutBuffer[0] = inBuffer[0];
                inoutBuffer[1] = inBuffer[1];
            }
        }
    }
};
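/* Usage sketch (illustrative, values are hypothetical): each rank
 * contributes a (cost, rank) pair, and the reduction keeps the pair
 * with the smallest cost, breaking ties with the smallest rank.
 *
 *   double localCost[2]  = {myCost, double(myRank)};
 *   double globalCost[2] = {std::numeric_limits<double>::max(),
 *                           std::numeric_limits<double>::max()};
 *   Zoltan2_ReduceBestMapping<int, double> reduceBest;
 *   reduceAll<int, double>(comm, reduceBest, 2, localCost, globalCost);
 *   // globalCost[0] = minimum cost, globalCost[1] = owning rank.
 *
 * This is exactly how getBestMapping() below selects the cheapest
 * mapping across the subcommunicator.
 */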
/*! \brief Returns the factorial of x; used to enumerate
 *  coordinate-axis permutations. Guarded for x <= 1 to avoid
 *  unbounded recursion.
 */
template <typename it>
it z2Fact(it x) {
    return (x <= 1 ? 1 : x * z2Fact<it>(x - 1));
}
//! A (global id, part) pair, used below to look up the part of a
//! coordinate by its global id.
template <typename gno_t, typename part_t>
struct GNO_LNO_PAIR {
    gno_t gno;
    part_t part;
};
/*! \brief Computes the i-th permutation of 0..n-1 from the factorial
 *  (Lehmer) code of i.
 */
template <typename IT>
void ithPermutation(const IT n, IT i, IT *perm)
{
    IT j, k = 0;
    IT *fact = new IT[n];

    //compute the factorials: fact[k] = k!
    fact[k] = 1;
    while (++k < n)
        fact[k] = fact[k - 1] * k;

    //compute the factorial code of i.
    for (k = 0; k < n; ++k)
    {
        perm[k] = i / fact[n - 1 - k];
        i = i % fact[n - 1 - k];
    }

    //readjust the values to obtain the permutation: walk from the end
    //and increment perm[k] for every preceding value that is not
    //larger.
    for (k = n - 1; k > 0; --k)
        for (j = k - 1; j >= 0; --j)
            if (perm[j] <= perm[k])
                perm[k]++;

    delete [] fact;
}
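/* Worked example: n = 3, i = 4.
 * fact = {1, 1, 2}; the factorial code of 4 is {4/2, 0/1, 0/1} =
 * {2, 0, 0}; the readjustment pass increments perm[2] once (because
 * perm[1] <= perm[2]), giving perm = {2, 0, 1} -- the permutation of
 * {0,1,2} with lexicographic index 4 (counting from 0).
 */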
/*! \brief Builds the CSR communication graph of a regular grid: each
 *  task is connected to its left and right neighbor along every grid
 *  dimension.
 */
template <typename part_t>
void getGridCommunicationGraph(
        part_t taskCount,
        part_t *&task_comm_xadj,
        part_t *&task_comm_adj,
        std::vector<int> grid_dims)
{
    int dim = grid_dims.size();
    int neighborCount = 2 * dim;
    task_comm_xadj = allocMemory<part_t>(taskCount + 1);
    task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);

    part_t neighBorIndex = 0;
    task_comm_xadj[0] = 0;
    for (part_t i = 0; i < taskCount; ++i){
        part_t prevDimMul = 1;
        for (int j = 0; j < dim; ++j){
            part_t lNeighbor = i - prevDimMul;
            part_t rNeighbor = i + prevDimMul;
            prevDimMul *= grid_dims[j];
            //a neighbor is valid only if it lies in the same slab of
            //the current dimension (no wrap-around at the boundary).
            if (lNeighbor >= 0 &&
                lNeighbor / prevDimMul == i / prevDimMul &&
                lNeighbor < taskCount){
                task_comm_adj[neighBorIndex++] = lNeighbor;
            }
            if (rNeighbor >= 0 &&
                rNeighbor / prevDimMul == i / prevDimMul &&
                rNeighbor < taskCount){
                task_comm_adj[neighBorIndex++] = rNeighbor;
            }
        }
        task_comm_xadj[i + 1] = neighBorIndex;
    }
}
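/* Example: grid_dims = {3, 2} (six tasks, laid out with dimension 0
 * varying fastest) produces the CSR arrays
 *   task_comm_xadj = {0, 2, 5, 7, 9, 12, 14}
 *   task_comm_adj  = {1,3, 0,2,4, 1,5, 4,0, 3,5,1, 4,2}
 * i.e., task 4 talks to 3 and 5 (left/right) and to 1 (its neighbor
 * along the second dimension). The division test prevents task 2 from
 * being paired with task 3, which is adjacent in memory but lies on a
 * different grid row.
 */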
/*! \brief Computes the coordinate-wise centers of the parts in the
 *  given partitioning solution.
 */
template <typename Adapter, typename scalar_t, typename part_t>
void getSolutionCenterCoordinates(
        const Environment *envConst,
        const Teuchos::Comm<int> *comm,
        const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *coords,
        const Zoltan2::PartitioningSolution<Adapter> *soln_,
        int coordDim,
        part_t ntasks,
        scalar_t **partCenters){
    typedef typename Adapter::lno_t lno_t;
    typedef typename Adapter::gno_t gno_t;
    typedef StridedData<lno_t, scalar_t> input_t;

    ArrayView<const gno_t> gnos;
    ArrayView<input_t> xyz;
    ArrayView<input_t> wgts;

    coords->getCoordinates(gnos, xyz, wgts);

    //local number of coordinates.
    lno_t numLocalCoords = coords->getLocalNumCoordinates();

    //local number of points in each part.
    gno_t *point_counts = allocMemory<gno_t>(ntasks);
    memset(point_counts, 0, sizeof(gno_t) * ntasks);

    //global number of points in each part.
    gno_t *global_point_counts = allocMemory<gno_t>(ntasks);

    scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);

    for (int dim = 0; dim < coordDim; dim++){
        ArrayRCP<const scalar_t> ar;
        xyz[dim].getInputArray(ar);
        //cast away the constness; the coordinates are only read.
        multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
        memset(partCenters[dim], 0, sizeof(scalar_t) * ntasks);
    }
    //the parts of the local coordinates.
    const part_t *parts = soln_->getPartListView();

    //hash the local coordinates by their global ids, so that the part
    //of a coordinate can be looked up by gno below.
    std::vector< std::vector<GNO_LNO_PAIR<gno_t, part_t> > > hash(numLocalCoords);

    for (lno_t i = 0; i < numLocalCoords; i++){
        GNO_LNO_PAIR<gno_t, part_t> tmp;
        tmp.gno = gnos[i];
        tmp.part = parts[i];

        //count the local number of points in each part.
        ++point_counts[tmp.part];
        lno_t hash_index = tmp.gno % numLocalCoords;
        hash[hash_index].push_back(tmp);
    }

    //get the global number of points in each part.
    reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
            ntasks, point_counts, global_point_counts);

    //add up the coordinates of the points in each part.
    for (lno_t i = 0; i < numLocalCoords; i++){
        gno_t g = gnos[i];
        lno_t hash_index = g % numLocalCoords;
        lno_t hash_size = hash[hash_index].size();
        part_t p = -1;
        for (lno_t j = 0; j < hash_size; ++j){
            if (hash[hash_index][j].gno == g){
                p = hash[hash_index][j].part;
                break;
            }
        }
        if (p == -1){
            std::cerr << "ERROR AT HASHING FOR GNO:" << g
                      << " LNO:" << i << std::endl;
        }
        for (int j = 0; j < coordDim; ++j){
            scalar_t c = multiJagged_coordinates[j][i];
            partCenters[j][p] += c;
        }
    }

    //divide the local sums by the global counts; the REDUCE_SUM below
    //then produces the global averages.
    for (int j = 0; j < coordDim; ++j){
        for (part_t i = 0; i < ntasks; ++i){
            partCenters[j][i] /= global_point_counts[i];
        }
    }

    scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
    for (int j = 0; j < coordDim; ++j){
        reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
                ntasks, partCenters[j], tmpCoords);

        //swap the buffers so partCenters holds the reduced values.
        scalar_t *tmp = partCenters[j];
        partCenters[j] = tmpCoords;
        tmpCoords = tmp;
    }

    freeArray<gno_t>(point_counts);
    freeArray<gno_t>(global_point_counts);

    freeArray<scalar_t>(tmpCoords);
    freeArray<scalar_t *>(multiJagged_coordinates);
}
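/* Numeric sketch of the two-stage averaging above: suppose part 0 has
 * one point with x = 2 on rank A and one point with x = 4 on rank B.
 * global_point_counts[0] = 2, so each rank contributes its local sum
 * divided by the global count (1 and 2, respectively), and the final
 * REDUCE_SUM yields 3 -- the true center.
 */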
/*! \brief KmeansHeap Class, max heap, but holds the minimum values.
 */
template <class IT, class WT>
class KmeansHeap{

    IT heapSize;
    IT *indices;
    WT *values;
    WT _EPSILON;

public:
    ~KmeansHeap(){
        freeArray<IT>(this->indices);
        freeArray<WT>(this->values);
    }

    void setHeapsize(IT heapsize_){
        this->heapSize = heapsize_;
        this->indices = allocMemory<IT>(heapsize_);
        this->values = allocMemory<WT>(heapsize_);
        this->_EPSILON = std::numeric_limits<WT>::epsilon();
    }

    //! Adds a point only if its distance is smaller than the current
    //! maximum; the root always holds the largest kept distance.
    void addPoint(IT index, WT distance){
        WT maxVal = this->values[0];
        if (distance >= maxVal)
            return;
        this->values[0] = distance;
        this->indices[0] = index;
        this->push_down(0);
    }
    //! Pushes the value at index_on_heap down until the max-heap
    //! property is restored.
    void push_down(IT index_on_heap){
        IT child_index1 = 2 * index_on_heap + 1;
        IT child_index2 = 2 * index_on_heap + 2;

        IT biggerIndex = -1;
        if (child_index1 < this->heapSize && child_index2 < this->heapSize){
            if (this->values[child_index1] < this->values[child_index2]){
                biggerIndex = child_index2;
            }
            else {
                biggerIndex = child_index1;
            }
        }
        else if (child_index1 < this->heapSize){
            biggerIndex = child_index1;
        }
        else if (child_index2 < this->heapSize){
            biggerIndex = child_index2;
        }
        if (biggerIndex >= 0 &&
            this->values[biggerIndex] > this->values[index_on_heap]){
            WT tmpVal = this->values[biggerIndex];
            this->values[biggerIndex] = this->values[index_on_heap];
            this->values[index_on_heap] = tmpVal;

            IT tmpIndex = this->indices[biggerIndex];
            this->indices[biggerIndex] = this->indices[index_on_heap];
            this->indices[index_on_heap] = tmpIndex;
            this->push_down(biggerIndex);
        }
    }
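/* Behavior sketch: with heapSize = 3, after initValues() the heap
 * holds three MAXVAL sentinels. Inserting distances 5, 3, 8, 1 leaves
 * the heap holding {1, 3, 5}: each insertion replaces the root (the
 * largest kept value) only if the new distance is smaller, so 8 enters
 * while a sentinel is still present and is evicted when 1 arrives.
 * The net effect is a fixed-size buffer of the heapSize smallest
 * distances seen so far.
 */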
    //! Resets the heap: all values become MAXVAL sentinels.
    void initValues(){
        WT MAXVAL = std::numeric_limits<WT>::max();
        for (IT i = 0; i < this->heapSize; ++i){
            this->values[i] = MAXVAL;
            this->indices[i] = -1;
        }
    }

    //! Returns the total distance of the kept points to the center.
    WT getTotalDistance(){
        WT nc = 0;
        for (IT j = 0; j < this->heapSize; ++j){
            nc += this->values[j];
        }
        return nc;
    }

    //! Moves the center to the mean of the kept points; returns true
    //! if any coordinate moved by more than _EPSILON.
    bool getNewCenters(WT *center, WT **coords, int dimension){
        bool moved = false;
        for (int i = 0; i < dimension; ++i){
            WT nc = 0;
            for (IT j = 0; j < this->heapSize; ++j){
                IT k = this->indices[j];
                nc += coords[i][k];
            }
            nc /= this->heapSize;
            moved = (ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved);
            center[i] = nc;
        }
        return moved;
    }

    void copyCoordinates(IT *permutation){
        for (IT i = 0; i < this->heapSize; ++i){
            permutation[i] = this->indices[i];
        }
    }
};
/*! \brief KMeansCluster Class: a cluster center together with the
 *  heap of the points closest to it.
 */
template <class IT, class WT>
class KMeansCluster{

    int dimension;
    KmeansHeap<IT,WT> closestPoints;

public:
    WT *center;

    ~KMeansCluster(){
        freeArray<WT>(center);
    }

    void setParams(int dimension_, int heapsize){
        this->dimension = dimension_;
        this->center = allocMemory<WT>(dimension_);
        this->closestPoints.setHeapsize(heapsize);
        this->closestPoints.initValues();
    }

    bool getNewCenters(WT **coords){
        return this->closestPoints.getNewCenters(center, coords, dimension);
    }

    //! Returns the distance of the point at index to the cluster
    //! center, and records the point in the heap of closest points.
    //! The 1/dimension exponent is monotone in the squared sum, so the
    //! relative ordering of points matches the Euclidean distance.
    WT getDistance(IT index, WT **elementCoords){
        WT distance = 0;
        for (int i = 0; i < this->dimension; ++i){
            WT d = (center[i] - elementCoords[i][index]);
            distance += d * d;
        }
        distance = pow(distance, WT(1.0 / this->dimension));
        closestPoints.addPoint(index, distance);
        return distance;
    }

    //helpers used by KMeansAlgorithm below; they forward to the heap
    //interface above.
    void clearHeap(){
        this->closestPoints.initValues();
    }

    WT getDistanceToCenter(){
        return this->closestPoints.getTotalDistance();
    }

    void copyCoordinates(IT *permutation){
        this->closestPoints.copyCoordinates(permutation);
    }
};
/*! \brief KMeansAlgorithm Class that performs clustering of the
 *  coordinates and returns the closest set of coordinates.
 */
template <class IT, class WT>
class KMeansAlgorithm{

    int dim;
    IT numElements;
    WT **elementCoords;
    IT numClusters;
    IT required_elements;
    KMeansCluster<IT,WT> *clusters;
    WT *maxCoordinates;
    WT *minCoordinates;

public:
    ~KMeansAlgorithm(){
        freeArray<KMeansCluster<IT,WT> >(clusters);
        freeArray<WT>(maxCoordinates);
        freeArray<WT>(minCoordinates);
    }

    /*! \brief KMeansAlgorithm Constructor.
     */
    KMeansAlgorithm(
            int dim_,
            IT numElements_,
            WT **elementCoords_,
            IT required_elements_):
                dim(dim_),
                numElements(numElements_),
                elementCoords(elementCoords_),
                numClusters((1 << dim_) + 1),
                required_elements(required_elements_)
    {
        this->clusters = allocMemory<KMeansCluster<IT,WT> >(this->numClusters);
        //set the dimension and the number of required elements for
        //all clusters.
        for (int i = 0; i < numClusters; ++i){
            this->clusters[i].setParams(this->dim, this->required_elements);
        }

        this->maxCoordinates = allocMemory<WT>(this->dim);
        this->minCoordinates = allocMemory<WT>(this->dim);

        //obtain the min and max coordinate along each dimension.
        for (int j = 0; j < dim; ++j){
            this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
            for (IT i = 1; i < numElements; ++i){
                WT t = this->elementCoords[j][i];
                if (t > this->maxCoordinates[j]){
                    this->maxCoordinates[j] = t;
                }
                if (t < this->minCoordinates[j]){
                    this->minCoordinates[j] = t;
                }
            }
        }

        //place the first 2^dim cluster centers at the corners of the
        //bounding box...
        for (int j = 0; j < dim; ++j){
            int mod = (1 << (j + 1));
            for (int i = 0; i < numClusters - 1; ++i){
                WT c;
                if ((i % mod) < mod / 2){
                    c = this->maxCoordinates[j];
                }
                else {
                    c = this->minCoordinates[j];
                }
                this->clusters[i].center[j] = c;
            }
        }

        //...and the last cluster at the middle of the bounding box.
        for (int j = 0; j < dim; ++j){
            this->clusters[numClusters - 1].center[j] =
                (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
        }
    }
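/* Initialization sketch for dim_ = 2: numClusters = (1<<2) + 1 = 5.
 * With mod = 2 for j = 0 and mod = 4 for j = 1, clusters 0..3 start at
 * the four corners of the bounding box,
 *   0:(max,max)  1:(min,max)  2:(max,min)  3:(min,min)
 * and cluster 4 starts at the box center -- deterministic, well-spread
 * seeds rather than random ones.
 */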
    /*! \brief Performs at most 10 Lloyd iterations of k-means on the
     *  coordinates, stopping early if no center moves.
     */
    void kmeans(){
        for (int it = 0; it < 10; ++it){
            //reset the heaps before a new assignment pass.
            for (IT j = 0; j < this->numClusters; ++j){
                this->clusters[j].clearHeap();
            }
            //assign every element to every cluster's heap; each heap
            //retains its closest points only.
            for (IT i = 0; i < this->numElements; ++i){
                for (IT j = 0; j < this->numClusters; ++j){
                    this->clusters[j].getDistance(i, this->elementCoords);
                }
            }
            bool moved = false;
            for (IT j = 0; j < this->numClusters; ++j){
                moved = (this->clusters[j].getNewCenters(this->elementCoords) || moved);
            }
            if (!moved){
                break;
            }
        }
    }

    //! Finds the cluster whose members are closest to each other and
    //! copies their indices into procPermutation.
    void getMinDistanceCluster(IT *procPermutation){
        WT minDistance = this->clusters[0].getDistanceToCenter();
        IT minCluster = 0;
        for (IT j = 1; j < this->numClusters; ++j){
            WT minTmpDistance = this->clusters[j].getDistanceToCenter();
            if (minTmpDistance < minDistance){
                minDistance = minTmpDistance;
                minCluster = j;
            }
        }
        this->clusters[minCluster].copyCoordinates(procPermutation);
    }
};
#define MINOF(a,b) (((a)<(b))?(a):(b))

/*! \brief fillContinousArray function.
 *
 *  \param arr     array to be filled in with values.
 *  \param arrSize the size of the array.
 *  \param val     the pointer to the value to be filled; if given
 *                 NULL, the array is filled with arr[i] = i.
 */
template <typename T>
void fillContinousArray(T *arr, size_t arrSize, T *val){
    if (val == NULL){
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
        for (size_t i = 0; i < arrSize; ++i){
            arr[i] = T(i);
        }
    }
    else {
        T v = *val;
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
        for (size_t i = 0; i < arrSize; ++i){
            arr[i] = v;
        }
    }
}
/*! \brief CommunicationModel Base Class that performs mapping between
 *  the coordinate partitioning result and the processors.
 */
template <typename part_t, typename pcoord_t>
class CommunicationModel{
protected:
    double commCost;

public:
    part_t no_procs; //the number of processors
    part_t no_tasks; //the number of tasks

    CommunicationModel(): commCost(0), no_procs(0), no_tasks(0){}
    CommunicationModel(part_t no_procs_, part_t no_tasks_):
        commCost(0),
        no_procs(no_procs_),
        no_tasks(no_tasks_){}
    virtual ~CommunicationModel(){}

    part_t getNumProcs() const {
        return this->no_procs;
    }
    part_t getNumTasks() const {
        return this->no_tasks;
    }

    //! Accumulates, over every task and each of its communication
    //! partners, the processor distance (optionally scaled by the edge
    //! weight) into the communication cost metric.
    void calculateCommunicationCost(
            part_t *task_to_proc,
            part_t *task_communication_xadj,
            part_t *task_communication_adj,
            pcoord_t *task_communication_edge_weight){

        double totalCost = 0;

        part_t commCount = 0;
        for (part_t task = 0; task < this->no_tasks; ++task){
            int assigned_proc = task_to_proc[task];

            part_t task_adj_begin = task_communication_xadj[task];
            part_t task_adj_end = task_communication_xadj[task + 1];

            commCount += task_adj_end - task_adj_begin;

            for (part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){
                part_t neighborTask = task_communication_adj[task2];
                int neighborProc = task_to_proc[neighborTask];
                double distance = getProcDistance(assigned_proc, neighborProc);

                if (task_communication_edge_weight == NULL){
                    totalCost += distance;
                }
                else {
                    totalCost += distance * task_communication_edge_weight[task2];
                }
            }
        }

        this->commCost = totalCost;
    }

    double getCommunicationCostMetric(){
        return this->commCost;
    }

    virtual double getProcDistance(int procId1, int procId2) const = 0;

    /*! \brief Maps the tasks to the processors.
     */
    virtual void getMapping(
            int myRank,
            const RCP<const Environment> &env,
            ArrayRCP<part_t> &proc_to_task_xadj,
            ArrayRCP<part_t> &proc_to_task_adj,
            ArrayRCP<part_t> &task_to_proc
    ) const = 0;
};
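/* Cost-model sketch: with the Manhattan distance implemented by the
 * coordinate model below, a task on the processor at (0,0) that
 * communicates with a neighbor on the processor at (2,1) contributes
 * distance 3 to totalCost, scaled by the edge weight when one is
 * supplied. The metric is thus a hop-bytes style estimate: graph edges
 * weighted by how far apart their endpoints land on the machine.
 */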
/*! \brief CoordinateCommunicationModel: performs the mapping between
 *  the coordinate partitioning result and the mpi ranks, based on the
 *  machine coordinates and the task (part center) coordinates.
 */
template <typename pcoord_t, typename tcoord_t, typename part_t>
class CoordinateCommunicationModel :
    public CommunicationModel<part_t, pcoord_t>{
public:
    int proc_coord_dim;     //dimension of the processor coordinates
    pcoord_t **proc_coords; //processor coordinates
    int task_coord_dim;     //dimension of the task coordinates
    tcoord_t **task_coords; //task coordinates
    int partArraySize;
    part_t *partNoArray;

    CoordinateCommunicationModel():
        CommunicationModel<part_t, pcoord_t>(),
        proc_coord_dim(0),
        proc_coords(0),
        task_coord_dim(0),
        task_coords(0),
        partArraySize(-1),
        partNoArray(NULL){}

    virtual ~CoordinateCommunicationModel(){}

    /*! \brief Class Constructor:
     */
    CoordinateCommunicationModel(
            int pcoord_dim_,
            pcoord_t **pcoords_,
            int tcoord_dim_,
            tcoord_t **tcoords_,
            part_t no_procs_,
            part_t no_tasks_):
                CommunicationModel<part_t, pcoord_t>(no_procs_, no_tasks_),
                proc_coord_dim(pcoord_dim_), proc_coords(pcoords_),
                task_coord_dim(tcoord_dim_), task_coords(tcoords_),
                partArraySize(std::min(tcoord_dim_, pcoord_dim_)),
                partNoArray(NULL){}

    void setPartArraySize(int psize){
        this->partArraySize = psize;
    }
    void setPartArray(part_t *pNo){
        this->partNoArray = pNo;
    }

    /*! \brief Function is called whenever nprocs > no_task. Function
     *  returns only the subset of processors that are closest to each
     *  other.
     *  \param proc_permutation holds the indices of the chosen processors.
     *  \param nprocs the number of processors.
     *  \param ntasks the number of tasks.
     */
    void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const{
        part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);
        KMeansAlgorithm<part_t, pcoord_t> kma(
                minCoordDim, nprocs,
                this->proc_coords, ntasks);

        kma.kmeans();
        kma.getMinDistanceCluster(proc_permutation);

        //processors beyond the first ntasks entries are unused.
        for (int i = ntasks; i < nprocs; ++i){
            proc_permutation[i] = -1;
        }
    }
    //! Lehmer (Park-Miller) style pseudo-random generator using
    //! Schrage's decomposition of a = 16807, m = 2^31 - 1; returns a
    //! value in [0, l).
    static part_t umpa_uRandom(part_t l, int &_u_umpa_seed)
    {
        int a = 16807;
        int m = 2147483647;
        int q = 127773;
        int r = 2836;
        int lo, hi, test;
        double d;

        lo = _u_umpa_seed % q;
        hi = _u_umpa_seed / q;
        test = (a * lo) - (r * hi);
        if (test > 0)
            _u_umpa_seed = test;
        else
            _u_umpa_seed = test + m;
        d = (double)((double)_u_umpa_seed / (double)m);
        return (part_t)(d * (double)l);
    }
    //! Manhattan (L1) distance between two processors' coordinates.
    virtual double getProcDistance(int procId1, int procId2) const{
        double distance = 0;
        for (int i = 0; i < this->proc_coord_dim; ++i){
            distance += ZOLTAN2_ABS(proc_coords[i][procId1] - proc_coords[i][procId2]);
        }
        return distance;
    }

    //! Randomly perturbs the visit order of the tasks or processors.
    void update_visit_order(part_t *visitOrder, part_t n, int &_u_umpa_seed, part_t rndm){
        part_t *a = visitOrder;

        if (rndm){
            part_t i, u, v, tmp;

            if (n <= 4)
                return;

            for (i = 0; i < n; i += 16)
            {
                u = umpa_uRandom(n - 4, _u_umpa_seed);
                v = umpa_uRandom(n - 4, _u_umpa_seed);

                //swap runs of four entries between two random positions.
                ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v], a[u], tmp);
                ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v + 1], a[u + 1], tmp);
                ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v + 2], a[u + 2], tmp);
                ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v + 3], a[u + 3], tmp);
            }
        }
        else {
            part_t i, end = n / 4;

            for (i = 1; i < end; i++)
            {
                part_t j = umpa_uRandom(n - i, _u_umpa_seed);
                part_t t = a[j];
                a[j] = a[n - i];
                a[n - i] = t;
            }
        }
    }
    /*! \brief Maps the tasks to the processors, using the coordinates
     *  of both.
     */
    virtual void getMapping(
            int myRank,
            const RCP<const Environment> &env,
            ArrayRCP<part_t> &rcp_proc_to_task_xadj,
            ArrayRCP<part_t> &rcp_proc_to_task_adj,
            ArrayRCP<part_t> &rcp_task_to_proc
    ) const{

        rcp_proc_to_task_xadj = ArrayRCP<part_t>(this->no_procs + 1);
        rcp_proc_to_task_adj = ArrayRCP<part_t>(this->no_tasks);
        rcp_task_to_proc = ArrayRCP<part_t>(this->no_tasks);

        part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
        part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
        part_t *task_to_proc = rcp_task_to_proc.getRawPtr();

        //initialize the counts with zeros; only processors that
        //receive tasks are overwritten below.
        part_t invalid = 0;
        fillContinousArray<part_t>(proc_to_task_xadj, this->no_procs + 1, &invalid);
        //the number of parts into which both the processors and the
        //tasks are partitioned.
        part_t num_parts = MINOF(this->no_procs, this->no_tasks);

        part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);

        int recursion_depth = partArraySize;
        if (partArraySize < minCoordDim) recursion_depth = minCoordDim;
        int taskPerm = z2Fact<int>(this->task_coord_dim);
        int procPerm = z2Fact<int>(this->proc_coord_dim);
        int permutations = taskPerm * procPerm;
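        /* Example: a 3-dimensional machine and a 3-dimensional task
         * space give taskPerm = procPerm = 3! = 6, hence 36
         * axis-permutation variants. Each rank tries the variant
         * selected by myRank % permutations below, and getBestMapping()
         * later keeps the cheapest result.
         */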
        //CSR arrays of the processor partitioning.
        part_t *proc_xadj = allocMemory<part_t>(num_parts + 1);
        part_t *proc_adjList = allocMemory<part_t>(this->no_procs);

        part_t used_num_procs = this->no_procs;
        if (this->no_procs > this->no_tasks){
            //partition only the subset of processors that are closest
            //to each other.
            this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks);
            used_num_procs = this->no_tasks;
        }
        else {
            fillContinousArray<part_t>(proc_adjList, this->no_procs, NULL);
        }

        //each rank tries a different pair of axis permutations; the
        //cheapest mapping is selected afterwards in getBestMapping().
        int myPermutation = myRank % permutations;

        int myProcPerm = myPermutation % procPerm;
        int myTaskPerm = myPermutation / procPerm;

        int *permutation = allocMemory<int>(
                (this->proc_coord_dim > this->task_coord_dim)
                        ? this->proc_coord_dim : this->task_coord_dim);

        //reorder the dimensions of the processor coordinates.
        ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);

        pcoord_t **pcoords = allocMemory<pcoord_t *>(this->proc_coord_dim);
        for (int i = 0; i < this->proc_coord_dim; ++i){
            pcoords[i] = this->proc_coords[permutation[i]];
        }

        //partition the processors with the sequential, deterministic
        //multi-jagged partitioner (see Zoltan2_AlgMultiJagged.hpp).
        env->timerStart(MACRO_TIMERS, "Mapping - Proc Partitioning");
        AlgMJ<pcoord_t, part_t, part_t, part_t> mj_partitioner;
        mj_partitioner.sequential_task_partitioning(
                env,
                this->no_procs,
                used_num_procs,
                num_parts,
                minCoordDim,
                pcoords,
                proc_adjList,
                proc_xadj,
                recursion_depth,
                partNoArray);
        env->timerStop(MACRO_TIMERS, "Mapping - Proc Partitioning");
        freeArray<pcoord_t *>(pcoords);
        //CSR arrays of the task partitioning.
        part_t *task_xadj = allocMemory<part_t>(num_parts + 1);
        part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
        //fill task_adjList with 0, 1, ..., no_tasks - 1.
        fillContinousArray<part_t>(task_adjList, this->no_tasks, NULL);

        //reorder the dimensions of the task coordinates.
        ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);

        tcoord_t **tcoords = allocMemory<tcoord_t *>(this->task_coord_dim);
        for (int i = 0; i < this->task_coord_dim; ++i){
            tcoords[i] = this->task_coords[permutation[i]];
        }

        env->timerStart(MACRO_TIMERS, "Mapping - Task Partitioning");
        mj_partitioner.sequential_task_partitioning(
                env,
                this->no_tasks,
                this->no_tasks,
                num_parts,
                minCoordDim,
                tcoords,
                task_adjList,
                task_xadj,
                recursion_depth,
                partNoArray);
        env->timerStop(MACRO_TIMERS, "Mapping - Task Partitioning");
        freeArray<tcoord_t *>(tcoords);
        freeArray<int>(permutation);
        //processors and tasks are partitioned into the same number of
        //parts; match the parts one to one.
        for (part_t i = 0; i < num_parts; ++i){

            part_t proc_index_begin = proc_xadj[i];
            part_t task_begin_index = task_xadj[i];
            part_t proc_index_end = proc_xadj[i + 1];
            part_t task_end_index = task_xadj[i + 1];

            if (proc_index_end - proc_index_begin != 1){
                std::cerr << "Error at partitioning of processors" << std::endl;
                std::cerr << "PART:" << i << " is assigned to "
                          << proc_index_end - proc_index_begin
                          << " processors." << std::endl;
                exit(1);
            }
            part_t assigned_proc = proc_adjList[proc_index_begin];
            proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
        }
        //compute the exclusive prefix sum of the task counts, so that
        //proc_to_task_xadj becomes a proper CSR offset array.
        part_t *proc_to_task_xadj_work = allocMemory<part_t>(this->no_procs);
        part_t sum = 0;
        for (part_t i = 0; i < this->no_procs; ++i){
            part_t tmp = proc_to_task_xadj[i];
            proc_to_task_xadj[i] = sum;
            sum += tmp;
            proc_to_task_xadj_work[i] = sum;
        }
        proc_to_task_xadj[this->no_procs] = sum;
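        /* CSR construction sketch: if the per-processor task counts
         * written into proc_to_task_xadj were {2, 0, 1}, the exclusive
         * prefix sum turns them into offsets {0, 2, 2, 3}, while the
         * _work copy keeps the end offsets {2, 2, 3} that the placement
         * loop below decrements as it scatters task ids into
         * proc_to_task_adj.
         */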
        for (part_t i = 0; i < num_parts; ++i){

            part_t proc_index_begin = proc_xadj[i];
            part_t task_begin_index = task_xadj[i];
            part_t task_end_index = task_xadj[i + 1];

            part_t assigned_proc = proc_adjList[proc_index_begin];

            for (part_t j = task_begin_index; j < task_end_index; ++j){
                part_t taskId = task_adjList[j];

                task_to_proc[taskId] = assigned_proc;

                proc_to_task_adj[--proc_to_task_xadj_work[assigned_proc]] = taskId;
            }
        }

        freeArray<part_t>(proc_to_task_xadj_work);
        freeArray<part_t>(task_xadj);
        freeArray<part_t>(task_adjList);
        freeArray<part_t>(proc_xadj);
        freeArray<part_t>(proc_adjList);
    }
};
/*! \brief CoordinateTaskMapper: maps the parts of a geometric
 *  partitioning solution onto the mpi ranks, using the coordinates of
 *  both the machine and the part centers.
 */
template <typename Adapter, typename part_t>
class CoordinateTaskMapper : public PartitionMapping<Adapter>{
protected:

#ifndef DOXYGEN_SHOULD_SKIP_THIS
    typedef typename Adapter::scalar_t pcoord_t;
    typedef typename Adapter::scalar_t tcoord_t;
#endif // DOXYGEN_SHOULD_SKIP_THIS

    CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> *proc_task_comm;
    part_t nprocs;
    part_t ntasks;
    ArrayRCP<part_t> proc_to_task_xadj;
    ArrayRCP<part_t> proc_to_task_adj;
    ArrayRCP<part_t> task_to_proc;
    bool isOwnerofModel;
    ArrayRCP<part_t> task_communication_xadj;
    ArrayRCP<part_t> task_communication_adj;

public:
    /*! \brief doMapping function, calls the getMapping function of the
     *  communicationModel object.
     */
    void doMapping(int myRank){

        if (this->proc_task_comm){
            this->proc_task_comm->getMapping(
                    myRank,
                    Teuchos::RCP<const Environment>(this->env, false),
                    this->proc_to_task_xadj,
                    this->proc_to_task_adj,
                    this->task_to_proc);
        }
        else {
            std::cerr << "communicationModel is not specified in the Mapper"
                      << std::endl;
            exit(1);
        }
    }
    /*! \brief Creates and returns the subcommunicator for the
     *  processor group.
     */
    RCP<Comm<int> > create_subCommunicator(){
        int procDim = this->proc_task_comm->proc_coord_dim;
        int taskDim = this->proc_task_comm->task_coord_dim;

        //the number of axis permutations for the tasks and processors;
        //one permutation variant is tried per rank within a group.
        int taskPerm = z2Fact<int>(taskDim);
        int procPerm = z2Fact<int>(procDim);
        int idealGroupSize = taskPerm * procPerm;

        int myRank = this->comm->getRank();
        int commSize = this->comm->getSize();

        int myGroupIndex = myRank / idealGroupSize;

        int prevGroupBegin = (myGroupIndex - 1) * idealGroupSize;
        if (prevGroupBegin < 0) prevGroupBegin = 0;
        int myGroupBegin = myGroupIndex * idealGroupSize;
        int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
        int nextGroupEnd = (myGroupIndex + 2) * idealGroupSize;

        //if the last group cannot be formed completely, merge it with
        //the previous group.
        if (myGroupEnd > commSize){
            myGroupBegin = prevGroupBegin;
            myGroupEnd = commSize;
        }
        if (nextGroupEnd > commSize){
            myGroupEnd = commSize;
        }
        int myGroupSize = myGroupEnd - myGroupBegin;

        part_t *myGroup = allocMemory<part_t>(myGroupSize);
        for (int i = 0; i < myGroupSize; ++i){
            myGroup[i] = myGroupBegin + i;
        }

        ArrayView<const part_t> myGroupView(myGroup, myGroupSize);

        RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
        freeArray<part_t>(myGroup);
        return subComm;
    }
    /*! \brief Finds the lowest-cost mapping within the processor group
     *  and broadcasts that solution to everyone.
     */
    void getBestMapping(){
        //create the sub group.
        RCP<Comm<int> > subComm = this->create_subCommunicator();
        //calculate this rank's cost.
        double myCost = this->proc_task_comm->getCommunicationCostMetric();
        double localCost[2], globalCost[2];

        localCost[0] = myCost;
        localCost[1] = double(subComm->getRank());

        globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
        Zoltan2_ReduceBestMapping<int, double> reduceBest;
        reduceAll<int, double>(*subComm, reduceBest,
                2, localCost, globalCost);

        int sender = int(globalCost[1]);

        //broadcast the best rank's mapping arrays; the xadj array has
        //nprocs + 1 entries.
        broadcast(*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
        broadcast(*subComm, sender, this->nprocs + 1, this->proc_to_task_xadj.getRawPtr());
        broadcast(*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
    }
    //write the mapping to gnuPlot code to visualize it.
    void writeMapping(){
        std::ofstream gnuPlotCode("gnuPlot.plot", std::ofstream::out);

        int mindim = MINOF(proc_task_comm->proc_coord_dim,
                           proc_task_comm->task_coord_dim);
        std::string ss = "";
        for (part_t i = 0; i < this->nprocs; ++i){

            std::string procFile = toString<int>(i) + "_mapping.txt";
            if (i == 0){
                gnuPlotCode << "plot \"" << procFile << "\"\n";
            }
            else {
                gnuPlotCode << "replot \"" << procFile << "\"\n";
            }

            std::ofstream inpFile(procFile.c_str(), std::ofstream::out);

            std::string gnuPlotArrow = "set arrow from ";
            for (int j = 0; j < mindim; ++j){
                if (j == mindim - 1){
                    inpFile << proc_task_comm->proc_coords[j][i];
                    gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]);
                }
                else {
                    inpFile << proc_task_comm->proc_coords[j][i] << " ";
                    gnuPlotArrow += toString<float>(proc_task_comm->proc_coords[j][i]) + ",";
                }
            }
            gnuPlotArrow += " to ";

            inpFile << std::endl;
            ArrayView<part_t> a = this->getAssignedTaksForProc(i);
            for (int k = 0; k < a.size(); ++k){
                part_t j = a[k];

                std::string gnuPlotArrow2 = gnuPlotArrow;
                for (int z = 0; z < mindim; ++z){
                    if (z == mindim - 1){
                        inpFile << proc_task_comm->task_coords[z][j];
                        gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]);
                    }
                    else {
                        inpFile << proc_task_comm->task_coords[z][j] << " ";
                        gnuPlotArrow2 += toString<float>(proc_task_comm->task_coords[z][j]) + ",";
                    }
                }
                ss += gnuPlotArrow2 + "\n";
                inpFile << std::endl;
            }
            inpFile.close();
        }
        gnuPlotCode << ss;
        gnuPlotCode << "\nreplot\n pause -1 \n";
        gnuPlotCode.close();
    }
    //write the mapping to gnuPlot code to visualize it.
    void writeMapping2(int myRank){

        std::string rankStr = toString<int>(myRank);
        std::string gnuPlots = "gnuPlot", extentionS = ".plot";
        std::string outF = gnuPlots + rankStr + extentionS;
        std::ofstream gnuPlotCode(outF.c_str(), std::ofstream::out);

        CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t> *tmpproc_task_comm =
                this->proc_task_comm;

        int mindim = MINOF(tmpproc_task_comm->proc_coord_dim,
                           tmpproc_task_comm->task_coord_dim);
        std::string ss = "";
        std::string procs = "", parts = "";
        for (part_t i = 0; i < this->nprocs; ++i){

            ArrayView<part_t> a = this->getAssignedTaksForProc(i);
            if (a.size() == 0){
                continue;
            }

            std::string gnuPlotArrow = "set arrow from ";
            for (int j = 0; j < mindim; ++j){
                if (j == mindim - 1){
                    gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
                    procs += toString<float>(tmpproc_task_comm->proc_coords[j][i]);
                }
                else {
                    gnuPlotArrow += toString<float>(tmpproc_task_comm->proc_coords[j][i]) + ",";
                    procs += toString<float>(tmpproc_task_comm->proc_coords[j][i]) + " ";
                }
            }
            procs += "\n";

            gnuPlotArrow += " to ";

            for (int k = 0; k < a.size(); ++k){
                part_t j = a[k];

                std::string gnuPlotArrow2 = gnuPlotArrow;
                for (int z = 0; z < mindim; ++z){
                    if (z == mindim - 1){
                        gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]);
                        parts += toString<float>(tmpproc_task_comm->task_coords[z][j]);
                    }
                    else {
                        gnuPlotArrow2 += toString<float>(tmpproc_task_comm->task_coords[z][j]) + ",";
                        parts += toString<float>(tmpproc_task_comm->task_coords[z][j]) + " ";
                    }
                }
                parts += "\n";
                ss += gnuPlotArrow2 + " nohead\n";
            }
        }

        std::ofstream procFile("procPlot.plot", std::ofstream::out);
        procFile << procs << "\n";
        procFile.close();

        std::ofstream partFile("partPlot.plot", std::ofstream::out);
        partFile << parts << "\n";
        partFile.close();

        std::ofstream extraProcFile("allProc.plot", std::ofstream::out);

        for (part_t j = 0; j < this->nprocs; ++j){
            for (int i = 0; i < mindim; ++i){
                extraProcFile << tmpproc_task_comm->proc_coords[i][j] << " ";
            }
            extraProcFile << std::endl;
        }

        extraProcFile.close();

        gnuPlotCode << ss;
        if (mindim == 2){
            gnuPlotCode << "plot \"procPlot.plot\" with points pointsize 3\n";
        }
        else {
            gnuPlotCode << "splot \"procPlot.plot\" with points pointsize 3\n";
        }
        gnuPlotCode << "replot \"partPlot.plot\" with points pointsize 3\n";
        gnuPlotCode << "replot \"allProc.plot\" with points pointsize 0.65\n";
        gnuPlotCode << "\nreplot\n pause -1 \n";
        gnuPlotCode.close();
    }
    //write the part boxes and the communication arrows between part
    //centers to gnuPlot code.
    void writeGnuPlot(
            const Teuchos::Comm<int> *comm_,
            const Zoltan2::PartitioningSolution<Adapter> *soln_,
            int coordDim,
            tcoord_t **partCenters
    ){
        std::string file = "gggnuPlot";
        std::string exten = ".plot";
        std::ofstream mm("2d.txt");
        file += toString<int>(comm_->getRank()) + exten;
        std::ofstream ff(file.c_str());

        //outPartBoxes: the bounding boxes of the parts, as computed by
        //the partitioning solution (obtained from soln_ above).
        for (part_t i = 0; i < this->ntasks; ++i){
            outPartBoxes[i].writeGnuPlot(ff, mm);
        }

        if (coordDim == 2){
            ff << "plot \"2d.txt\"" << std::endl;
        }
        else {
            ff << "splot \"2d.txt\"" << std::endl;
        }

        ff << "set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
        for (part_t i = 0; i < this->ntasks; ++i){
            part_t pb = task_communication_xadj[i];
            part_t pe = task_communication_xadj[i + 1];
            for (part_t p = pb; p < pe; ++p){
                part_t n = task_communication_adj[p];

                //draw an arrow from the center of part n to the center
                //of part i.
                std::string arrowline = "set arrow from ";
                for (int j = 0; j < coordDim - 1; ++j){
                    arrowline += toString<tcoord_t>(partCenters[j][n]) + ",";
                }
                arrowline += toString<tcoord_t>(partCenters[coordDim - 1][n]) + " to ";

                for (int j = 0; j < coordDim - 1; ++j){
                    arrowline += toString<tcoord_t>(partCenters[j][i]) + ",";
                }
                arrowline += toString<tcoord_t>(partCenters[coordDim - 1][i]) + " as 5\n";

                ff << arrowline;
            }
        }
        ff << "replot\n pause -1" << std::endl;
        ff.close();
    }
    //! Returns the raw arrays of the processor-to-task CSR mapping.
    void getProcTask(part_t* &proc_to_task_xadj_, part_t* &proc_to_task_adj_){
        proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
        proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
    }

    virtual ~CoordinateTaskMapper(){
        //free the communication model only if this mapper owns it.
        if (this->isOwnerofModel){
            delete this->proc_task_comm;
        }
    }
    /*! \brief Constructor. When this constructor is called, the
     *  mapping is performed immediately and the communication cost
     *  metric is calculated.
     */
    CoordinateTaskMapper(
            const Teuchos::Comm<int> *comm_,
            const MachineRepresentation<pcoord_t> *machine_,
            const Zoltan2::Model<typename Adapter::base_adapter_t> *model_,
            const Zoltan2::PartitioningSolution<Adapter> *soln_,
            const Environment *envConst):
                PartitionMapping<Adapter>(comm_, machine_, model_, soln_, envConst),
                proc_to_task_xadj(0),
                proc_to_task_adj(0),
                task_to_proc(0),
                isOwnerofModel(true),
                proc_task_comm(0),
                task_communication_xadj(0),
                task_communication_adj(0){

        //no edge weights are available in this path.
        pcoord_t *task_communication_edge_weight_ = NULL;

        this->nprocs = comm_->getSize();
        this->ntasks = soln_->getActualGlobalNumberOfParts();
        if (part_t(soln_->getTargetGlobalNumberOfParts()) > this->ntasks){
            this->ntasks = soln_->getTargetGlobalNumberOfParts();
        }

        const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *coords =
                static_cast<const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *>(model_);
        int coordDim = coords->getCoordinateDim();

        //alloc the coordinates of the part centers.
        tcoord_t **partCenters = NULL;
        partCenters = allocMemory<tcoord_t *>(coordDim);
        for (int i = 0; i < coordDim; ++i){
            partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
        }

        //get the centers of the parts.
        envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");
        getSolutionCenterCoordinates<Adapter, typename Adapter::scalar_t, part_t>(
                envConst,
                comm_,
                coords,
                soln_,
                coordDim,
                this->ntasks,
                partCenters);
        envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");

        //create the communication model from the machine coordinates
        //and the part centers.
        this->proc_task_comm =
                new CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
                        machine_->getProcDim(),
                        machine_->getProcCoords(),
                        coordDim,
                        partCenters,
                        this->nprocs,
                        this->ntasks);

        int myRank = comm_->getRank();

        envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
        this->doMapping(myRank);
        envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");

        envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
        soln_->getCommunicationGraph(
                task_communication_xadj,
                task_communication_adj);
        envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");

        if (comm_->getRank() == 0){
            //xadj has ntasks + 1 entries; the last one is the total
            //communication volume.
            part_t taskCommCount = task_communication_xadj.size() - 1;
            std::cout << " TotalComm:"
                      << task_communication_xadj[taskCommCount] << std::endl;
            part_t maxN = task_communication_xadj[0];
            for (part_t i = 1; i <= taskCommCount; ++i){
                part_t nc = task_communication_xadj[i] - task_communication_xadj[i - 1];
                if (maxN < nc) maxN = nc;
            }
            std::cout << " maxNeighbor:" << maxN << std::endl;
        }

        this->writeGnuPlot(comm_, soln_, coordDim, partCenters);

        envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
        this->proc_task_comm->calculateCommunicationCost(
                task_to_proc.getRawPtr(),
                task_communication_xadj.getRawPtr(),
                task_communication_adj.getRawPtr(),
                task_communication_edge_weight_);
        envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");

        //pick the cheapest mapping among the permutation variants.
        this->getBestMapping();

        this->writeMapping2(comm_->getRank());

        for (int i = 0; i < coordDim; ++i){
            freeArray<tcoord_t>(partCenters[i]);
        }
        freeArray<tcoord_t *>(partCenters);
    }
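/* Usage sketch (hypothetical variable names): given a computed
 * PartitioningSolution and a machine representation, the mapper is
 * built and queried as
 *
 *   Zoltan2::CoordinateTaskMapper<adapter_t, part_t> mapper(
 *       comm.getRawPtr(), &machine, &model, &solution, &env);
 *   part_t proc0 = mapper.getAssignedProcForTask(0);
 *
 * Construction already runs doMapping(), the communication-cost
 * evaluation, and getBestMapping(), so the accessors can be used
 * immediately afterwards.
 */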
    /*! \brief Constructor. The mapping constructor, which also
     *  performs the mapping operation. The result mapping can be
     *  obtained by
     *  --getAssignedProcForTask: returns the assigned processor id
     *    for the given task;
     *  --getPartsForProc: returns the assigned tasks with the number
     *    of tasks.
     */
    CoordinateTaskMapper(
            const Environment *env_const_,
            const Teuchos::Comm<int> *problemComm,
            int proc_dim,
            int num_processors,
            pcoord_t **machine_coords,
            int task_dim,
            part_t num_tasks,
            tcoord_t **task_coords,
            ArrayRCP<part_t> task_comm_xadj,
            ArrayRCP<part_t> task_comm_adj,
            pcoord_t *task_communication_edge_weight_,
            int recursion_depth,
            part_t *part_no_array,
            const part_t *machine_dimensions
    ):
            proc_to_task_xadj(0),
            proc_to_task_adj(0),
            task_to_proc(0),
            isOwnerofModel(true),
            proc_task_comm(0),
            task_communication_xadj(task_comm_xadj),
            task_communication_adj(task_comm_adj){

        //if the machine dimensions are given, shift the coordinates so
        //that the largest gap along each torus dimension sits at the
        //boundary.
        pcoord_t **virtual_machine_coordinates = machine_coords;
        if (machine_dimensions){
            virtual_machine_coordinates =
                this->shiftMachineCoordinates(
                        proc_dim,
                        machine_dimensions,
                        num_processors,
                        machine_coords);
        }

        this->nprocs = num_processors;

        int coordDim = task_dim;
        this->ntasks = num_tasks;

        //the task coordinates serve directly as the part centers.
        tcoord_t **partCenters = task_coords;

        //create the coordinate communication model.
        this->proc_task_comm =
                new Zoltan2::CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
                        proc_dim,
                        virtual_machine_coordinates,
                        coordDim,
                        partCenters,
                        this->nprocs,
                        this->ntasks);

        this->proc_task_comm->setPartArraySize(recursion_depth);
        this->proc_task_comm->setPartArray(part_no_array);

        int myRank = problemComm->getRank();

        this->doMapping(myRank);

        this->writeMapping2(myRank);

        if (task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
            this->proc_task_comm->calculateCommunicationCost(
                    task_to_proc.getRawPtr(),
                    task_communication_xadj.getRawPtr(),
                    task_communication_adj.getRawPtr(),
                    task_communication_edge_weight_);
        }

        this->getBestMapping();

        //free the virtual machine coordinates if they were created.
        if (machine_dimensions){
            for (int i = 0; i < proc_dim; ++i){
                delete [] virtual_machine_coordinates[i];
            }
            delete [] virtual_machine_coordinates;
        }

        if (problemComm->getRank() == 0)
            this->writeMapping2(-1);
    }
    double getCommunicationCostMetric(){
        return this->proc_task_comm->getCommunicationCostMetric();
    }
    /*! \brief Using the machine dimensions provided, creates virtual
     *  machine coordinates by shifting the largest gap along each
     *  torus dimension to the boundary, so that the allocated
     *  processors become contiguous.
     */
    pcoord_t **shiftMachineCoordinates(
            int machine_dim,
            const part_t *machine_dimensions,
            part_t numProcs,
            pcoord_t **mCoords){

        pcoord_t **result_machine_coords = NULL;
        result_machine_coords = new pcoord_t*[machine_dim];
        for (int i = 0; i < machine_dim; ++i){
            result_machine_coords[i] = new pcoord_t[numProcs];
        }

        for (int i = 0; i < machine_dim; ++i){
            part_t numMachinesAlongDim = machine_dimensions[i];
            part_t *machineCounts = new part_t[numMachinesAlongDim];
            memset(machineCounts, 0, sizeof(part_t) * numMachinesAlongDim);

            int *filledCoordinates = new int[numMachinesAlongDim];

            pcoord_t *coords = mCoords[i];
            //count the processors at each coordinate along this
            //dimension.
            for (part_t j = 0; j < numProcs; ++j){
                part_t mc = (part_t) coords[j];
                ++machineCounts[mc];
            }

            part_t filledCoordinateCount = 0;
            for (part_t j = 0; j < numMachinesAlongDim; ++j){
                if (machineCounts[j] > 0){
                    filledCoordinates[filledCoordinateCount++] = j;
                }
            }

            part_t firstProcCoord = filledCoordinates[0];
            part_t firstProcCount = machineCounts[firstProcCoord];

            part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
            part_t lastProcCount = machineCounts[lastProcCoord];

            //the wrap-around gap between the last and first occupied
            //coordinates.
            part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
            part_t firstLastGapProc = lastProcCount + firstProcCount;

            part_t leftSideProcCoord = firstProcCoord;
            part_t leftSideProcCount = firstProcCount;
            part_t biggestGap = 0;
            part_t biggestGapProc = numProcs;

            part_t shiftBorderCoordinate = -1;
            for (part_t j = 1; j < filledCoordinateCount; ++j){
                part_t rightSideProcCoord = filledCoordinates[j];
                part_t rightSideProcCount = machineCounts[rightSideProcCoord];

                part_t gap = rightSideProcCoord - leftSideProcCoord;
                part_t gapProc = rightSideProcCount + leftSideProcCount;

                //a gap is preferred if it is larger, or if it is equal
                //and spans fewer processors.
                if (gap > biggestGap ||
                    (gap == biggestGap && biggestGapProc > gapProc)){
                    shiftBorderCoordinate = rightSideProcCoord;
                    biggestGapProc = gapProc;
                    biggestGap = gap;
                }
                leftSideProcCoord = rightSideProcCoord;
                leftSideProcCount = rightSideProcCount;
            }

            //shift only if an interior gap beats the wrap-around gap.
            if (!(biggestGap > firstLastGap ||
                  (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
                shiftBorderCoordinate = -1;
            }

            for (part_t j = 0; j < numProcs; ++j){
                if (coords[j] < shiftBorderCoordinate){
                    result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
                }
                else {
                    result_machine_coords[i][j] = coords[j];
                }
            }
            delete [] machineCounts;
            delete [] filledCoordinates;
        }

        return result_machine_coords;
    }
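/* Example: numMachinesAlongDim = 8 with allocated coordinates
 * {0, 1, 6, 7}. The largest interior gap (1 -> 6) beats the
 * wrap-around gap (7 -> 0), so shiftBorderCoordinate = 6 and the
 * coordinates below it are shifted up by 8, yielding {8, 9, 6, 7}: a
 * contiguous block 6..9 whose Manhattan distances now reflect the
 * torus topology.
 */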
    /*! \brief Returns the assigned processor (exactly one) for the
     *  given task.
     */
    virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *&procs) const{
        numProcs = 1;
        procs = this->task_to_proc.getRawPtr() + taskId;
    }

    /*! \brief Returns the assigned processor id for the given task.
     */
    part_t getAssignedProcForTask(part_t taskId){
        return this->task_to_proc[taskId];
    }

    /*! \brief Returns the tasks assigned to a processor, with their
     *  count.
     */
    virtual void getPartsForProc(int procId, part_t &numParts, part_t *&parts) const{
        part_t task_begin = this->proc_to_task_xadj[procId];
        part_t taskend = this->proc_to_task_xadj[procId + 1];
        parts = this->proc_to_task_adj.getRawPtr() + task_begin;
        numParts = taskend - task_begin;
    }

    ArrayView<part_t> getAssignedTaksForProc(part_t procId){
        part_t task_begin = this->proc_to_task_xadj[procId];
        part_t taskend = this->proc_to_task_xadj[procId + 1];

        if (taskend - task_begin > 0){
            ArrayView<part_t> assignedParts(
                    this->proc_to_task_adj.getRawPtr() + task_begin,
                    taskend - task_begin);
            return assignedParts;
        }
        else {
            ArrayView<part_t> assignedParts;
            return assignedParts;
        }
    }
};
/*! \brief The interface function that calls CoordinateTaskMapper,
 *  performs the mapping, and copies the result into the caller's
 *  arrays.
 */
template <typename part_t, typename pcoord_t, typename tcoord_t>
void coordinateTaskMapperInterface(
        RCP<const Teuchos::Comm<int> > problemComm,
        int proc_dim,
        int num_processors,
        pcoord_t **machine_coords,
        int task_dim,
        part_t num_tasks,
        tcoord_t **task_coords,
        part_t *task_comm_xadj,
        part_t *task_comm_adj,
        pcoord_t *task_communication_edge_weight_,
        part_t *proc_to_task_xadj,
        part_t *proc_to_task_adj,
        int recursion_depth,
        part_t *part_no_array,
        const part_t *machine_dimensions
){

    typedef Tpetra::MultiVector<tcoord_t, part_t, part_t> tMVector_t;

    //wrap the caller's CSR arrays without taking ownership.
    Teuchos::ArrayRCP<part_t> task_communication_xadj(
            task_comm_xadj, 0, num_tasks + 1, false);

    Teuchos::ArrayRCP<part_t> task_communication_adj;
    if (task_comm_xadj){
        Teuchos::ArrayRCP<part_t> tmp_task_communication_adj(
                task_comm_adj, 0, task_comm_xadj[num_tasks], false);
        task_communication_adj = tmp_task_communication_adj;
    }

    //a default environment is created for the mapper here.
    const Environment *envConst_ = new Environment();

    CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t> *ctm =
            new CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t>(
                    envConst_,
                    problemComm.getRawPtr(),
                    proc_dim,
                    num_processors,
                    machine_coords,
                    task_dim,
                    num_tasks,
                    task_coords,
                    task_communication_xadj,
                    task_communication_adj,
                    task_communication_edge_weight_,
                    recursion_depth,
                    part_no_array,
                    machine_dimensions);

    part_t *proc_to_task_xadj_;
    part_t *proc_to_task_adj_;

    ctm->getProcTask(proc_to_task_xadj_, proc_to_task_adj_);

    //copy the mapping into the caller's arrays.
    for (part_t i = 0; i <= num_processors; ++i){
        proc_to_task_xadj[i] = proc_to_task_xadj_[i];
    }
    for (part_t i = 0; i < num_tasks; ++i){
        proc_to_task_adj[i] = proc_to_task_adj_[i];
    }
    delete ctm;
}
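/* Usage sketch (caller-owned buffers, hypothetical sizes): for 4 tasks
 * on 4 processors in a 3-dimensional machine,
 *
 *   part_t proc_to_task_xadj[5], proc_to_task_adj[4];
 *   coordinateTaskMapperInterface<part_t, pcoord_t, tcoord_t>(
 *       problemComm, 3, 4, machine_coords, 3, 4, task_coords,
 *       task_comm_xadj, task_comm_adj, NULL,
 *       proc_to_task_xadj, proc_to_task_adj,
 *       recursion_depth, NULL, NULL);
 *
 * Passing NULL for the edge weights, part_no_array, and
 * machine_dimensions selects unweighted edges, default part counts,
 * and no torus shifting, respectively.
 */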
} // namespace Zoltan2

#endif //_ZOLTAN2_COORD_PARTITIONMAPPING_HPP_