1 #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
2 #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
12 #include "Teuchos_ArrayViewDecl.hpp"
15 #include "Teuchos_ReductionOp.hpp"
20 #include <zoltan_dd.h>
23 #include "Teuchos_Comm.hpp"
24 #ifdef HAVE_ZOLTAN2_MPI
25 #include "Teuchos_DefaultMpiComm.hpp"
26 #endif // HAVE_ZOLTAN2_MPI
27 #include <Teuchos_DefaultSerialComm.hpp>
36 template <
typename Ordinal,
typename T>
// Teuchos-style reduction operator body (the enclosing ReductionOp class
// and several lines are elided from this chunk; the leading numbers on
// each line are original-file line numbers left by the extraction).
// Reduces pairs laid out as buffer[0] = cost, buffer[1] = rank: keeps the
// pair with the strictly smaller cost, and on a near-tie (within
// _EPSILON) prefers the smaller second component.
49 void reduce(
const Ordinal count,
const T inBuffer[], T inoutBuffer[])
const
// NOTE(review): the loop runs `count` times but only elements [0] and [1]
// are ever read/written — confirm against the unelided file whether the
// index `i` was meant to be used inside the body.
52 for (Ordinal i=0; i < count; i++){
// Strictly smaller cost wins outright.
53 if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
54 inoutBuffer[0] = inBuffer[0];
55 inoutBuffer[1] = inBuffer[1];
56 }
// Costs equal within _EPSILON: tie-break on the second component.
else if(inBuffer[0] - inoutBuffer[0] < _EPSILON &&
57 inBuffer[1] - inoutBuffer[1] < _EPSILON){
58 inoutBuffer[0] = inBuffer[0];
59 inoutBuffer[1] = inBuffer[1];
69 template <
typename it>
// Recursive factorial helper: z2Fact(x) = x!. The signature line is elided
// from this chunk. No guard for x <= 0 is visible — presumably callers pass
// x >= 1 (dimension counts); TODO confirm.
71 return (x == 1 ? x : x * z2Fact<it>(x - 1));
74 template <
typename gno_t,
typename part_t>
82 template <
typename IT>
// Fragments of a routine that appears to decode permutation number `i`
// into perm[0..n-1] via the factorial number system (Lehmer code) —
// several declarations and the if-body below are elided; confirm against
// the unelided file.
// Build factorial table: fact[k] = k!.
92 fact[k] = fact[k - 1] * k;
// Compute the factorial-base digits of i.
95 for (k = 0; k < n; ++k)
97 perm[k] = i / fact[n - 1 - k];
98 i = i % fact[n - 1 - k];
// Adjust digits into distinct values (the statement guarded by the `if`
// is elided from this chunk).
103 for (k = n - 1; k > 0; --k)
104 for (j = k - 1; j >= 0; --j)
105 if (perm[j] <= perm[k])
111 template <
typename part_t>
// Builds a CSR-style neighbor graph (task_comm_xadj / task_comm_adj) for
// tasks laid out on a Cartesian grid described by grid_dims: each task is
// linked to its left/right neighbor along every dimension. Some lines
// (e.g. the declarations of prevDimMul and neighBorIndex) are elided.
113 int dim = grid_dims.size();
114 int neighborCount = 2 * dim;
115 task_comm_xadj = allocMemory<part_t>(taskCount+1);
116 task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);
119 task_comm_xadj[0] = 0;
120 for (
part_t i = 0; i < taskCount; ++i){
122 for (
int j = 0; j < dim; ++j){
// Neighbors along dimension j differ by the product of the lower
// dimensions' extents (prevDimMul before the update below).
123 part_t lNeighbor = i - prevDimMul;
124 part_t rNeighbor = i + prevDimMul;
125 prevDimMul *= grid_dims[j];
// The `/ prevDimMul` equality keeps the neighbor in the same slab,
// i.e. rejects wrap-around across a row boundary.
126 if (lNeighbor >= 0 && lNeighbor/ prevDimMul == i / prevDimMul && lNeighbor < taskCount){
127 task_comm_adj[neighBorIndex++] = lNeighbor;
129 if (rNeighbor >= 0 && rNeighbor/ prevDimMul == i / prevDimMul && rNeighbor < taskCount){
130 task_comm_adj[neighBorIndex++] = rNeighbor;
// Close row i of the CSR structure.
133 task_comm_xadj[i+1] = neighBorIndex;
138 template <
typename Adapter,
typename scalar_t,
typename part_t>
// Computes the centroid of each part's coordinates across all ranks into
// partCenters[dim][part]. Many lines (parameter list, part lookup `p`,
// count increments) are elided from this chunk.
141 const Teuchos::Comm<int> *comm,
147 scalar_t **partCenters){
149 typedef typename Adapter::lno_t lno_t;
150 typedef typename Adapter::gno_t gno_t;
153 ArrayView<const gno_t> gnos;
154 ArrayView<input_t> xyz;
155 ArrayView<input_t> wgts;
// Local per-part point counts, zero-initialized.
165 gno_t *point_counts = allocMemory<gno_t>(ntasks);
166 memset(point_counts, 0,
sizeof(gno_t) * ntasks);
169 gno_t *global_point_counts = allocMemory<gno_t>(ntasks);
172 scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);
174 for (
int dim=0; dim < coordDim; dim++){
175 ArrayRCP<const scalar_t> ar;
176 xyz[dim].getInputArray(ar);
// NOTE(review): casts away const on the coordinate view — the array is
// only read below, but confirm nothing downstream writes through it.
178 multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
179 memset(partCenters[dim], 0,
sizeof(scalar_t) * ntasks);
// Accumulate local coordinate sums into the owning part's slot
// (the lookup of part index `p` is elided).
194 for (lno_t i=0; i < numLocalCoords; i++){
197 for(
int j = 0; j < coordDim; ++j){
198 scalar_t c = multiJagged_coordinates[j][i];
199 partCenters[j][p] += c;
// Global point counts per part.
205 reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
206 ntasks, point_counts, global_point_counts
// Convert local sums into local contributions to the global average.
209 for(
int j = 0; j < coordDim; ++j){
210 for (
part_t i=0; i < ntasks; ++i){
211 partCenters[j][i] /= global_point_counts[i];
// Sum the partial averages across ranks, then swap the reduced buffer in
// (the line reassigning tmpCoords = tmp is elided).
215 scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
216 for(
int j = 0; j < coordDim; ++j){
217 reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
218 ntasks, partCenters[j], tmpCoords
221 scalar_t *tmp = partCenters[j];
222 partCenters[j] = tmpCoords;
227 freeArray<gno_t>(point_counts);
228 freeArray<gno_t>(global_point_counts);
230 freeArray<scalar_t>(tmpCoords);
231 freeArray<scalar_t *>(multiJagged_coordinates);
235 template <
typename Adapter,
typename scalar_t,
typename part_t>
// Builds the coarsened part-to-part communication graph (g_part_xadj /
// g_part_adj / g_part_ew) from the fine vertex graph: vertices are grouped
// by their part, and edge weights between parts are accumulated. Heavily
// elided in this chunk (declarations of np, localNumVertices, nindex, ew,
// ep, and many braces are missing).
238 const Teuchos::Comm<int> *comm,
243 ArrayRCP<part_t> &g_part_xadj,
244 ArrayRCP<part_t> &g_part_adj,
245 ArrayRCP<scalar_t> &g_part_ew
248 typedef typename Adapter::lno_t t_lno_t;
249 typedef typename Adapter::gno_t t_gno_t;
250 typedef typename Adapter::scalar_t t_scalar_t;
251 typedef typename Adapter::offset_t t_offset_t;
272 ArrayView<const t_gno_t> Ids;
273 ArrayView<t_input_t> v_wghts;
277 ArrayView<const t_gno_t> edgeIds;
278 ArrayView<const t_offset_t> offsets;
279 ArrayView<t_input_t> e_wgts;
// Copy out the first edge-weight component, if any.
283 std::vector<t_scalar_t> edge_weights;
286 if (numWeightPerEdge > 0){
287 edge_weights = std::vector<t_scalar_t>(localNumEdges);
288 for (t_lno_t i = 0; i < localNumEdges; ++i){
289 edge_weights[i] = e_wgts[0][i];
// e_parts[i] will hold the part of edge i's remote endpoint.
295 std::vector<part_t> e_parts(localNumEdges);
296 #ifdef HAVE_ZOLTAN2_MPI
// Distributed case: use a Zoltan distributed directory to look up the
// owning part of each edge endpoint across ranks.
297 if (comm->getSize() > 1)
299 Zoltan_DD_Struct *dd = NULL;
301 MPI_Comm mpicomm = Teuchos::getRawMpiComm(*comm);
305 Zoltan_DD_Create(&dd, mpicomm,
307 sizeof(
part_t), localNumVertices, debug_level);
// Register local vertices with their parts, then query the parts of all
// edge endpoints (update/find call lines partially elided).
309 ZOLTAN_ID_PTR ddnotneeded = NULL;
312 (ZOLTAN_ID_PTR) Ids.getRawPtr(),
316 int(localNumVertices));
320 (ZOLTAN_ID_PTR) edgeIds.getRawPtr(),
322 (
char *)&(e_parts[0]),
327 Zoltan_DD_Destroy(&dd);
// Serial fallback: resolve endpoint parts locally (body elided).
339 for (t_lno_t i = 0; i < localNumEdges; ++i){
340 t_gno_t ei = edgeIds[i];
// Bucket vertices by part using an intrusive linked list:
// part_begins[p] heads the list, part_nexts chains vertices.
346 std::vector<t_lno_t> part_begins(np, -1);
347 std::vector<t_lno_t> part_nexts(localNumVertices, -1);
351 for (t_lno_t i = 0; i < localNumVertices; ++i){
353 part_nexts[i] = part_begins[ap];
// Single-rank output path: build the CSR part graph directly.
358 g_part_xadj = ArrayRCP<part_t>(np + 1);
359 g_part_adj = ArrayRCP<part_t>(localNumEdges);
360 g_part_ew = ArrayRCP<t_scalar_t>(localNumEdges);
363 std::vector<part_t> part_neighbors(np);
364 std::vector<t_scalar_t> part_neighbor_weights(np, 0);
365 std::vector<t_scalar_t> part_neighbor_weights_ordered(np);
368 for (t_lno_t i = 0; i < np; ++i){
369 part_t num_neighbor_parts = 0;
370 t_lno_t v = part_begins[i];
// Walk all vertices of part i and accumulate weight per neighbor part.
374 for (t_offset_t j = offsets[v]; j < offsets[v+1]; ++j){
379 if (numWeightPerEdge > 0){
380 ew = edge_weights[j];
// First time this neighbor part is seen (weight still ~0): record it.
384 if (part_neighbor_weights[ep] < 0.00001){
385 part_neighbors[num_neighbor_parts++] = ep;
387 part_neighbor_weights[ep] += ew;
// Flush the accumulated neighbors of part i into the CSR arrays and
// reset the scratch weights for the next part.
394 for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
395 part_t neighbor_part = part_neighbors[j];
396 g_part_adj[nindex] = neighbor_part;
397 g_part_ew[nindex++] = part_neighbor_weights[neighbor_part];
398 part_neighbor_weights[neighbor_part] = 0;
400 g_part_xadj[i + 1] = nindex;
// Multi-rank output path: assemble the part graph as a Tpetra matrix and
// gather it onto every rank.
405 RCP<const Teuchos::Comm<int> > tcomm = rcpFromRef(*comm);
411 typedef Tpetra::Map<>::global_ordinal_type use_this_gno_t;
412 typedef Tpetra::Map<part_t, use_this_gno_t> t_map_t;
413 Teuchos::RCP<const t_map_t> map = Teuchos::rcp(
new t_map_t(np, 0, tcomm));
414 typedef Tpetra::CrsMatrix<t_scalar_t, part_t, use_this_gno_t>
tcrsMatrix_t;
415 Teuchos::RCP<tcrsMatrix_t> tMatrix(
new tcrsMatrix_t(map, 0));
// Same part-bucketing as the serial path.
422 std::vector<t_lno_t> part_begins(np, -1);
423 std::vector<t_lno_t> part_nexts(localNumVertices, -1);
427 for (t_lno_t i = 0; i < localNumVertices; ++i){
429 part_nexts[i] = part_begins[ap];
433 std::vector<use_this_gno_t> part_neighbors(np);
434 std::vector<t_scalar_t> part_neighbor_weights(np, 0);
435 std::vector<t_scalar_t> part_neighbor_weights_ordered(np);
438 for (t_lno_t i = 0; i < np; ++i){
439 part_t num_neighbor_parts = 0;
440 t_lno_t v = part_begins[i];
444 for (t_offset_t j = offsets[v]; j < offsets[v+1]; ++j){
449 if (numWeightPerEdge > 0){
450 ew = edge_weights[j];
453 if (part_neighbor_weights[ep] < 0.00001){
454 part_neighbors[num_neighbor_parts++] = ep;
456 part_neighbor_weights[ep] += ew;
// Pack weights in discovery order and reset scratch.
462 for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
463 use_this_gno_t neighbor_part = part_neighbors[j];
464 part_neighbor_weights_ordered[j] = part_neighbor_weights[neighbor_part];
465 part_neighbor_weights[neighbor_part] = 0;
// Insert row i (part i's neighbor list) into the distributed matrix.
469 if (num_neighbor_parts > 0){
470 Teuchos::ArrayView<const use_this_gno_t> destinations(
471 &(part_neighbors[0]), num_neighbor_parts);
472 Teuchos::ArrayView<const t_scalar_t>
vals(
473 &(part_neighbor_weights_ordered[0]), num_neighbor_parts);
474 tMatrix->insertGlobalValues(i,destinations,
vals);
480 tMatrix->fillComplete();
// Gather map: every rank owns all np rows, so the import below
// replicates the whole part graph everywhere.
484 std::vector<use_this_gno_t> part_indices(np);
486 for (use_this_gno_t i = 0; i < np; ++i) part_indices[i] = i;
488 Teuchos::ArrayView<const use_this_gno_t> global_ids(&(part_indices[0]), np);
492 Teuchos::RCP<const t_map_t> gatherRowMap(
new t_map_t(
493 Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid(),
494 global_ids, 0, tcomm));
498 Teuchos::RCP<tcrsMatrix_t> A_gather =
500 typedef Tpetra::Import<
typename t_map_t::local_ordinal_type,
501 typename t_map_t::global_ordinal_type,
502 typename t_map_t::node_type> import_type;
503 import_type
import(map, gatherRowMap);
504 A_gather->doImport(*tMatrix,
import, Tpetra::INSERT);
505 A_gather->fillComplete();
// Copy the gathered matrix into plain CSR output arrays.
510 g_part_xadj = ArrayRCP<part_t>(np + 1);
511 g_part_adj = ArrayRCP<part_t>(A_gather->getNodeNumEntries());
512 g_part_ew = ArrayRCP<t_scalar_t>(A_gather->getNodeNumEntries());
514 part_t *taskidx = g_part_xadj.getRawPtr();
515 part_t *taskadj = g_part_adj.getRawPtr();
516 t_scalar_t *taskadjwgt = g_part_ew.getRawPtr();
521 for (
part_t i = 0; i < np; i++) {
522 part_t length = A_gather->getNumEntriesInLocalRow(i);
524 taskidx[i+1] = taskidx[i] + length;
526 Teuchos::ArrayView<part_t> Indices(taskadj + taskidx[i], length);
527 Teuchos::ArrayView<t_scalar_t> Values(taskadjwgt + taskidx[i], length);
528 A_gather->getLocalRowCopy(i, Indices, Values, nentries);
536 template <
class IT,
class WT>
// Fragments of a fixed-size max-heap (indices/values pairs) used to track
// the heapSize closest points to a cluster center; method signatures and
// several braces are elided from this chunk.
// setHeapsize: allocate storage.
546 this->heapSize = heapsize_;
547 this->indices = allocMemory<IT>(heapsize_ );
548 this->values = allocMemory<WT>(heapsize_ );
// Destructor: release heap storage.
553 freeArray<IT>(this->indices);
554 freeArray<WT>(this->values);
// addPoint: the root holds the current maximum distance; a candidate at
// least that far away is rejected, otherwise it replaces the root and is
// pushed down (push_down call elided).
559 WT maxVal = this->values[0];
562 if (distance >= maxVal)
return;
564 this->values[0] = distance;
565 this->indices[0] = index;
// push_down: classic binary-heap sift-down; pick the larger child
// (`biggerIndex`), swap if it exceeds the current node, recurse (elided).
572 IT child_index1 = 2 * index_on_heap + 1;
573 IT child_index2 = 2 * index_on_heap + 2;
576 if(child_index1 < this->heapSize && child_index2 < this->heapSize){
578 if (this->values[child_index1] < this->values[child_index2]){
579 biggerIndex = child_index2;
582 biggerIndex = child_index1;
585 else if(child_index1 < this->heapSize){
586 biggerIndex = child_index1;
589 else if(child_index2 < this->heapSize){
590 biggerIndex = child_index2;
592 if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){
593 WT tmpVal = this->values[biggerIndex];
594 this->values[biggerIndex] = this->values[index_on_heap];
595 this->values[index_on_heap] = tmpVal;
597 IT tmpIndex = this->indices[biggerIndex];
598 this->indices[biggerIndex] = this->indices[index_on_heap];
599 this->indices[index_on_heap] = tmpIndex;
// initValues: reset all slots to "empty" (max distance, index -1).
605 WT MAXVAL = std::numeric_limits<WT>::max();
606 for(IT i = 0; i < this->heapSize; ++i){
607 this->values[i] = MAXVAL;
608 this->indices[i] = -1;
// Fragment accumulating the stored distances into `nc` (context elided).
616 for(IT j = 0; j < this->heapSize; ++j){
617 nc += this->values[j];
// getNewCenters-style fragment: average the member coordinates per
// dimension and report whether the center moved more than _EPSILON.
627 for(
int i = 0; i < dimension; ++i){
629 for(IT j = 0; j < this->heapSize; ++j){
630 IT k = this->indices[j];
634 nc /= this->heapSize;
635 moved = (
ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved );
// copyCoordinates: export the member indices.
643 for(IT i = 0; i < this->heapSize; ++i){
644 permutation[i] = this->indices[i];
651 template <
class IT,
class WT>
// KMeansCluster fragments: setParams allocates the center vector;
// getDistance reduces per-dimension differences into `distance`
// (accumulation lines elided), takes the dimension-th root, and records
// the point in the closest-points heap.
664 this->dimension = dimension_;
665 this->center = allocMemory<WT>(dimension_);
681 for (
int i = 0; i < this->dimension; ++i){
682 WT d = (
center[i] - elementCoords[i][index]);
// NOTE(review): a dimension-th root of the accumulated value is an
// unusual metric — the per-dimension accumulation is elided, so confirm
// the intended norm against the unelided file.
685 distance = pow(distance, WT(1.0 / this->dimension));
686 closestPoints.
addPoint(index, distance);
702 template <
class IT,
class WT>
// KMeansAlgorithm fragments (class header, method names, and many lines
// elided): clusters elements into (1 << dim) + 1 clusters seeded at the
// corners of the bounding box plus its center, runs a fixed number of
// k-means iterations, then picks the best cluster per element.
709 IT required_elements;
// Destructor: release owned arrays.
715 freeArray<KMeansCluster<IT,WT> >(clusters);
716 freeArray<WT>(maxCoordinates);
717 freeArray<WT>(minCoordinates);
// Constructor initializer list (leading parameters elided).
726 IT required_elements_):
728 numElements(numElements_),
729 elementCoords(elementCoords_),
// One cluster per bounding-box corner plus one for the center.
730 numClusters((1 << dim_) + 1),
731 required_elements(required_elements_)
733 this->clusters = allocMemory<KMeansCluster<IT,WT> >(this->numClusters);
735 for (
int i = 0; i < numClusters; ++i){
736 this->clusters[i].
setParams(this->dim, this->required_elements);
// Compute the per-dimension bounding box of all elements.
739 this->maxCoordinates = allocMemory <WT>(this->dim);
740 this->minCoordinates = allocMemory <WT>(this->dim);
743 for (
int j = 0; j < dim; ++j){
744 this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
745 for(IT i = 1; i < numElements; ++i){
746 WT t = this->elementCoords[j][i];
747 if(t > this->maxCoordinates[j]){
748 this->maxCoordinates[j] = t;
750 if (t < minCoordinates[j]){
751 this->minCoordinates[j] = t;
// Seed the first numClusters-1 centers at bounding-box corners: bit j of
// the cluster index selects max vs min along dimension j.
758 for (
int j = 0; j < dim; ++j){
759 int mod = (1 << (j+1));
760 for (
int i = 0; i < numClusters - 1; ++i){
762 if ( (i % mod) < mod / 2){
763 c = this->maxCoordinates[j];
767 c = this->minCoordinates[j];
769 this->clusters[i].
center[j] = c;
// Last cluster is seeded at the box center.
774 for (
int j = 0; j < dim; ++j){
775 this->clusters[numClusters - 1].
center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
// Main loop: a fixed 10 k-means iterations (no convergence shortcut
// visible in this chunk besides `moved`, whose use is elided).
791 for(
int it = 0; it < 10; ++it){
793 for (IT j = 0; j < this->numClusters; ++j){
796 for (IT i = 0; i < this->numElements; ++i){
798 for (IT j = 0; j < this->numClusters; ++j){
800 this->clusters[j].
getDistance(i,this->elementCoords);
804 for (IT j = 0; j < this->numClusters; ++j){
805 moved =(this->clusters[j].
getNewCenters(this->elementCoords) || moved );
// Final assignment: choose the cluster with minimal distance.
821 for (IT j = 1; j < this->numClusters; ++j){
824 if(minTmpDistance < minDistance){
825 minDistance = minTmpDistance;
// Returns the smaller of its two arguments.
// CAUTION: function-like macro — each argument may be evaluated twice, so
// do not pass expressions with side effects (e.g. MINOF(i++, j)).
// (The stray original-file line number that the extraction glued onto this
// directive has been removed; a line starting with "837 #define" is not a
// valid preprocessing directive.)
#define MINOF(a,b) (((a)<(b))?(a):(b))
845 template <
typename T>
// Fills an array of length arrSize, in parallel when OpenMP is available.
// Two branches are visible (the surrounding if/else and the loop bodies
// are elided): presumably one fills with a provided value and the other
// with the index sequence — confirm against the unelided file.
849 #ifdef HAVE_ZOLTAN2_OMP
850 #pragma omp parallel for
852 for(
size_t i = 0; i < arrSize; ++i){
859 #ifdef HAVE_ZOLTAN2_OMP
860 #pragma omp parallel for
862 for(
size_t i = 0; i < arrSize; ++i){
871 template <
typename part_t,
typename pcoord_t>
// Communication-cost evaluation for a mapping: for every task edge, adds
// the hop distance between the owning processors, optionally scaled by
// the edge weight. The outer task loop and the `distance` computation are
// elided from this chunk.
895 part_t *task_communication_xadj,
896 part_t *task_communication_adj,
897 pcoord_t *task_communication_edge_weight){
899 double totalCost = 0;
903 int assigned_proc = task_to_proc[task];
// CSR slice of this task's neighbors.
905 part_t task_adj_begin = task_communication_xadj[task];
906 part_t task_adj_end = task_communication_xadj[task+1];
908 commCount += task_adj_end - task_adj_begin;
910 for (
part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){
913 part_t neighborTask = task_communication_adj[task2];
915 int neighborProc = task_to_proc[neighborTask];
// Unweighted edges count the raw distance; weighted edges scale it.
919 if (task_communication_edge_weight == NULL){
920 totalCost += distance ;
923 totalCost += distance * task_communication_edge_weight[task2];
936 this->commCost = totalCost;
// Pure-virtual getMapping declaration fragment (name line elided): the
// derived communication models implement the actual assignment.
953 const RCP<const Environment> &env,
954 ArrayRCP <part_t> &proc_to_task_xadj,
955 ArrayRCP <part_t> &proc_to_task_adj,
956 ArrayRCP <part_t> &task_to_proc
957 ,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
963 template <
typename pcoord_t,
typename tcoord_t,
typename part_t>
// Fragments of CoordinateCommunicationModel: constructor parameters and
// body, a linear-congruential random helper, and a machine-distance
// lookup. Names of the methods are elided from this chunk.
// Constructor parameters (machine geometry and coordinates).
1008 pcoord_t **pcoords_,
1010 tcoord_t **tcoords_,
1013 int *machine_extent_,
1014 bool *machine_extent_wrap_around_,
1031 this->partArraySize = psize;
1034 this->partNoArray = pNo;
// Cluster processors using the smaller of the two coordinate spaces
// (call to the k-means machinery partially elided).
1046 part_t minCoordDim =
MINOF(this->task_coord_dim, this->proc_coord_dim);
1048 minCoordDim, nprocs,
1049 this->proc_coords, ntasks);
// Surplus processors (beyond ntasks) are marked unused.
1054 for(
int i = ntasks; i < nprocs; ++i){
1055 proc_permutation[i] = -1;
// umpa_rand-style generator: Park–Miller-like LCG step on _u_umpa_seed,
// scaled into [0, l) — presumably; constants a/q/r/m are elided.
1079 lo = _u_umpa_seed % q;
1080 hi = _u_umpa_seed / q;
1081 test = (a*lo)-(r*hi);
1083 _u_umpa_seed = test;
1085 _u_umpa_seed = test + m;
1086 d = (double) ((
double) _u_umpa_seed / (double) m);
1087 return (
part_t) (d*(double)l);
// getDistance: delegate hop-count to the machine model when available.
1091 pcoord_t distance = 0;
1104 this->machine->
getHopCount(procId1, procId2, distance);
// Loop headers from elided sorting/shuffling helpers.
1125 for (i=0; i<n; i+=16)
1142 for (i=1; i<end; i++)
// getMapping implementation (method name line elided): recursively
// partitions processors and tasks with matching coordinate permutations,
// then pairs each part's single processor with that part's tasks.
1163 const RCP<const Environment> &env,
1164 ArrayRCP <part_t> &rcp_proc_to_task_xadj,
1165 ArrayRCP <part_t> &rcp_proc_to_task_adj,
1166 ArrayRCP <part_t> &rcp_task_to_proc
1167 ,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
// Output arrays: proc->task CSR plus the inverse task->proc map.
1170 rcp_proc_to_task_xadj = ArrayRCP <part_t>(this->
no_procs+1);
1171 rcp_proc_to_task_adj = ArrayRCP <part_t>(this->
no_tasks);
1172 rcp_task_to_proc = ArrayRCP <part_t>(this->
no_tasks);
1174 part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
1175 part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
1176 part_t *task_to_proc = rcp_task_to_proc.getRawPtr();
1180 fillContinousArray<part_t>(proc_to_task_xadj, this->
no_procs+1, &invalid);
// Recursion-depth heuristic based on log2(no_procs); the branch condition
// selecting between the two formulas is elided.
1199 recursion_depth = log(
float(this->
no_procs)) / log(2.0) * 2 + 1;
1202 recursion_depth = log(
float(this->
no_procs)) / log(2.0) + 1;
// Number of distinct coordinate-axis permutations to spread over ranks:
// task_coord_dim! * proc_coord_dim!, plus the longest-dim variants.
1207 int taskPerm = z2Fact<int>(this->task_coord_dim);
1208 int procPerm = z2Fact<int>(this->proc_coord_dim);
1210 int permutations = taskPerm * procPerm;
1213 permutations += taskPerm;
1215 permutations += procPerm;
1221 part_t *proc_xadj = allocMemory<part_t>(num_parts+1);
1234 fillContinousArray<part_t>(proc_adjList,this->
no_procs, NULL);
// Each rank explores a different permutation so the later best-mapping
// reduction can pick the cheapest result.
1237 int myPermutation = myRank % permutations;
1238 bool task_partition_along_longest_dim =
false;
1239 bool proc_partition_along_longest_dim =
false;
1245 if (myPermutation == 0){
1246 task_partition_along_longest_dim =
true;
1247 proc_partition_along_longest_dim =
true;
1251 if (myPermutation < taskPerm){
1252 proc_partition_along_longest_dim =
true;
1253 myTaskPerm = myPermutation;
1256 myPermutation -= taskPerm;
1257 if (myPermutation < procPerm){
1258 task_partition_along_longest_dim =
true;
1259 myProcPerm = myPermutation;
1262 myPermutation -= procPerm;
1263 myProcPerm = myPermutation % procPerm;
1264 myTaskPerm = myPermutation / procPerm;
// Scratch permutation sized for the larger coordinate space.
1289 int *permutation = allocMemory<int>((this->proc_coord_dim > this->task_coord_dim)
1290 ? this->proc_coord_dim : this->task_coord_dim);
1293 ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);
// Partition the processors (partitioning call itself elided).
1346 env->timerStart(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
1361 proc_partition_along_longest_dim
1365 env->timerStop(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
// Partition the tasks with the task-side permutation.
1373 part_t *task_xadj = allocMemory<part_t>(num_parts+1);
1376 fillContinousArray<part_t>(task_adjList,this->
no_tasks, NULL);
1379 ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);
1382 tcoord_t **tcoords = allocMemory<tcoord_t *>(this->task_coord_dim);
1384 tcoords[i] = this->task_coords[permutation[i]];
1388 env->timerStart(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1395 this->task_coord_dim,
1402 task_partition_along_longest_dim
1408 env->timerStop(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1414 freeArray<pcoord_t *>(tcoords);
1415 freeArray<int>(permutation);
// Each final part must contain exactly one processor; record how many
// tasks that processor receives.
1419 for(
part_t i = 0; i < num_parts; ++i){
1421 part_t proc_index_begin = proc_xadj[i];
1422 part_t task_begin_index = task_xadj[i];
1423 part_t proc_index_end = proc_xadj[i+1];
1424 part_t task_end_index = task_xadj[i+1];
1427 if(proc_index_end - proc_index_begin != 1){
1428 std::cerr <<
"Error at partitioning of processors" << std::endl;
1429 std::cerr <<
"PART:" << i <<
" is assigned to " << proc_index_end - proc_index_begin <<
" processors." << std::endl;
1432 part_t assigned_proc = proc_adjList[proc_index_begin];
1433 proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
// Prefix-sum the counts into CSR offsets; the work copy is used as a
// cursor while filling adjacency below (loop header elided).
1439 part_t *proc_to_task_xadj_work = allocMemory<part_t>(this->
no_procs);
1442 part_t tmp = proc_to_task_xadj[i];
1443 proc_to_task_xadj[i] = sum;
1445 proc_to_task_xadj_work[i] = sum;
1447 proc_to_task_xadj[this->
no_procs] = sum;
// Second pass: write each task into its processor's CSR slot and fill
// the inverse map.
1449 for(
part_t i = 0; i < num_parts; ++i){
1451 part_t proc_index_begin = proc_xadj[i];
1452 part_t task_begin_index = task_xadj[i];
1453 part_t task_end_index = task_xadj[i+1];
1455 part_t assigned_proc = proc_adjList[proc_index_begin];
1457 for (
part_t j = task_begin_index; j < task_end_index; ++j){
1458 part_t taskId = task_adjList[j];
1460 task_to_proc[taskId] = assigned_proc;
1462 proc_to_task_adj [ --proc_to_task_xadj_work[assigned_proc] ] = taskId;
1515 freeArray<part_t>(proc_to_task_xadj_work);
1516 freeArray<part_t>(task_xadj);
1517 freeArray<part_t>(task_adjList);
1518 freeArray<part_t>(proc_xadj);
1519 freeArray<part_t>(proc_adjList);
1524 template <
typename Adapter,
typename part_t>
// Template header and internal typedefs of the coordinate task-mapper
// class (the `class` line itself is elided from this chunk). All local
// coordinate types alias the adapter's scalar type.
1528 #ifndef DOXYGEN_SHOULD_SKIP_THIS
1530 typedef typename Adapter::scalar_t pcoord_t;
1531 typedef typename Adapter::scalar_t tcoord_t;
1532 typedef typename Adapter::scalar_t
scalar_t;
1533 typedef typename Adapter::lno_t
lno_t;
// Runs the mapping: delegates to the communication model's getMapping
// (call arguments partially elided); reports an error to stderr when no
// model was configured. `myRank` is visibly unused in the surviving lines
// — presumably consumed in the elided portion; confirm.
1554 void doMapping(
int myRank,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_){
1556 if(this->proc_task_comm){
1560 this->proc_to_task_xadj,
1561 this->proc_to_task_adj,
1567 std::cerr <<
"communicationModel is not specified in the Mapper" << std::endl;
// Subcommunicator creation for the permutation search plus best-mapping
// selection (method names elided). Ranks are grouped so each group covers
// the full set of axis permutations; the cheapest mapping in the group is
// then broadcast to all group members.
// NOTE(review): taskPerm is computed from procDim and procPerm from
// taskDim — reversed relative to the getMapping code above; confirm
// whether the naming swap is intentional.
1579 int taskPerm = z2Fact<int>(procDim);
1580 int procPerm = z2Fact<int>(taskDim);
1581 int idealGroupSize = taskPerm * procPerm;
// Extra slots for the longest-dim variants and the combined case.
1583 idealGroupSize += taskPerm + procPerm + 1;
1585 int myRank = this->
comm->getRank();
1586 int commSize = this->
comm->getSize();
1588 int myGroupIndex = myRank / idealGroupSize;
1590 int prevGroupBegin = (myGroupIndex - 1)* idealGroupSize;
1591 if (prevGroupBegin < 0) prevGroupBegin = 0;
1592 int myGroupBegin = myGroupIndex * idealGroupSize;
1593 int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
1594 int nextGroupEnd = (myGroupIndex + 2)* idealGroupSize;
// Trailing ranks that cannot form a full group are merged into the
// previous group.
1596 if (myGroupEnd > commSize){
1597 myGroupBegin = prevGroupBegin;
1598 myGroupEnd = commSize;
1600 if (nextGroupEnd > commSize){
1601 myGroupEnd = commSize;
1603 int myGroupSize = myGroupEnd - myGroupBegin;
1605 part_t *myGroup = allocMemory<part_t>(myGroupSize);
1606 for (
int i = 0; i < myGroupSize; ++i){
1607 myGroup[i] = myGroupBegin + i;
1611 ArrayView<const part_t> myGroupView(myGroup, myGroupSize);
1613 RCP<Comm<int> > subComm = this->
comm->createSubcommunicator(myGroupView);
1614 freeArray<part_t>(myGroup);
// Best-mapping reduction: pair (cost, rank) reduced with the custom
// operator, so the cheapest mapping's owner wins ties by lowest rank.
1627 double localCost[2], globalCost[2];
1629 localCost[0] = myCost;
1630 localCost[1] = double(subComm->getRank());
1632 globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
1634 reduceAll<int, double>(*subComm, reduceBest,
1635 2, localCost, globalCost);
1637 int sender = int(globalCost[1]);
// Winner broadcasts its mapping arrays to the group.
// NOTE(review): proc_to_task_xadj was allocated with nprocs+1 entries but
// only nprocs are broadcast here — confirm against the unelided file.
1651 broadcast(*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
1652 broadcast(*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
1653 broadcast(*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
// writeMapping (name line elided): emits gnuplot scripts visualizing the
// processor/task mapping as arrows from processor coordinates to the
// coordinates of their assigned tasks. Debug/visualization only.
1660 std::ofstream gnuPlotCode(
"gnuPlot.plot", std::ofstream::out);
1663 std::string ss =
"";
// One data file per processor; first plot creates the canvas, the rest
// are overlaid with `replot`.
1666 std::string procFile = Teuchos::toString<int>(i) +
"_mapping.txt";
1668 gnuPlotCode <<
"plot \"" << procFile <<
"\"\n";
1671 gnuPlotCode <<
"replot \"" << procFile <<
"\"\n";
1674 std::ofstream inpFile(procFile.c_str(), std::ofstream::out);
// Build the arrow prefix from the processor's coordinates
// (comma-separated, no trailing comma on the last dimension).
1676 std::string gnuPlotArrow =
"set arrow from ";
1677 for(
int j = 0; j < mindim; ++j){
1678 if (j == mindim - 1){
1680 gnuPlotArrow += Teuchos::toString<float>(
proc_task_comm->proc_coords[j][i]);
1685 gnuPlotArrow += Teuchos::toString<float>(
proc_task_comm->proc_coords[j][i]) +
",";
1688 gnuPlotArrow +=
" to ";
1691 inpFile << std::endl;
// One arrow per assigned task, ending at the task's coordinates.
1693 for(
int k = 0; k < a.size(); ++k){
1696 std::string gnuPlotArrow2 = gnuPlotArrow;
1697 for(
int z = 0; z < mindim; ++z){
1698 if(z == mindim - 1){
1702 gnuPlotArrow2 += Teuchos::toString<float>(
proc_task_comm->task_coords[z][j]);
1706 gnuPlotArrow2 += Teuchos::toString<float>(
proc_task_comm->task_coords[z][j]) +
",";
1709 ss += gnuPlotArrow2 +
"\n";
1710 inpFile << std::endl;
1715 gnuPlotCode <<
"\nreplot\n pause -1 \n";
1716 gnuPlotCode.close();
// writeMapping2 (name line elided): per-rank gnuplot output showing
// inter-node communication arrows; only arrows between processors on
// different nodes are drawn, deduplicated via a set of coordinate tuples.
// Hard-coded for 3-dimensional machine coordinates.
1723 std::string rankStr = Teuchos::toString<int>(myRank);
1724 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
1725 std::string outF = gnuPlots + rankStr+ extentionS;
1726 std::ofstream gnuPlotCode(outF.c_str(), std::ofstream::out);
1733 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
1736 std::string ss =
"";
1737 std::string procs =
"";
// Dedup set keyed on (origin xyz, neighbor xyz).
1739 std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
1740 for(
part_t origin_rank = 0; origin_rank < this->
nprocs; ++origin_rank){
// Arrow prefix from the origin processor's coordinates; `procs`
// collects the point cloud for the scatter plot.
1746 std::string gnuPlotArrow =
"set arrow from ";
1747 for(
int j = 0; j < mindim; ++j){
1748 if (j == mindim - 1){
1749 gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1750 procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1754 gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]) +
",";
1755 procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank])+
" ";
1760 gnuPlotArrow +=
" to ";
// For each task on this rank, inspect its communication partners
// (neighbor_rank derivation elided).
1763 for(
int k = 0; k < a.size(); ++k){
1764 int origin_task = a[k];
1769 bool differentnode =
false;
// Different node iff any machine coordinate differs.
1773 for(
int j = 0; j < mindim; ++j){
1774 if (
int(tmpproc_task_comm->proc_coords[j][origin_rank]) !=
int(tmpproc_task_comm->proc_coords[j][neighbor_rank])){
1775 differentnode =
true;
break;
1778 std::tuple<int,int,int, int, int, int> foo(
1779 int(tmpproc_task_comm->proc_coords[0][origin_rank]),
1780 int(tmpproc_task_comm->proc_coords[1][origin_rank]),
1781 int(tmpproc_task_comm->proc_coords[2][origin_rank]),
1782 int(tmpproc_task_comm->proc_coords[0][neighbor_rank]),
1783 int(tmpproc_task_comm->proc_coords[1][neighbor_rank]),
1784 int(tmpproc_task_comm->proc_coords[2][neighbor_rank]));
// Emit each cross-node arrow only once.
1787 if (differentnode && my_arrows.find(foo) == my_arrows.end()){
1788 my_arrows.insert(foo);
1790 std::string gnuPlotArrow2 =
"";
1791 for(
int j = 0; j < mindim; ++j){
1792 if(j == mindim - 1){
1793 gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]);
1796 gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]) +
",";
1799 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
// Point-cloud file plus plot/splot command (2D vs 3D branch elided).
1807 std::ofstream procFile(
"procPlot.plot", std::ofstream::out);
1808 procFile << procs <<
"\n";
1813 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
1815 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
1818 gnuPlotCode << ss <<
"\nreplot\n pause -1 \n";
1819 gnuPlotCode.close();
// writeGnuPlot (name line elided): writes part bounding boxes and arrows
// between communicating part centers to per-rank gnuplot files.
// Debug/visualization only.
1826 const Teuchos::Comm<int> *comm_,
1829 tcoord_t **partCenters
1831 std::string file =
"gggnuPlot";
1832 std::string exten =
".plot";
1833 std::ofstream mm(
"2d.txt");
1834 file += Teuchos::toString<int>(comm_->getRank()) + exten;
1835 std::ofstream ff(file.c_str());
// Each part box writes its own gnuplot fragment.
1840 outPartBoxes[i].writeGnuPlot(ff, mm);
// 2D vs 3D plot command (branch condition elided).
1843 ff <<
"plot \"2d.txt\"" << std::endl;
1847 ff <<
"splot \"2d.txt\"" << std::endl;
1852 ff <<
"set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
// Arrow per communication-graph edge from part n's center to part i's
// center (the edge enumeration producing n and i is elided).
1856 for (
part_t p = pb; p < pe; ++p){
1860 std::string arrowline =
"set arrow from ";
1861 for (
int j = 0; j < coordDim - 1; ++j){
1862 arrowline += Teuchos::toString<tcoord_t>(partCenters[j][n]) +
",";
1864 arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][n]) +
" to ";
1867 for (
int j = 0; j < coordDim - 1; ++j){
1868 arrowline += Teuchos::toString<tcoord_t>(partCenters[j][i]) +
",";
1870 arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][i]) +
" as 5\n";
1877 ff <<
"replot\n pause -1" << std::endl;
// Accessor fragment: hands out raw pointers into the mapper's CSR arrays
// (caller must not outlive the ArrayRCPs).
1885 proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
1886 proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
// Destructor fragment: conditional cleanup of the owned model (elided).
1907 if(this->isOwnerofModel){
// Fragment of a method translating each local coordinate's part into the
// rank it was mapped to (loop body partially elided).
1913 const lno_t num_local_coords,
1914 const part_t *local_coord_parts,
1915 const ArrayRCP<part_t> task_to_proc_){
1918 for (
lno_t i = 0; i < num_local_coords; ++i){
1919 part_t local_coord_part = local_coord_parts[i];
1920 part_t rank_index = task_to_proc_[local_coord_part];
// Constructor taking a partitioning solution (the constructor's name line
// is elided; many argument lines are missing throughout). Overall flow:
// build coordinate/graph models, validate machine coordinates, compute
// part centers, coarsen the part graph, create the communication model,
// run the mapping, optionally visualize, and clean up.
1938 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
1940 const Teuchos::RCP <const Adapter> input_adapter_,
1942 const Teuchos::RCP <const Environment> envConst,
1943 bool is_input_adapter_distributed =
true,
1944 int num_ranks_per_node = 1,
1945 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true):
1946 PartitionMapping<Adapter>(comm_, machine_, input_adapter_, soln_, envConst),
1958 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
1959 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
// Non-distributed adapters are wrapped in a serial communicator.
1961 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
1962 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
1963 if (!is_input_adapter_distributed){
1964 ia_comm = Teuchos::createSerialComm<int>();
1967 RCP<const Environment> envConst_ = envConst;
// Non-owning RCP around the downcast base adapter (model construction
// calls partially elided).
1969 RCP<const ctm_base_adapter_t> baseInputAdapter_(
1970 rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()),
false));
1978 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
1988 baseInputAdapter_, envConst_, ia_comm,
// Machine must expose coordinates for coordinate-based mapping.
1992 if (!machine_->hasMachineCoordinates()) {
1993 throw std::runtime_error(
"Existing machine does not provide coordinates "
1994 "for coordinate task mapping");
1998 int procDim = machine_->getMachineDim();
1999 this->nprocs = machine_->getNumRanks();
2002 pcoord_t **procCoordinates = NULL;
2003 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2004 throw std::runtime_error(
"Existing machine does not implement "
2005 "getAllMachineCoordinatesView");
// Machine extent and wrap-around flags (raw new[] freed at the bottom).
2012 std::vector<int> machine_extent_vec(procDim);
2014 int *machine_extent = &(machine_extent_vec[0]);
2015 bool *machine_extent_wrap_around =
new bool[procDim];
2016 for (
int i = 0; i < procDim; ++i)machine_extent_wrap_around[i] =
false;
2017 machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2025 if (machine_->getMachineExtent(machine_extent)) {
2030 machine_extent_wrap_around,
// Per-part center buffers.
2038 int coordDim = coordinateModel_->getCoordinateDim();
2048 tcoord_t **partCenters = NULL;
2049 partCenters = allocMemory<tcoord_t *>(coordDim);
2050 for (
int i = 0; i < coordDim; ++i){
2051 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2054 typedef typename Adapter::scalar_t t_scalar_t;
2057 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2060 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2061 envConst.getRawPtr(),
2062 ia_comm.getRawPtr(),
2063 coordinateModel_.getRawPtr(),
2071 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
// Coarsened part graph, only when a graph model exists.
2075 if (graph_model_.getRawPtr() != NULL){
2076 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2077 envConst.getRawPtr(),
2078 ia_comm.getRawPtr(),
2079 graph_model_.getRawPtr(),
// Communication model construction (argument list mostly elided).
2091 this->proc_task_comm =
2100 machine_extent_wrap_around,
2101 machine_.getRawPtr());
2103 int myRank = comm_->getRank();
2108 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2110 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2113 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2119 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
// Rank 0 reports the maximum task-neighbor count (debug output).
2121 if (comm_->getRank() == 0){
2126 for (
part_t i = 1; i <= taskCommCount; ++i){
2128 if (maxN < nc) maxN = nc;
2130 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2133 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2136 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2149 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2156 coordinateModel_->getLocalNumCoordinates(),
// Cleanup: the wrap-around flags, machine-owned coordinate copies when
// an extent was provided, and the part-center buffers.
2177 delete []machine_extent_wrap_around;
2178 if (machine_->getMachineExtent(machine_extent)){
2179 for (
int i = 0; i < procDim; ++i){
2180 delete [] procCoordinates[i];
2182 delete [] procCoordinates;
2185 for (
int i = 0; i < coordDim; ++i){
2186 freeArray<tcoord_t>(partCenters[i]);
2188 freeArray<tcoord_t *>(partCenters);
// Second constructor, taking an explicit parts array instead of a
// solution object (name line elided). Mirrors the solution-based
// constructor above: models, machine checks, part centers, coarsened
// graph, communication model, mapping, optional visualization, cleanup.
2204 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
2206 const Teuchos::RCP <const Adapter> input_adapter_,
2208 const part_t *result_parts,
2209 const Teuchos::RCP <const Environment> envConst,
2210 bool is_input_adapter_distributed =
true,
2211 int num_ranks_per_node = 1,
2212 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true):
2213 PartitionMapping<Adapter>(comm_, machine_, input_adapter_, num_parts_, result_parts, envConst),
2225 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
2226 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
2228 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
2229 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
2230 if (!is_input_adapter_distributed){
2231 ia_comm = Teuchos::createSerialComm<int>();
2233 RCP<const Environment> envConst_ = envConst;
2235 RCP<const ctm_base_adapter_t> baseInputAdapter_(
2236 rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()),
false));
2244 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
2254 baseInputAdapter_, envConst_, ia_comm,
2258 if (!machine_->hasMachineCoordinates()) {
2259 throw std::runtime_error(
"Existing machine does not provide coordinates "
2260 "for coordinate task mapping");
2264 int procDim = machine_->getMachineDim();
2265 this->nprocs = machine_->getNumRanks();
2268 pcoord_t **procCoordinates = NULL;
2269 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2270 throw std::runtime_error(
"Existing machine does not implement "
2271 "getAllMachineCoordinatesView");
2278 std::vector<int> machine_extent_vec(procDim);
2280 int *machine_extent = &(machine_extent_vec[0]);
// NOTE(review): unlike the other constructor, the wrap-around array is
// not visibly default-initialized before this query — possibly elided;
// confirm against the unelided file.
2281 bool *machine_extent_wrap_around =
new bool[procDim];
2282 machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2290 if (machine_->getMachineExtent(machine_extent)) {
2295 machine_extent_wrap_around,
2303 int coordDim = coordinateModel_->getCoordinateDim();
// Task count comes directly from the caller-provided part count.
2306 this->ntasks = num_parts_;
2310 tcoord_t **partCenters = NULL;
2311 partCenters = allocMemory<tcoord_t *>(coordDim);
2312 for (
int i = 0; i < coordDim; ++i){
2313 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2316 typedef typename Adapter::scalar_t t_scalar_t;
2319 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2322 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2323 envConst.getRawPtr(),
2324 ia_comm.getRawPtr(),
2325 coordinateModel_.getRawPtr(),
2333 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
2337 if (graph_model_.getRawPtr() != NULL){
2338 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2339 envConst.getRawPtr(),
2340 ia_comm.getRawPtr(),
2341 graph_model_.getRawPtr(),
2354 envConst->timerStart(
MACRO_TIMERS,
"CoordinateCommunicationModel Create");
2356 this->proc_task_comm =
2365 machine_extent_wrap_around,
2366 machine_.getRawPtr());
2368 envConst->timerStop(
MACRO_TIMERS,
"CoordinateCommunicationModel Create");
2374 int myRank = comm_->getRank();
2377 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2379 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2382 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2388 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
// Rank-0 debug report of the maximum task-neighbor count.
2390 if (comm_->getRank() == 0){
2395 for (
part_t i = 1; i <= taskCommCount; ++i){
2397 if (maxN < nc) maxN = nc;
2399 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2402 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2405 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2418 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2426 coordinateModel_->getLocalNumCoordinates(),
// Cleanup mirrors the other constructor.
2449 delete []machine_extent_wrap_around;
2450 if (machine_->getMachineExtent(machine_extent)){
2451 for (
int i = 0; i < procDim; ++i){
2452 delete [] procCoordinates[i];
2454 delete [] procCoordinates;
2457 for (
int i = 0; i < coordDim; ++i){
2458 freeArray<tcoord_t>(partCenters[i]);
2460 freeArray<tcoord_t *>(partCenters);
// NOTE(review): extraction fragment — tail of a second CoordinateTaskMapper
// constructor overload that takes raw coordinate arrays instead of models;
// its name line and several interior lines are missing. Code left byte-identical.
2512 const Teuchos::Comm<int> *problemComm,
2515 pcoord_t **machine_coords,
2519 tcoord_t **task_coords,
2520 ArrayRCP<part_t>task_comm_xadj,
2521 ArrayRCP<part_t>task_comm_adj,
2522 pcoord_t *task_communication_edge_weight_,
2523 int recursion_depth,
2525 const part_t *machine_dimensions,
2526 int num_ranks_per_node = 1,
2527 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true
// By default use the caller's machine coordinates directly; wrap-around is
// assumed true in every dimension for this overload.
2540 pcoord_t ** virtual_machine_coordinates = machine_coords;
2541 bool *wrap_arounds =
new bool [proc_dim];
2542 for (
int i = 0; i < proc_dim; ++i) wrap_arounds[i] =
true;
// When machine extents are given, shift coordinates to minimize wrap-around
// distances (result is a freshly allocated copy — freed below).
2544 if (machine_dimensions){
2545 virtual_machine_coordinates =
2554 this->nprocs = num_processors;
2556 int coordDim = task_dim;
2557 this->ntasks = num_tasks;
2560 tcoord_t **partCenters = task_coords;
// Build the communication model over the (possibly shifted) coordinates.
2563 this->proc_task_comm =
2566 virtual_machine_coordinates,
2570 this->ntasks, NULL, NULL
2579 int myRank = problemComm->getRank();
2591 task_communication_edge_weight_
// Cleanup: wrap flags, and the shifted-coordinate copy if one was made.
2610 delete [] wrap_arounds;
2612 if (machine_dimensions){
2613 for (
int i = 0; i < proc_dim; ++i){
2614 delete [] virtual_machine_coordinates[i];
2616 delete [] virtual_machine_coordinates;
2619 if(problemComm->getRank() == 0)
// NOTE(review): extraction fragment — parameter tail and body of what appears
// to be shiftMachineCoordinates(); the function's name/return line is missing.
// Purpose (from the visible body): for each wrap-around machine dimension,
// find the largest empty gap between occupied coordinates and shift the
// coordinates "below" that gap up by the dimension extent, so that distances
// computed without wrap-around approximate the true torus distances.
// Returns a newly allocated machine_dim x numProcs array (caller frees).
2647 const part_t *machine_dimensions,
2648 bool *machine_extent_wrap_around,
2650 pcoord_t **mCoords){
2651 pcoord_t **result_machine_coords = NULL;
2652 result_machine_coords =
new pcoord_t*[machine_dim];
2653 for (
int i = 0; i < machine_dim; ++i){
2654 result_machine_coords[i] =
new pcoord_t [numProcs];
2657 for (
int i = 0; i < machine_dim; ++i){
2658 part_t numMachinesAlongDim = machine_dimensions[i];
// machineCounts[c] = number of procs whose coordinate along dim i equals c.
2659 part_t *machineCounts=
new part_t[numMachinesAlongDim];
2660 memset(machineCounts, 0,
sizeof(
part_t) *numMachinesAlongDim);
// filledCoordinates = sorted list of coordinates that hold at least one proc.
2662 int *filledCoordinates=
new int[numMachinesAlongDim];
2664 pcoord_t *coords = mCoords[i];
2665 for(
part_t j = 0; j < numProcs; ++j){
2667 ++machineCounts[mc];
2670 part_t filledCoordinateCount = 0;
2671 for(
part_t j = 0; j < numMachinesAlongDim; ++j){
2672 if (machineCounts[j] > 0){
2673 filledCoordinates[filledCoordinateCount++] = j;
// Gap that wraps around the ends of the dimension (candidate to beat).
2677 part_t firstProcCoord = filledCoordinates[0];
2678 part_t firstProcCount = machineCounts[firstProcCoord];
2680 part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
2681 part_t lastProcCount = machineCounts[lastProcCoord];
2683 part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
2684 part_t firstLastGapProc = lastProcCount + firstProcCount;
2686 part_t leftSideProcCoord = firstProcCoord;
2687 part_t leftSideProcCount = firstProcCount;
2689 part_t biggestGapProc = numProcs;
// shiftBorderCoordinate = first occupied coordinate to the right of the
// biggest interior gap; -1 means "do not shift this dimension".
2691 part_t shiftBorderCoordinate = -1;
2692 for(
part_t j = 1; j < filledCoordinateCount; ++j){
2693 part_t rightSideProcCoord= filledCoordinates[j];
2694 part_t rightSideProcCount = machineCounts[rightSideProcCoord];
2696 part_t gap = rightSideProcCoord - leftSideProcCoord;
2697 part_t gapProc = rightSideProcCount + leftSideProcCount;
// Prefer the widest gap; break ties by the fewer procs adjacent to it.
2702 if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
2703 shiftBorderCoordinate = rightSideProcCoord;
2704 biggestGapProc = gapProc;
2707 leftSideProcCoord = rightSideProcCoord;
2708 leftSideProcCount = rightSideProcCount;
// Only shift if the interior gap beats the wrap-around (first/last) gap.
2712 if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
2713 shiftBorderCoordinate = -1;
2716 for(
part_t j = 0; j < numProcs; ++j){
// Procs left of the border move up one full extent (torus unrolling);
// only applies when this dimension actually wraps around.
2718 if (machine_extent_wrap_around[i] && coords[j] < shiftBorderCoordinate){
2719 result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
2723 result_machine_coords[i][j] = coords[j];
2727 delete [] machineCounts;
2728 delete [] filledCoordinates;
2731 return result_machine_coords;
// NOTE(review): extraction fragments — bodies of several small accessor
// methods whose signatures were dropped. From the visible statements these
// look like: (1) a getter writing a pointer into task_to_proc for a task,
// (2) a getter returning the proc assigned to a task, (3)/(4) getters
// returning the parts/tasks assigned to a proc via the CSR arrays
// proc_to_task_xadj / proc_to_task_adj. Confirm against the full source.
2743 procs = this->task_to_proc.getRawPtr() + taskId;
2749 return this->task_to_proc[taskId];
// CSR lookup: tasks of procId live in proc_to_task_adj[xadj[procId]..xadj[procId+1]).
2759 part_t task_begin = this->proc_to_task_xadj[procId];
2760 part_t taskend = this->proc_to_task_xadj[procId+1];
2761 parts = this->proc_to_task_adj.getRawPtr() + task_begin;
2762 numParts = taskend - task_begin;
2766 part_t task_begin = this->proc_to_task_xadj[procId];
2767 part_t taskend = this->proc_to_task_xadj[procId+1];
// Empty range returns a default (null) ArrayView rather than a view of size 0.
2775 if (taskend - task_begin > 0){
2776 ArrayView <part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
2777 return assignedParts;
2780 ArrayView <part_t> assignedParts;
2781 return assignedParts;
2857 template <
typename part_t,
typename pcoord_t,
typename tcoord_t>
2859 RCP<
const Teuchos::Comm<int> > problemComm,
2862 pcoord_t **machine_coords,
2865 tcoord_t **task_coords,
2868 pcoord_t *task_communication_edge_weight_,
2871 int recursion_depth,
2873 const part_t *machine_dimensions,
2874 int num_ranks_per_node = 1,
2875 bool divide_to_prime_first =
false
2884 typedef Tpetra::MultiVector<tcoord_t, part_t, part_t>
tMVector_t;
2889 if (task_comm_xadj){
2890 Teuchos::ArrayRCP<part_t> tmp_task_communication_adj(task_comm_adj, 0, task_comm_xadj[num_tasks],
false);
2898 problemComm.getRawPtr(),
2909 task_communication_edge_weight_,
2914 divide_to_prime_first
2918 part_t* proc_to_task_xadj_;
2919 part_t* proc_to_task_adj_;
2921 ctm->
getProcTask(proc_to_task_xadj_, proc_to_task_adj_);
2923 for (
part_t i = 0; i <= num_processors; ++i){
2926 for (
part_t i = 0; i < num_tasks; ++i){
// NOTE(review): extraction fragment — gnuplot visualization helper (appears to
// be visualize_mapping): writes per-rank "gnuPlot<rank>.plot" plus
// "procPlot.plot", drawing an arrow between the machine coordinates of every
// pair of communicating tasks that live on different nodes. Only meaningful
// for 3-dimensional machine coordinates. Some interior lines are missing;
// code left byte-identical.
2934 template <
typename proc_coord_t,
typename v_lno_t>
2936 const int machine_coord_dim,
const int num_ranks, proc_coord_t **machine_coords,
2939 std::string rankStr = Teuchos::toString<int>(myRank);
2940 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
2941 std::string outF = gnuPlots + rankStr+ extentionS;
2942 std::ofstream gnuPlotCode( outF.c_str(), std::ofstream::out);
// Only 3-D machine coordinates are supported for this plot.
2944 if (machine_coord_dim != 3) {
2945 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
2948 std::string ss =
"";
2949 std::string procs =
"";
// De-duplicate arrows between identical (origin, neighbor) coordinate pairs.
2951 std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
2952 for(v_lno_t origin_task = 0; origin_task < num_tasks; ++origin_task){
2953 int origin_rank = task_to_rank[origin_task];
2954 std::string gnuPlotArrow =
"set arrow from ";
// Emit the origin coordinates: comma-separated in the arrow command,
// space-separated in the proc scatter file.
2956 for(
int j = 0; j < machine_coord_dim; ++j){
2957 if (j == machine_coord_dim - 1){
2958 gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2959 procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2963 gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]) +
",";
2964 procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank])+
" ";
2969 gnuPlotArrow +=
" to ";
// Arrows are drawn only when the two ranks sit on different machine nodes
// (any coordinate differs).
2975 bool differentnode =
false;
2976 int neighbor_rank = task_to_rank[neighbor_task];
2978 for(
int j = 0; j < machine_coord_dim; ++j){
2979 if (
int(machine_coords[j][origin_rank]) !=
int(machine_coords[j][neighbor_rank])){
2980 differentnode =
true;
break;
// Key identifying this origin->neighbor coordinate pair for de-duplication.
2983 std::tuple<int,int,int, int, int, int> foo(
2984 (
int)(machine_coords[0][origin_rank]),
2985 (
int)(machine_coords[1][origin_rank]),
2986 (
int)(machine_coords[2][origin_rank]),
2987 (
int)(machine_coords[0][neighbor_rank]),
2988 (
int)(machine_coords[1][neighbor_rank]),
2989 (
int)(machine_coords[2][neighbor_rank]));
2992 if (differentnode && my_arrows.find(foo) == my_arrows.end()){
2993 my_arrows.insert(foo);
2995 std::string gnuPlotArrow2 =
"";
2996 for(
int j = 0; j < machine_coord_dim; ++j){
2997 if(j == machine_coord_dim - 1){
2998 gnuPlotArrow2 += Teuchos::toString<float>(machine_coords[j][neighbor_rank]);
3001 gnuPlotArrow2 += Teuchos::toString<float>(machine_coords[j][neighbor_rank]) +
",";
3004 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
// Write the processor scatter data, then the plot script: 2-D "plot" vs
// 3-D "splot", followed by all arrow commands and a pause so the window stays.
3009 std::ofstream procFile(
"procPlot.plot", std::ofstream::out);
3010 procFile << procs <<
"\n";
3014 if(machine_coord_dim == 2){
3015 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
3017 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
3020 gnuPlotCode << ss <<
"\nreplot\n pause -1\npause -1";
3021 gnuPlotCode.close();