// Zoltan2_TaskMapping.hpp
#ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
#define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_

#include <fstream>
#include <ctime>
#include <vector>
#include <set>
#include <tuple>

#include "Zoltan2_Standards.hpp"
//The listing dropped a few include lines here; these are assumed, since
//PartitionMapping, AlgMJ, and MachineRepresentation are all used below.
#include "Teuchos_ArrayViewDecl.hpp"
#include "Zoltan2_PartitionMapping.hpp"
#include "Zoltan2_AlgMultiJagged.hpp"
#include "Teuchos_ReductionOp.hpp"

#include "Zoltan2_MachineRepresentation.hpp"

#include "Zoltan2_GraphModel.hpp"
#include <zoltan_dd.h>
#include <Zoltan2_TPLTraits.hpp>

#include "Teuchos_Comm.hpp"
#ifdef HAVE_ZOLTAN2_MPI
#include "Teuchos_DefaultMpiComm.hpp"
#endif // HAVE_ZOLTAN2_MPI
#include <Teuchos_DefaultSerialComm.hpp>

//#define gnuPlot

namespace Teuchos{

/*! \brief Zoltan2_ReduceBestMapping Class, reduces the minimum cost
 *  mapping, breaking ties with the minimum rank.
 */
template <typename Ordinal, typename T>
class Zoltan2_ReduceBestMapping : public ValueTypeReductionOp<Ordinal,T>
{
private:
  T _EPSILON;

public:
  /*! \brief Default Constructor */
  Zoltan2_ReduceBestMapping(): _EPSILON(std::numeric_limits<T>::epsilon()){}

  /*! \brief Implement Teuchos::ValueTypeReductionOp interface */
  void reduce(const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
  {
    //the buffers hold a single (cost, rank) pair: keep the pair with the
    //smaller cost, breaking ties by the smaller rank.
    for (Ordinal i = 0; i < count; i++){
      if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      } else if (inBuffer[0] - inoutBuffer[0] < _EPSILON &&
                 inBuffer[1] - inoutBuffer[1] < _EPSILON){
        inoutBuffer[0] = inBuffer[0];
        inoutBuffer[1] = inBuffer[1];
      }
    }
  }
};
} // namespace Teuchos
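// Usage sketch (illustrative; mirrors getBestMapping() further below). The
// reduction operates on a length-2 buffer holding one (cost, rank) pair:
//
//   double localCost[2]  = {myCost, double(comm->getRank())};
//   double globalCost[2] = {std::numeric_limits<double>::max(),
//                           std::numeric_limits<double>::max()};
//   Teuchos::Zoltan2_ReduceBestMapping<int, double> reduceBest;
//   Teuchos::reduceAll<int, double>(*comm, reduceBest, 2,
//                                   localCost, globalCost);
//   // globalCost[0]: smallest cost over all ranks; globalCost[1]: that rank.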


namespace Zoltan2{

//returns x! (requires x >= 1; the recursion does not terminate for x == 0).
template <typename it>
inline it z2Fact(it x) {
  return (x == 1 ? x : x * z2Fact<it>(x - 1));
}
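// For instance, z2Fact<int>(4) == 24; it is used below to count the possible
// orderings (permutations) of the coordinate dimensions.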

template <typename gno_t, typename part_t>
class GNO_LNO_PAIR{
public:
  gno_t gno;
  part_t part;
};

//returns the ith permutation indices.
template <typename IT>
void ithPermutation(const IT n, IT i, IT *perm)
{
  IT j, k = 0;
  IT *fact = new IT[n];

  // compute factorial numbers
  fact[k] = 1;
  while (++k < n)
    fact[k] = fact[k - 1] * k;

  // compute factorial code
  for (k = 0; k < n; ++k)
  {
    perm[k] = i / fact[n - 1 - k];
    i = i % fact[n - 1 - k];
  }

  // readjust values to obtain the permutation
  // start from the end and check if preceding values are lower
  for (k = n - 1; k > 0; --k)
    for (j = k - 1; j >= 0; --j)
      if (perm[j] <= perm[k])
        perm[k]++;

  delete [] fact;
}
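// Worked example: ithPermutation(3, 4, perm) yields perm = {2, 0, 1}, the
// index-4 permutation of {0,1,2} in lexicographic order
// ({0,1,2}, {0,2,1}, {1,0,2}, {1,2,0}, {2,0,1}, {2,1,0}).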

template <typename part_t>
void getGridCommunicationGraph(part_t taskCount, part_t *&task_comm_xadj, part_t *&task_comm_adj, std::vector<int> grid_dims){
  int dim = grid_dims.size();
  int neighborCount = 2 * dim;
  task_comm_xadj = allocMemory<part_t>(taskCount+1);
  task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);

  part_t neighBorIndex = 0;
  task_comm_xadj[0] = 0;
  for (part_t i = 0; i < taskCount; ++i){
    part_t prevDimMul = 1;
    for (int j = 0; j < dim; ++j){
      part_t lNeighbor = i - prevDimMul;
      part_t rNeighbor = i + prevDimMul;
      prevDimMul *= grid_dims[j];
      if (lNeighbor >= 0 && lNeighbor / prevDimMul == i / prevDimMul && lNeighbor < taskCount){
        task_comm_adj[neighBorIndex++] = lNeighbor;
      }
      if (rNeighbor >= 0 && rNeighbor / prevDimMul == i / prevDimMul && rNeighbor < taskCount){
        task_comm_adj[neighBorIndex++] = rNeighbor;
      }
    }
    task_comm_xadj[i+1] = neighBorIndex;
  }
}
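// Example: for a 2x2 grid (grid_dims = {2, 2}, taskCount = 4, tasks numbered
// row-major) the CSR output is
//   task_comm_xadj = {0, 2, 4, 6, 8}
//   task_comm_adj  = {1, 2,  0, 3,  3, 0,  2, 1}
// i.e., each task is connected to its left/right and up/down grid neighbors.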
//returns the centers of the parts.
template <typename Adapter, typename scalar_t, typename part_t>
void getSolutionCenterCoordinates(
    const Environment *envConst,
    const Teuchos::Comm<int> *comm,
    const Zoltan2::CoordinateModel<typename Adapter::base_adapter_t> *coords,
    //const Zoltan2::PartitioningSolution<Adapter> *soln_,
    const part_t *parts,
    int coordDim,
    part_t ntasks,
    scalar_t **partCenters){

  typedef typename Adapter::lno_t lno_t;
  typedef typename Adapter::gno_t gno_t;

  typedef StridedData<lno_t, scalar_t> input_t;
  ArrayView<const gno_t> gnos;
  ArrayView<input_t> xyz;
  ArrayView<input_t> wgts;
  coords->getCoordinates(gnos, xyz, wgts);

  //local and global number of coordinates.
  lno_t numLocalCoords = coords->getLocalNumCoordinates();
  //gno_t numGlobalCoords = coords->getGlobalNumCoordinates();

  //local number of points in each part.
  gno_t *point_counts = allocMemory<gno_t>(ntasks);
  memset(point_counts, 0, sizeof(gno_t) * ntasks);

  //global number of points in each part.
  gno_t *global_point_counts = allocMemory<gno_t>(ntasks);

  scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);

  for (int dim = 0; dim < coordDim; dim++){
    ArrayRCP<const scalar_t> ar;
    xyz[dim].getInputArray(ar);
    //multiJagged coordinate values assignment
    multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
    memset(partCenters[dim], 0, sizeof(scalar_t) * ntasks);
  }

  //get parts with parallel gnos.
  //const part_t *parts = soln_->getPartListView();
  /*
  for (lno_t i = 0; i < numLocalCoords; i++){
    cout << "me:" << comm->getRank() << " gno:" << soln_gnos[i] << " tmp.part :" << parts[i] << endl;
  }
  */

  envConst->timerStart(MACRO_TIMERS, "Mapping - Center Calculation");

  for (lno_t i = 0; i < numLocalCoords; i++){
    part_t p = parts[i];
    //add up all coordinates in each part.
    for (int j = 0; j < coordDim; ++j){
      scalar_t c = multiJagged_coordinates[j][i];
      partCenters[j][p] += c;
    }
    ++point_counts[p];
  }

  //get the global number of points in each part.
  reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
      ntasks, point_counts, global_point_counts);

  for (int j = 0; j < coordDim; ++j){
    for (part_t i = 0; i < ntasks; ++i){
      partCenters[j][i] /= global_point_counts[i];
    }
  }

  scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
  for (int j = 0; j < coordDim; ++j){
    reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
        ntasks, partCenters[j], tmpCoords);

    //swap the reduced buffer in; reuse the old buffer as scratch for the
    //next dimension (avoids a copy back).
    scalar_t *tmp = partCenters[j];
    partCenters[j] = tmpCoords;
    tmpCoords = tmp;
  }
  envConst->timerStop(MACRO_TIMERS, "Mapping - Center Calculation");

  freeArray<gno_t>(point_counts);
  freeArray<gno_t>(global_point_counts);

  freeArray<scalar_t>(tmpCoords);
  freeArray<scalar_t *>(multiJagged_coordinates);
}
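// Note: each rank divides its local per-part coordinate sums by the global
// point counts first, so the REDUCE_SUM over ranks of those scaled partial
// sums directly yields the global per-part centroids.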

//returns the coarsened part graph.
template <typename Adapter, typename scalar_t, typename part_t>
void getCoarsenedPartGraph(
    const Environment *envConst,
    const Teuchos::Comm<int> *comm,
    const Zoltan2::GraphModel<typename Adapter::base_adapter_t> *graph,
    //const Zoltan2::PartitioningSolution<Adapter> *soln_,
    part_t np,
    const part_t *parts,
    ArrayRCP<part_t> &g_part_xadj,
    ArrayRCP<part_t> &g_part_adj,
    ArrayRCP<scalar_t> &g_part_ew
    ){

  typedef typename Adapter::lno_t t_lno_t;
  typedef typename Adapter::gno_t t_gno_t;
  typedef typename Adapter::scalar_t t_scalar_t;
  typedef typename Adapter::offset_t t_offset_t;
  typedef StridedData<t_lno_t, t_scalar_t> t_input_t;

  //int numRanks = comm->getSize();
  //int myRank = comm->getRank();

  //get parts with parallel gnos.
  /*
  const part_t *parts = soln_->getPartListView();

  part_t np = soln_->getActualGlobalNumberOfParts();
  if (part_t(soln_->getTargetGlobalNumberOfParts()) > np){
    np = soln_->getTargetGlobalNumberOfParts();
  }
  */

  t_lno_t localNumVertices = graph->getLocalNumVertices();
  t_lno_t localNumEdges = graph->getLocalNumEdges();

  //get the vertex global ids and weights
  ArrayView<const t_gno_t> Ids;
  ArrayView<t_input_t> v_wghts;
  graph->getVertexList(Ids, v_wghts);

  //get the edge ids and weights
  ArrayView<const t_gno_t> edgeIds;
  ArrayView<const t_offset_t> offsets;
  ArrayView<t_input_t> e_wgts;
  graph->getEdgeList(edgeIds, offsets, e_wgts);

  std::vector<t_scalar_t> edge_weights;
  int numWeightPerEdge = graph->getNumWeightsPerEdge();

  if (numWeightPerEdge > 0){
    edge_weights = std::vector<t_scalar_t>(localNumEdges);
    for (t_lno_t i = 0; i < localNumEdges; ++i){
      edge_weights[i] = e_wgts[0][i];
    }
  }
  //create a zoltan dictionary to get the parts of the vertices
  //at the other end of edges
  std::vector<part_t> e_parts(localNumEdges);
#ifdef HAVE_ZOLTAN2_MPI
  if (comm->getSize() > 1)
  {
    Zoltan_DD_Struct *dd = NULL;

    MPI_Comm mpicomm = Teuchos::getRawMpiComm(*comm);
    int size_gnot = Zoltan2::TPL_Traits<ZOLTAN_ID_PTR, t_gno_t>::NUM_ID;

    int debug_level = 0;
    Zoltan_DD_Create(&dd, mpicomm,
        size_gnot, 0,
        sizeof(part_t), localNumVertices, debug_level);

    ZOLTAN_ID_PTR ddnotneeded = NULL;  // Local IDs not needed
    Zoltan_DD_Update(
        dd,
        (ZOLTAN_ID_PTR) Ids.getRawPtr(),
        ddnotneeded,
        (char *) parts,
        NULL,
        int(localNumVertices));

    Zoltan_DD_Find(
        dd,
        (ZOLTAN_ID_PTR) edgeIds.getRawPtr(),
        ddnotneeded,
        (char *)&(e_parts[0]),
        NULL,
        localNumEdges,
        NULL);
    Zoltan_DD_Destroy(&dd);
  } else
#endif
  {
    /*
    std::cout << "localNumVertices:" << localNumVertices
              << " np:" << np
              << " globalNumVertices:" << graph->getGlobalNumVertices()
              << " localNumEdges:" << localNumEdges << std::endl;
    */

    for (t_lno_t i = 0; i < localNumEdges; ++i){
      t_gno_t ei = edgeIds[i];
      part_t p = parts[ei];
      e_parts[i] = p;
    }

    //get the vertices in each part in my part.
    std::vector<t_lno_t> part_begins(np, -1);
    std::vector<t_lno_t> part_nexts(localNumVertices, -1);

    //cluster vertices according to their parts.
    //create the local part graph.
    for (t_lno_t i = 0; i < localNumVertices; ++i){
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }

    g_part_xadj = ArrayRCP<part_t>(np + 1);
    g_part_adj = ArrayRCP<part_t>(localNumEdges);
    g_part_ew = ArrayRCP<t_scalar_t>(localNumEdges);
    part_t nindex = 0;
    g_part_xadj[0] = 0;
    std::vector<part_t> part_neighbors(np);
    std::vector<t_scalar_t> part_neighbor_weights(np, 0);
    std::vector<t_scalar_t> part_neighbor_weights_ordered(np);

    //coarsen for all vertices in my part in order with parts.
    for (t_lno_t i = 0; i < np; ++i){
      part_t num_neighbor_parts = 0;
      t_lno_t v = part_begins[i];
      //get part i, and the first vertex in this part, v.
      while (v != -1){
        //now get the neighbors of v.
        for (t_offset_t j = offsets[v]; j < offsets[v+1]; ++j){
          //get the part of the second vertex.
          part_t ep = e_parts[j];

          t_scalar_t ew = 1;
          if (numWeightPerEdge > 0){
            ew = edge_weights[j];
          }
          //std::cout << "part:" << i << " v:" << v << " part2:" << ep << " v2:" << edgeIds[j] << " w:" << ew << std::endl;
          //add ep to the neighbor list of part i the first time it is seen.
          if (part_neighbor_weights[ep] < 0.00001){
            part_neighbors[num_neighbor_parts++] = ep;
          }
          part_neighbor_weights[ep] += ew;
        }
        v = part_nexts[v];
      }

      //now get the part list.
      for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
        part_t neighbor_part = part_neighbors[j];
        g_part_adj[nindex] = neighbor_part;
        g_part_ew[nindex++] = part_neighbor_weights[neighbor_part];
        part_neighbor_weights[neighbor_part] = 0;
      }
      g_part_xadj[i + 1] = nindex;
    }
    return;
  }

  RCP<const Teuchos::Comm<int> > tcomm = rcpFromRef(*comm);
  // KDD 8/8/18: Ideally, we'd use part_t as the global ordinal in our map,
  // typedef part_t use_this_gno_t;
  // But when Tpetra is built without global ordinal = int, part_t will not
  // link. And changing part_t would affect backward compatibility.
  // So we'll use the Tpetra default here.
  typedef Tpetra::Map<>::global_ordinal_type use_this_gno_t;
  typedef Tpetra::Map<part_t, use_this_gno_t> t_map_t;
  Teuchos::RCP<const t_map_t> map = Teuchos::rcp(new t_map_t(np, 0, tcomm));
  typedef Tpetra::CrsMatrix<t_scalar_t, part_t, use_this_gno_t> tcrsMatrix_t;
  Teuchos::RCP<tcrsMatrix_t> tMatrix(new tcrsMatrix_t(map, 0));

  envConst->timerStart(MACRO_TIMERS, "GRAPHCREATE Coarsen");
  {
    //get the vertices in each part in my part.
    std::vector<t_lno_t> part_begins(np, -1);
    std::vector<t_lno_t> part_nexts(localNumVertices, -1);

    //cluster vertices according to their parts.
    //create the local part graph.
    for (t_lno_t i = 0; i < localNumVertices; ++i){
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }

    std::vector<use_this_gno_t> part_neighbors(np);
    std::vector<t_scalar_t> part_neighbor_weights(np, 0);
    std::vector<t_scalar_t> part_neighbor_weights_ordered(np);

    //coarsen for all vertices in my part in order with parts.
    for (t_lno_t i = 0; i < np; ++i){
      part_t num_neighbor_parts = 0;
      t_lno_t v = part_begins[i];
      //get part i, and the first vertex in this part, v.
      while (v != -1){
        //now get the neighbors of v.
        for (t_offset_t j = offsets[v]; j < offsets[v+1]; ++j){
          //get the part of the second vertex.
          part_t ep = e_parts[j];

          t_scalar_t ew = 1;
          if (numWeightPerEdge > 0){
            ew = edge_weights[j];
          }
          //add ep to the neighbor list of part i the first time it is seen.
          if (part_neighbor_weights[ep] < 0.00001){
            part_neighbors[num_neighbor_parts++] = ep;
          }
          part_neighbor_weights[ep] += ew;
        }
        v = part_nexts[v];
      }

      //now get the part list.
      for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
        use_this_gno_t neighbor_part = part_neighbors[j];
        part_neighbor_weights_ordered[j] = part_neighbor_weights[neighbor_part];
        part_neighbor_weights[neighbor_part] = 0;
      }

      //insert into the tpetra crsmatrix.
      if (num_neighbor_parts > 0){
        Teuchos::ArrayView<const use_this_gno_t> destinations(
            &(part_neighbors[0]), num_neighbor_parts);
        Teuchos::ArrayView<const t_scalar_t> vals(
            &(part_neighbor_weights_ordered[0]), num_neighbor_parts);
        tMatrix->insertGlobalValues(i, destinations, vals);
      }
    }
  }
  envConst->timerStop(MACRO_TIMERS, "GRAPHCREATE Coarsen");
  envConst->timerStart(MACRO_TIMERS, "GRAPHCREATE fillComplete");
  tMatrix->fillComplete();
  envConst->timerStop(MACRO_TIMERS, "GRAPHCREATE fillComplete");

  std::vector<use_this_gno_t> part_indices(np);

  for (use_this_gno_t i = 0; i < np; ++i) part_indices[i] = i;

  Teuchos::ArrayView<const use_this_gno_t> global_ids(&(part_indices[0]), np);

  //create a map where all processors own all rows,
  //so that we do a gatherAll for the crsMatrix.
  Teuchos::RCP<const t_map_t> gatherRowMap(new t_map_t(
      Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid(),
      global_ids, 0, tcomm));

  envConst->timerStart(MACRO_TIMERS, "GRAPHCREATE Import");
  //create the importer for gatherAll
  Teuchos::RCP<tcrsMatrix_t> A_gather =
      Teuchos::rcp(new tcrsMatrix_t(gatherRowMap, 0));
  typedef Tpetra::Import<typename t_map_t::local_ordinal_type,
                         typename t_map_t::global_ordinal_type,
                         typename t_map_t::node_type> import_type;
  import_type import(map, gatherRowMap);
  A_gather->doImport(*tMatrix, import, Tpetra::INSERT);
  A_gather->fillComplete();
  envConst->timerStop(MACRO_TIMERS, "GRAPHCREATE Import");

  //create the output part arrays.
  //all processors own a whole copy.
  g_part_xadj = ArrayRCP<part_t>(np + 1);
  g_part_adj = ArrayRCP<part_t>(A_gather->getNodeNumEntries());
  g_part_ew = ArrayRCP<t_scalar_t>(A_gather->getNodeNumEntries());

  part_t *taskidx = g_part_xadj.getRawPtr();
  part_t *taskadj = g_part_adj.getRawPtr();
  t_scalar_t *taskadjwgt = g_part_ew.getRawPtr();

  taskidx[0] = 0;

  envConst->timerStart(MACRO_TIMERS, "GRAPHCREATE Import Copy");
  for (part_t i = 0; i < np; i++) {
    part_t length = A_gather->getNumEntriesInLocalRow(i); // Use Global to get same
    size_t nentries;
    taskidx[i+1] = taskidx[i] + length;
    //get the indices
    Teuchos::ArrayView<part_t> Indices(taskadj + taskidx[i], length);
    Teuchos::ArrayView<t_scalar_t> Values(taskadjwgt + taskidx[i], length);
    A_gather->getLocalRowCopy(i, Indices, Values, nentries);
  }
  envConst->timerStop(MACRO_TIMERS, "GRAPHCREATE Import Copy");
}
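// Output sketch: g_part_xadj / g_part_adj / g_part_ew form a CSR graph over
// parts. E.g., if part 0 borders parts 1 and 2 with total crossing edge
// weights 5 and 3, then g_part_xadj[0..1] = {0, 2}, g_part_adj[0..1] = {1, 2},
// and g_part_ew[0..1] = {5, 3}. After the gather, every rank holds the full
// coarsened graph.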


/*! \brief KmeansHeap Class, max heap of the closest points to a
 *  cluster center.
 */
template <class IT, class WT>
class KmeansHeap{
  IT heapSize;
  IT *indices;
  WT *values;
  WT _EPSILON;

public:
  void setHeapsize(IT heapsize_){
    this->heapSize = heapsize_;
    this->indices = allocMemory<IT>(heapsize_);
    this->values = allocMemory<WT>(heapsize_);
    this->_EPSILON = std::numeric_limits<WT>::epsilon();
  }

  ~KmeansHeap(){
    freeArray<IT>(this->indices);
    freeArray<WT>(this->values);
  }

  void addPoint(IT index, WT distance){
    WT maxVal = this->values[0];
    //add only if the distance is smaller than the current maximum distance.
    //cout << "index:" << index << " distance:" << distance << " maxVal:" << maxVal << endl;
    if (distance >= maxVal) return;
    else {
      this->values[0] = distance;
      this->indices[0] = index;
      this->push_down(0);
    }
  }

  //heap push down operation
  void push_down(IT index_on_heap){
    IT child_index1 = 2 * index_on_heap + 1;
    IT child_index2 = 2 * index_on_heap + 2;

    IT biggerIndex = -1;
    if (child_index1 < this->heapSize && child_index2 < this->heapSize){
      if (this->values[child_index1] < this->values[child_index2]){
        biggerIndex = child_index2;
      }
      else {
        biggerIndex = child_index1;
      }
    }
    else if (child_index1 < this->heapSize){
      biggerIndex = child_index1;
    }
    else if (child_index2 < this->heapSize){
      biggerIndex = child_index2;
    }
    if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){
      WT tmpVal = this->values[biggerIndex];
      this->values[biggerIndex] = this->values[index_on_heap];
      this->values[index_on_heap] = tmpVal;

      IT tmpIndex = this->indices[biggerIndex];
      this->indices[biggerIndex] = this->indices[index_on_heap];
      this->indices[index_on_heap] = tmpIndex;
      this->push_down(biggerIndex);
    }
  }

  void initValues(){
    WT MAXVAL = std::numeric_limits<WT>::max();
    for (IT i = 0; i < this->heapSize; ++i){
      this->values[i] = MAXVAL;
      this->indices[i] = -1;
    }
  }

  //returns the total distance to the center in the cluster.
  WT getTotalDistance(){
    WT nc = 0;
    for (IT j = 0; j < this->heapSize; ++j){
      nc += this->values[j];
      //cout << "index:" << this->indices[j] << " distance:" << this->values[j] << endl;
    }
    return nc;
  }

  //returns the new center of the cluster.
  bool getNewCenters(WT *center, WT **coords, int dimension){
    bool moved = false;
    for (int i = 0; i < dimension; ++i){
      WT nc = 0;
      for (IT j = 0; j < this->heapSize; ++j){
        IT k = this->indices[j];
        //cout << "i:" << i << " dim:" << dimension << " k:" << k << " heapSize:" << heapSize << endl;
        nc += coords[i][k];
      }
      nc /= this->heapSize;
      moved = (ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved);
      center[i] = nc;
    }
    return moved;
  }

  void copyCoordinates(IT *permutation){
    for (IT i = 0; i < this->heapSize; ++i){
      permutation[i] = this->indices[i];
    }
  }
};
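// The heap is a fixed-capacity max-heap on distance: initValues() fills it
// with +infinity sentinels, and addPoint() replaces the root (the current
// largest distance) whenever a closer point arrives, so after a pass over
// all points it holds the heapSize closest points to the center.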

/*! \brief KMeansCluster Class
 */
template <class IT, class WT>
class KMeansCluster{

  int dimension;
  KmeansHeap<IT,WT> closestPoints;

public:
  WT *center;
  ~KMeansCluster(){
    freeArray<WT>(center);
  }

  void setParams(int dimension_, int heapsize){
    this->dimension = dimension_;
    this->center = allocMemory<WT>(dimension_);
    this->closestPoints.setHeapsize(heapsize);
  }

  void clearHeap(){
    this->closestPoints.initValues();
  }

  bool getNewCenters(WT **coords){
    return this->closestPoints.getNewCenters(center, coords, dimension);
  }

  //returns the distance of the coordinate to the center.
  //also adds it to the heap.
  WT getDistance(IT index, WT **elementCoords){
    WT distance = 0;
    for (int i = 0; i < this->dimension; ++i){
      WT d = (center[i] - elementCoords[i][index]);
      distance += d * d;
    }
    distance = pow(distance, WT(1.0 / this->dimension));
    closestPoints.addPoint(index, distance);
    return distance;
  }

  WT getDistanceToCenter(){
    return closestPoints.getTotalDistance();
  }

  void copyCoordinates(IT *permutation){
    closestPoints.copyCoordinates(permutation);
  }
};

/*! \brief KMeansAlgorithm Class that performs clustering of the
 *  coordinates, and returns the closest set of coordinates.
 */
template <class IT, class WT>
class KMeansAlgorithm{

  int dim;
  IT numElements;
  WT **elementCoords;
  IT numClusters;
  IT required_elements;
  KMeansCluster<IT,WT> *clusters;
  WT *maxCoordinates;
  WT *minCoordinates;
public:
  ~KMeansAlgorithm(){
    freeArray<KMeansCluster<IT,WT> >(clusters);
    freeArray<WT>(maxCoordinates);
    freeArray<WT>(minCoordinates);
  }

  /*! \brief KMeansAlgorithm Constructor
   */
  KMeansAlgorithm(
      int dim_,
      IT numElements_,
      WT **elementCoords_,
      IT required_elements_):
    dim(dim_),
    numElements(numElements_),
    elementCoords(elementCoords_),
    numClusters((1 << dim_) + 1),
    required_elements(required_elements_)
  {
    this->clusters = allocMemory<KMeansCluster<IT,WT> >(this->numClusters);
    //set the dimension and the number of required elements for all clusters.
    for (int i = 0; i < numClusters; ++i){
      this->clusters[i].setParams(this->dim, this->required_elements);
    }

    this->maxCoordinates = allocMemory<WT>(this->dim);
    this->minCoordinates = allocMemory<WT>(this->dim);

    //obtain the min and max coordinates for each dimension.
    for (int j = 0; j < dim; ++j){
      this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
      for (IT i = 1; i < numElements; ++i){
        WT t = this->elementCoords[j][i];
        if (t > this->maxCoordinates[j]){
          this->maxCoordinates[j] = t;
        }
        if (t < minCoordinates[j]){
          this->minCoordinates[j] = t;
        }
      }
    }

    //assign initial cluster centers.
    for (int j = 0; j < dim; ++j){
      int mod = (1 << (j+1));
      for (int i = 0; i < numClusters - 1; ++i){
        WT c = 0;
        if ((i % mod) < mod / 2){
          c = this->maxCoordinates[j];
          //cout << "i:" << i << " j:" << j << " setting max:" << c << endl;
        }
        else {
          c = this->minCoordinates[j];
        }
        this->clusters[i].center[j] = c;
      }
    }

    //the last cluster center is placed in the middle.
    for (int j = 0; j < dim; ++j){
      this->clusters[numClusters - 1].center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
    }

    /*
    for (int i = 0; i < numClusters; ++i){
      //cout << endl << "cluster:" << i << endl << "\t";
      for (int j = 0; j < dim; ++j){
        cout << this->clusters[i].center[j] << " ";
      }
    }
    */
  }

  //performs kmeans clustering of the coordinates.
  void kmeans(){
    for (int it = 0; it < 10; ++it){
      //cout << "it:" << it << endl;
      for (IT j = 0; j < this->numClusters; ++j){
        this->clusters[j].clearHeap();
      }
      for (IT i = 0; i < this->numElements; ++i){
        //cout << "i:" << i << " numEl:" << this->numElements << endl;
        for (IT j = 0; j < this->numClusters; ++j){
          //cout << "j:" << j << " numClusters:" << this->numClusters << endl;
          this->clusters[j].getDistance(i, this->elementCoords);
        }
      }
      bool moved = false;
      for (IT j = 0; j < this->numClusters; ++j){
        moved = (this->clusters[j].getNewCenters(this->elementCoords) || moved);
      }
      if (!moved){
        break;
      }
    }
  }

  //finds the cluster in which the coordinates are the closest to each other.
  void getMinDistanceCluster(IT *procPermutation){

    WT minDistance = this->clusters[0].getDistanceToCenter();
    IT minCluster = 0;
    //cout << "j:" << 0 << " minDistance:" << minDistance << " minTmpDistance:" << minDistance << " minCluster:" << minCluster << endl;
    for (IT j = 1; j < this->numClusters; ++j){
      WT minTmpDistance = this->clusters[j].getDistanceToCenter();
      //cout << "j:" << j << " minDistance:" << minDistance << " minTmpDistance:" << minTmpDistance << " minCluster:" << minCluster << endl;
      if (minTmpDistance < minDistance){
        minDistance = minTmpDistance;
        minCluster = j;
      }
    }

    //cout << "minCluster:" << minCluster << endl;
    this->clusters[minCluster].copyCoordinates(procPermutation);
  }
};
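// Design note: 2^dim + 1 clusters are seeded at the corners of the bounding
// box of the coordinates plus its midpoint; each cluster tracks only its
// required_elements closest points, and kmeans() runs at most 10 iterations,
// stopping early once no center moves. getMinDistanceCluster() then returns
// the tightest such cluster, e.g. the most closely packed subset of
// processors in getClosestSubset() below.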


#define MINOF(a,b) (((a)<(b))?(a):(b))

/*! \brief fillContinousArray function
 *  \param arr array to be filled in with values.
 *  \param arrSize the size of the array.
 *  \param val if val is NULL the array is filled with arr[i] = i;
 *  otherwise it is filled with *val.
 */
template <typename T>
void fillContinousArray(T *arr, size_t arrSize, T *val){
  if (val == NULL){

#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i){
      arr[i] = i;
    }

  }
  else {
    T v = *val;
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i){
      //cout << "writing to i:" << i << " arr:" << arrSize << endl;
      arr[i] = v;
    }
  }
}
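// Example: fillContinousArray<int>(arr, 4, NULL) gives {0, 1, 2, 3};
// with int v = 7, fillContinousArray<int>(arr, 4, &v) gives {7, 7, 7, 7}.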

/*! \brief CommunicationModel Base Class for the communication cost models.
 */
template <typename part_t, typename pcoord_t>
class CommunicationModel{
protected:
  double commCost;
public:

  part_t no_procs; //the number of processors
  part_t no_tasks; //the number of tasks.
  CommunicationModel(): commCost(), no_procs(0), no_tasks(0){}
  CommunicationModel(part_t no_procs_, part_t no_tasks_):
    commCost(),
    no_procs(no_procs_),
    no_tasks(no_tasks_){}
  virtual ~CommunicationModel(){}
  part_t getNProcs() const{
    return this->no_procs;
  }
  part_t getNTasks() const{
    return this->no_tasks;
  }

  void calculateCommunicationCost(
      part_t *task_to_proc,
      part_t *task_communication_xadj,
      part_t *task_communication_adj,
      pcoord_t *task_communication_edge_weight){

    double totalCost = 0;

    part_t commCount = 0;
    for (part_t task = 0; task < this->no_tasks; ++task){
      int assigned_proc = task_to_proc[task];
      //cout << "task:" << task << endl;
      part_t task_adj_begin = task_communication_xadj[task];
      part_t task_adj_end = task_communication_xadj[task+1];

      commCount += task_adj_end - task_adj_begin;
      //cout << "task:" << task << " proc:" << assigned_proc << endl;
      for (part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){

        //cout << "task2:" << task2 << endl;
        part_t neighborTask = task_communication_adj[task2];
        //cout << "neighborTask :" << neighborTask << endl;
        int neighborProc = task_to_proc[neighborTask];

        double distance = getProcDistance(assigned_proc, neighborProc);

        if (task_communication_edge_weight == NULL){
          totalCost += distance;
        }
        else {
          totalCost += distance * task_communication_edge_weight[task2];

          /*
          std::cout << "\ttask:" << task << " assigned_proc:" << assigned_proc <<
              " task2:" << task << " neighborProc:" << neighborProc <<
              " d:" << distance << " task_communication_edge_weight[task2]:" << task_communication_edge_weight[task2] <<
              " wh:" << distance * task_communication_edge_weight[task2] <<
              std::endl;
          */
        }
      }
    }

    this->commCost = totalCost;// commCount;
  }

  double getCommunicationCostMetric(){
    return this->commCost;
  }

  virtual double getProcDistance(int procId1, int procId2) const = 0;

  /*! \brief Pure virtual function; maps the tasks to the processors.
   */
  virtual void getMapping(
      int myRank,
      const RCP<const Environment> &env,
      ArrayRCP<part_t> &proc_to_task_xadj, //holds the pointer to the task array
      ArrayRCP<part_t> &proc_to_task_adj, //holds the indices of tasks wrt the proc_to_task_xadj array
      ArrayRCP<part_t> &task_to_proc //holds the processors mapped to tasks
      ,const Teuchos::RCP<const Teuchos::Comm<int> > comm_
      ) const = 0;
};
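// Cost model sketch: calculateCommunicationCost() computes
//   commCost = sum over directed task edges (t, u) of
//              dist(proc(t), proc(u)) * w(t, u)
// with w = 1 when no edge weights are given; dist() is supplied by the
// concrete subclass (hop count or wrap-around grid distance below).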
/*! \brief CoordinateCommunicationModel: models the communication cost
 *  using the coordinates of the processors and the tasks.
 */
template <typename pcoord_t, typename tcoord_t, typename part_t>
class CoordinateCommunicationModel : public CommunicationModel<part_t, pcoord_t> {
public:
  //private:
  int proc_coord_dim; //dimension of the processors
  pcoord_t **proc_coords; //the processor coordinates. allocated outside of the class.
  int task_coord_dim; //dimension of the tasks coordinates.
  tcoord_t **task_coords; //the task coordinates allocated outside of the class.
  int partArraySize;
  part_t *partNoArray;

  int *machine_extent;
  bool *machine_extent_wrap_around;
  const MachineRepresentation<pcoord_t,part_t> *machine;

  int num_ranks_per_node;
  bool divide_to_prime_first;

  //public:
  CoordinateCommunicationModel():
    CommunicationModel<part_t, pcoord_t>(),
    proc_coord_dim(0),
    proc_coords(0),
    task_coord_dim(0),
    task_coords(0),
    partArraySize(-1),
    partNoArray(NULL),
    machine_extent(NULL),
    machine_extent_wrap_around(NULL),
    machine(NULL),
    num_ranks_per_node(1),
    divide_to_prime_first(false){}

  virtual ~CoordinateCommunicationModel(){}

  /*! \brief Class Constructor:
   *  \param pcoord_dim_ the dimension of the processor coordinates
   *  \param pcoords_ the processor coordinates
   *  \param tcoord_dim_ the dimension of the task coordinates
   *  \param tcoords_ the task coordinates
   *  \param no_procs_ the number of processors
   *  \param no_tasks_ the number of tasks
   */
  CoordinateCommunicationModel(
      int pcoord_dim_,
      pcoord_t **pcoords_,
      int tcoord_dim_,
      tcoord_t **tcoords_,
      part_t no_procs_,
      part_t no_tasks_,
      int *machine_extent_,
      bool *machine_extent_wrap_around_,
      const MachineRepresentation<pcoord_t,part_t> *machine_ = NULL
      ):
    CommunicationModel<part_t, pcoord_t>(no_procs_, no_tasks_),
    proc_coord_dim(pcoord_dim_), proc_coords(pcoords_),
    task_coord_dim(tcoord_dim_), task_coords(tcoords_),
    partArraySize(-1),
    partNoArray(NULL),
    machine_extent(machine_extent_),
    machine_extent_wrap_around(machine_extent_wrap_around_),
    machine(machine_),
    num_ranks_per_node(1),
    divide_to_prime_first(false){
  }


  void setPartArraySize(int psize){
    this->partArraySize = psize;
  }
  void setPartArray(part_t *pNo){
    this->partNoArray = pNo;
  }

  /*! \brief Function is called whenever nprocs > no_task.
   *  Returns the subset of processors that are closest to each other.
   *  \param proc_permutation holds the indices of the chosen processors.
   *  \param nprocs the number of processors.
   *  \param ntasks the number of tasks.
   */
  void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const{
    //uses k-means to pick a tightly clustered subset
    //(a random-subset variant is commented out below).

    part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);
    KMeansAlgorithm<part_t, pcoord_t> kma(
        minCoordDim, nprocs,
        this->proc_coords, ntasks);

    kma.kmeans();
    kma.getMinDistanceCluster(proc_permutation);

    for (int i = ntasks; i < nprocs; ++i){
      proc_permutation[i] = -1;
    }
    /*
    //fill array.
    fillContinousArray<part_t>(proc_permutation, nprocs, NULL);
    int _u_umpa_seed = 847449649;
    srand (time(NULL));
    int a = rand() % 1000 + 1;
    _u_umpa_seed -= a;
    //permute array randomly.
    update_visit_order(proc_permutation, nprocs, _u_umpa_seed, 1);
    */
  }

  //temporary, necessary for random permutation.
  static part_t umpa_uRandom(part_t l, int &_u_umpa_seed)
  {
    int a = 16807;
    int m = 2147483647;
    int q = 127773;
    int r = 2836;
    int lo, hi, test;
    double d;

    lo = _u_umpa_seed % q;
    hi = _u_umpa_seed / q;
    test = (a*lo) - (r*hi);
    if (test > 0)
      _u_umpa_seed = test;
    else
      _u_umpa_seed = test + m;
    d = (double) ((double) _u_umpa_seed / (double) m);
    return (part_t) (d * (double)l);
  }
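  // This is the Park-Miller "minimal standard" linear congruential generator
  // (a = 16807, m = 2^31 - 1), computed with Schrage's trick (q = m / a,
  // r = m % a) to avoid 32-bit overflow; the result is scaled to a uniform
  // integer in [0, l).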

  virtual double getProcDistance(int procId1, int procId2) const{
    pcoord_t distance = 0;
    if (machine == NULL){
      for (int i = 0; i < this->proc_coord_dim; ++i){
        double d = ZOLTAN2_ABS(proc_coords[i][procId1] - proc_coords[i][procId2]);
        if (machine_extent_wrap_around && machine_extent_wrap_around[i]){
          if (machine_extent[i] - d < d){
            d = machine_extent[i] - d;
          }
        }
        distance += d;
      }
    }
    else {
      this->machine->getHopCount(procId1, procId2, distance);
    }

    return distance;
  }
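  // Worked example (wrap-around enabled, extent 8 in some dimension): ranks
  // at coordinates 1 and 7 give d = 6, but the torus link makes the shorter
  // way 8 - 6 = 2, so 2 is added to the Manhattan distance instead.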


  //temporary, does random permutation.
  void update_visit_order(part_t* visitOrder, part_t n, int &_u_umpa_seed, part_t rndm) {
    part_t *a = visitOrder;

    if (rndm){
      part_t i, u, v, tmp;

      if (n <= 4)
        return;

      //srand ( time(NULL) );
      //_u_umpa_seed = _u_umpa_seed1 - (rand()%100);
      for (i = 0; i < n; i += 16)
      {
        u = umpa_uRandom(n-4, _u_umpa_seed);
        v = umpa_uRandom(n-4, _u_umpa_seed);

        // FIXME (mfh 30 Sep 2015) This requires including Zoltan2_AlgMultiJagged.hpp.
        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v], a[u], tmp);
        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v+1], a[u+1], tmp);
        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v+2], a[u+2], tmp);
        ZOLTAN2_ALGMULTIJAGGED_SWAP(a[v+3], a[u+3], tmp);
      }
    }
    else {
      part_t i, end = n / 4;

      for (i = 1; i < end; i++)
      {
        part_t j = umpa_uRandom(n-i, _u_umpa_seed);
        part_t t = a[j];
        a[j] = a[n-i];
        a[n-i] = t;
      }
    }
    //PermuteInPlace(visitOrder, n);
  }


  /*! \brief Partitions both the processors and the tasks into num_parts
   *  parts with sequential multi-jagged partitioning, then assigns the
   *  tasks in part i to the processor in part i.
   */
  virtual void getMapping(
      int myRank,
      const RCP<const Environment> &env,
      ArrayRCP<part_t> &rcp_proc_to_task_xadj, //holds the pointer to the task array
      ArrayRCP<part_t> &rcp_proc_to_task_adj, //holds the indices of tasks wrt the proc_to_task_xadj array
      ArrayRCP<part_t> &rcp_task_to_proc //holds the processors mapped to tasks
      ,const Teuchos::RCP<const Teuchos::Comm<int> > comm_
      ) const{

    rcp_proc_to_task_xadj = ArrayRCP<part_t>(this->no_procs+1);
    rcp_proc_to_task_adj = ArrayRCP<part_t>(this->no_tasks);
    rcp_task_to_proc = ArrayRCP<part_t>(this->no_tasks);

    part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
    part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
    part_t *task_to_proc = rcp_task_to_proc.getRawPtr();

    part_t invalid = 0;
    fillContinousArray<part_t>(proc_to_task_xadj, this->no_procs+1, &invalid);

    //obtain the number of parts that should be divided.
    part_t num_parts = MINOF(this->no_procs, this->no_tasks);

    //We no longer use the min coordinate dim: if the machine dimension is
    //greater than the task dimension, we would end up with a long line.
    //part_t minCoordDim = MINOF(this->task_coord_dim, this->proc_coord_dim);

    int recursion_depth = partArraySize;
    //if (partArraySize < minCoordDim) recursion_depth = minCoordDim;
    if (partArraySize == -1){

      if (divide_to_prime_first){
        //it is difficult to estimate the number of steps in this case as
        //each branch will have a different depth. The worst case happens
        //when all prime factors are 3s: P = 3^n needs n recursion steps to
        //divide parts into 2x and x, and another n steps to divide each 2x
        //into x and x. Set the upper bound here; the partitioning algorithm
        //skips the unneeded deeper steps anyway.
        recursion_depth = log(float(this->no_procs)) / log(2.0) * 2 + 1;
      }
      else {
        recursion_depth = log(float(this->no_procs)) / log(2.0) + 1;
      }
    }

    int taskPerm = z2Fact<int>(this->task_coord_dim); //number of permutations for task dimension ordering
    int procPerm = z2Fact<int>(this->proc_coord_dim); //number of permutations for proc dimension ordering

    int permutations = taskPerm * procPerm; //all pairs of dimension orderings
    //add the cases where the processors are divided along their longest
    //dimension while the tasks use a fixed ordering,
    permutations += taskPerm;
    //the cases where the tasks are divided along their longest dimension
    //while the processors use a fixed ordering,
    permutations += procPerm;
    //and the one case where both are divided along their longest dimension.
    permutations += 1;

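    // Example count for 3-D machine and 3-D task coordinates:
    // taskPerm = procPerm = 3! = 6, so permutations = 6*6 + 6 + 6 + 1 = 49;
    // rank r tries variant r % 49, and the best result is selected afterwards
    // (see getBestMapping() in CoordinateTaskMapper below).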
    //holds the pointers to proc_adjList
    part_t *proc_xadj = allocMemory<part_t>(num_parts+1);
    //holds the processors in parts according to the result of the
    //partitioning algorithm.
    //the processors assigned to part x are at
    //proc_adjList[ proc_xadj[x] : proc_xadj[x+1] ]
    part_t *proc_adjList = allocMemory<part_t>(this->no_procs);

    part_t used_num_procs = this->no_procs;
    if (this->no_procs > this->no_tasks){
      //obtain the subset of the processors that are closest to each other.
      this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks);
      used_num_procs = this->no_tasks;
    }
    else {
      fillContinousArray<part_t>(proc_adjList, this->no_procs, NULL);
    }

    int myPermutation = myRank % permutations; //the index of the permutation
    bool task_partition_along_longest_dim = false;
    bool proc_partition_along_longest_dim = false;

    int myProcPerm = 0;
    int myTaskPerm = 0;

    if (myPermutation == 0){
      task_partition_along_longest_dim = true;
      proc_partition_along_longest_dim = true;
    }
    else {
      --myPermutation;
      if (myPermutation < taskPerm){
        proc_partition_along_longest_dim = true;
        myTaskPerm = myPermutation; // the index of the task permutation
      }
      else {
        myPermutation -= taskPerm;
        if (myPermutation < procPerm){
          task_partition_along_longest_dim = true;
          myProcPerm = myPermutation; // the index of the proc permutation
        }
        else {
          myPermutation -= procPerm;
          myProcPerm = myPermutation % procPerm; // the index of the proc permutation
          myTaskPerm = myPermutation / procPerm; // the index of the task permutation
        }
      }
    }

    /*
    if (task_partition_along_longest_dim && proc_partition_along_longest_dim){
      std::cout << "me:" << myRank << " task:longest proc:longest" << " numPerms:" << permutations << std::endl;
    }
    else if (proc_partition_along_longest_dim){
      std::cout << "me:" << myRank << " task:" << myTaskPerm << " proc:longest" << " numPerms:" << permutations << std::endl;
    }
    else if (task_partition_along_longest_dim){
      std::cout << "me:" << myRank << " task: longest" << " proc:" << myProcPerm << " numPerms:" << permutations << std::endl;
    }
    else {
      std::cout << "me:" << myRank << " task:" << myTaskPerm << " proc:" << myProcPerm << " numPerms:" << permutations << std::endl;
    }
    */

    int *permutation = allocMemory<int>((this->proc_coord_dim > this->task_coord_dim)
        ? this->proc_coord_dim : this->task_coord_dim);

    //get the permutation order from the proc permutation index.
    ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);

    /*
    //reorder the coordinate dimensions.
    pcoord_t **pcoords = allocMemory<pcoord_t *>(this->proc_coord_dim);
    for (int i = 0; i < this->proc_coord_dim; ++i){
      pcoords[i] = this->proc_coords[permutation[i]];
      //cout << permutation[i] << " ";
    }
    */
    int procdim = this->proc_coord_dim;
    pcoord_t **pcoords = this->proc_coords;
    /*
    int procdim = this->proc_coord_dim;
    procdim = 6;
    //reorder the coordinate dimensions.
    pcoord_t **pcoords = allocMemory<pcoord_t *>(procdim);
    for (int i = 0; i < procdim; ++i){
      pcoords[i] = new pcoord_t[used_num_procs]; //this->proc_coords[permutation[i]];
    }

    for (int k = 0; k < used_num_procs; k++){
      pcoords[0][k] = (int (this->proc_coords[0][k]) / 2) * 64;
      pcoords[3][k] = (int (this->proc_coords[0][k]) % 2) * 8;

      pcoords[1][k] = (int (this->proc_coords[1][k]) / 2) * 8 * 2400;
      pcoords[4][k] = (int (this->proc_coords[1][k]) % 2) * 8;
      pcoords[2][k] = ((int (this->proc_coords[2][k])) / 8) * 160;
      pcoords[5][k] = ((int (this->proc_coords[2][k])) % 8) * 5;

      //if (this->proc_coords[0][k] == 40 && this->proc_coords[1][k] == 8 && this->proc_coords[2][k] == 48){
      if (this->proc_coords[0][k] == 5 && this->proc_coords[1][k] == 0 && this->proc_coords[2][k] == 10){
        std::cout << "pcoords[0][k]:" << pcoords[0][k] <<
            " pcoords[1][k]:" << pcoords[1][k] <<
            " pcoords[2][k]:" << pcoords[2][k] <<
            " pcoords[3][k]:" << pcoords[3][k] <<
            " pcoords[4][k]:" << pcoords[4][k] <<
            " pcoords[5][k]:" << pcoords[5][k] << std::endl;
      }
      else if (pcoords[0][k] == 64 && pcoords[1][k] == 0 && pcoords[2][k] == 160 &&
          pcoords[3][k] == 16 && pcoords[4][k] == 0 && pcoords[5][k] == 10){
        std::cout << "this->proc_coords[0][k]:" << this->proc_coords[0][k] <<
            " this->proc_coords[1][k]:" << this->proc_coords[1][k] <<
            " this->proc_coords[2][k]:" << this->proc_coords[2][k] << std::endl;
      }
    }
    */

    //if (partNoArray == NULL) std::cout << "partNoArray is null" << std::endl;
    //std::cout << "recursion_depth:" << recursion_depth << " partArraySize:" << partArraySize << std::endl;
    //do the partitioning and renumber the parts.
    env->timerStart(MACRO_TIMERS, "Mapping - Proc Partitioning");

    //the sequential multi-jagged partitioner (the template arguments here
    //are assumed to follow the AlgMJ declaration in Zoltan2_AlgMultiJagged.hpp).
    AlgMJ<pcoord_t, part_t, part_t, part_t> mj_partitioner;
    mj_partitioner.sequential_task_partitioning(
        env,
        this->no_procs,
        used_num_procs,
        num_parts,
        procdim,
        //minCoordDim,
        pcoords, //this->proc_coords,
        proc_adjList,
        proc_xadj,
        recursion_depth,
        partNoArray,
        proc_partition_along_longest_dim //, false
        );
    env->timerStop(MACRO_TIMERS, "Mapping - Proc Partitioning");
    //comm_->barrier();
    //std::cout << "mj_partitioner for procs is over" << std::endl;

    //freeArray<pcoord_t *>(pcoords);

    part_t *task_xadj = allocMemory<part_t>(num_parts+1);
    part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
    //fill task_adjList st: task_adjList[i] <- i.
    fillContinousArray<part_t>(task_adjList, this->no_tasks, NULL);

    //get the permutation order from the task permutation index.
    ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);

    //reorder the task coordinate dimensions.
    tcoord_t **tcoords = allocMemory<tcoord_t *>(this->task_coord_dim);
    for (int i = 0; i < this->task_coord_dim; ++i){
      tcoords[i] = this->task_coords[permutation[i]];
    }

    env->timerStart(MACRO_TIMERS, "Mapping - Task Partitioning");
    //partitioning of tasks
    mj_partitioner.sequential_task_partitioning(
        env,
        this->no_tasks,
        this->no_tasks,
        num_parts,
        this->task_coord_dim,
        //minCoordDim,
        tcoords, //this->task_coords,
        task_adjList,
        task_xadj,
        recursion_depth,
        partNoArray,
        task_partition_along_longest_dim
        //,"task_partitioning"
        //, false //(myRank == 6)
        );
    env->timerStop(MACRO_TIMERS, "Mapping - Task Partitioning");

    //std::cout << "myrank:" << myRank << std::endl;
    //comm_->barrier();
    //std::cout << "mj_partitioner.sequential_task_partitioning is over" << std::endl;

    freeArray<tcoord_t *>(tcoords);
    freeArray<int>(permutation);
1417 
1418  //filling proc_to_task_xadj, proc_to_task_adj, task_to_proc arrays.
1419  for(part_t i = 0; i < num_parts; ++i){
1420 
1421  part_t proc_index_begin = proc_xadj[i];
1422  part_t task_begin_index = task_xadj[i];
1423  part_t proc_index_end = proc_xadj[i+1];
1424  part_t task_end_index = task_xadj[i+1];
1425 
1426 
1427  if(proc_index_end - proc_index_begin != 1){
1428  std::cerr << "Error at partitioning of processors" << std::endl;
1429  std::cerr << "PART:" << i << " is assigned to " << proc_index_end - proc_index_begin << " processors." << std::endl;
1430  exit(1);
1431  }
1432  part_t assigned_proc = proc_adjList[proc_index_begin];
1433  proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
1434  }
1435 
1436 
1437  //holds the pointer to the task array
1438  //convert proc_to_task_xadj to CSR index array
1439  part_t *proc_to_task_xadj_work = allocMemory<part_t>(this->no_procs);
1440  part_t sum = 0;
1441  for(part_t i = 0; i < this->no_procs; ++i){
1442  part_t tmp = proc_to_task_xadj[i];
1443  proc_to_task_xadj[i] = sum;
1444  sum += tmp;
1445  proc_to_task_xadj_work[i] = sum;
1446  }
1447  proc_to_task_xadj[this->no_procs] = sum;
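    // Worked example: with task counts per proc {2, 1, 3}, the prefix sum
    // gives proc_to_task_xadj = {0, 2, 3, 6} and proc_to_task_xadj_work =
    // {2, 3, 6}; the loop below then fills proc_to_task_adj backwards by
    // decrementing the work array, so each proc's tasks land in
    // proc_to_task_adj[ proc_to_task_xadj[p] : proc_to_task_xadj[p+1] ].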

    for (part_t i = 0; i < num_parts; ++i){

      part_t proc_index_begin = proc_xadj[i];
      part_t task_begin_index = task_xadj[i];
      part_t task_end_index = task_xadj[i+1];

      part_t assigned_proc = proc_adjList[proc_index_begin];

      for (part_t j = task_begin_index; j < task_end_index; ++j){
        part_t taskId = task_adjList[j];

        task_to_proc[taskId] = assigned_proc;

        proc_to_task_adj[--proc_to_task_xadj_work[assigned_proc]] = taskId;
      }
    }

    /*
    if (myPermutation == 0){
      std::ofstream gnuPlotCode("mymapping.out", std::ofstream::out);

      for (part_t i = 0; i < num_parts; ++i){

        part_t proc_index_begin = proc_xadj[i];
        part_t proc_index_end = proc_xadj[i+1];

        if (proc_index_end - proc_index_begin != 1){
          std::cerr << "Error at partitioning of processors" << std::endl;
          std::cerr << "PART:" << i << " is assigned to " << proc_index_end - proc_index_begin << " processors." << std::endl;
          exit(1);
        }

        part_t assigned_proc = proc_adjList[proc_index_begin];
        gnuPlotCode << "Rank:" << i << " " <<
            this->proc_coords[0][assigned_proc] << " " << this->proc_coords[1][assigned_proc] << " " << this->proc_coords[2][assigned_proc] <<
            " " << pcoords[0][assigned_proc] << " " << pcoords[1][assigned_proc] <<
            " " << pcoords[2][assigned_proc] << " " << pcoords[3][assigned_proc] <<
            std::endl;
      }

      gnuPlotCode << "Machine Extent:" << std::endl;
      for (part_t i = 0; i < num_parts; ++i){

        part_t proc_index_begin = proc_xadj[i];
        part_t proc_index_end = proc_xadj[i+1];

        if (proc_index_end - proc_index_begin != 1){
          std::cerr << "Error at partitioning of processors" << std::endl;
          std::cerr << "PART:" << i << " is assigned to " << proc_index_end - proc_index_begin << " processors." << std::endl;
          exit(1);
        }

        part_t assigned_proc = proc_adjList[proc_index_begin];
        gnuPlotCode << "Rank:" << i << " " << this->proc_coords[0][assigned_proc] << " " << this->proc_coords[1][assigned_proc] << " " << this->proc_coords[2][assigned_proc] << std::endl;
      }
      gnuPlotCode.close();
    }
    */

    freeArray<part_t>(proc_to_task_xadj_work);
    freeArray<part_t>(task_xadj);
    freeArray<part_t>(task_adjList);
    freeArray<part_t>(proc_xadj);
    freeArray<part_t>(proc_adjList);
  }

};

/*! \brief CoordinateTaskMapper: maps the tasks to the processors using
 *  the machine and task coordinates.
 */
template <typename Adapter, typename part_t>
class CoordinateTaskMapper : public PartitionMapping<Adapter>{
protected:

#ifndef DOXYGEN_SHOULD_SKIP_THIS

  typedef typename Adapter::scalar_t pcoord_t;
  typedef typename Adapter::scalar_t tcoord_t;
  typedef typename Adapter::scalar_t scalar_t;
  typedef typename Adapter::lno_t lno_t;

#endif

  //RCP<const Environment> env;
  ArrayRCP<part_t> proc_to_task_xadj; //holds the pointer to the task array
  ArrayRCP<part_t> proc_to_task_adj; //holds the indices of tasks wrt the proc_to_task_xadj array
  ArrayRCP<part_t> task_to_proc; //holds the processors mapped to tasks
  ArrayRCP<part_t> local_task_to_rank; //holds the ranks mapped to the local tasks

  bool isOwnerofModel;
  CoordinateCommunicationModel<pcoord_t,tcoord_t,part_t> *proc_task_comm;
  part_t nprocs;
  part_t ntasks;
  ArrayRCP<part_t> task_communication_xadj;
  ArrayRCP<part_t> task_communication_adj;
  ArrayRCP<scalar_t> task_communication_edge_weight;


  /*! \brief doMapping function, calls the getMapping function of the
   *  communication model object.
   */
  void doMapping(int myRank, const Teuchos::RCP<const Teuchos::Comm<int> > comm_){

    if (this->proc_task_comm){
      this->proc_task_comm->getMapping(
          myRank,
          this->env,
          this->proc_to_task_xadj, //holds the pointer to the task array
          this->proc_to_task_adj, //holds the indices of tasks wrt the proc_to_task_xadj array
          this->task_to_proc, //holds the processors mapped to tasks
          comm_
          );
    }
    else {
      std::cerr << "communicationModel is not specified in the Mapper" << std::endl;
      exit(1);
    }
  }


  /*! \brief creates and returns the subcommunicator for the
   *  processor group.
   */
  RCP<Comm<int> > create_subCommunicator(){
    int procDim = this->proc_task_comm->proc_coord_dim;
    int taskDim = this->proc_task_comm->task_coord_dim;

    //the number of dimension-ordering permutations for tasks and procs
    int taskPerm = z2Fact<int>(procDim);
    int procPerm = z2Fact<int>(taskDim);
    //total number of permutations
    int idealGroupSize = taskPerm * procPerm;
    //plus the variants that partition along the longest dimension.
    idealGroupSize += taskPerm + procPerm + 1;

    int myRank = this->comm->getRank();
    int commSize = this->comm->getSize();

    int myGroupIndex = myRank / idealGroupSize;

    int prevGroupBegin = (myGroupIndex - 1) * idealGroupSize;
    if (prevGroupBegin < 0) prevGroupBegin = 0;
    int myGroupBegin = myGroupIndex * idealGroupSize;
    int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
    int nextGroupEnd = (myGroupIndex + 2) * idealGroupSize;

    if (myGroupEnd > commSize){
      myGroupBegin = prevGroupBegin;
      myGroupEnd = commSize;
    }
    if (nextGroupEnd > commSize){
      myGroupEnd = commSize;
    }
    int myGroupSize = myGroupEnd - myGroupBegin;

    part_t *myGroup = allocMemory<part_t>(myGroupSize);
    for (int i = 0; i < myGroupSize; ++i){
      myGroup[i] = myGroupBegin + i;
    }
    //cout << "me:" << myRank << " myGroupBegin:" << myGroupBegin << " myGroupEnd:" << myGroupEnd << endl;

    ArrayView<const part_t> myGroupView(myGroup, myGroupSize);

    RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
    freeArray<part_t>(myGroup);
    return subComm;
  }
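  // Note: idealGroupSize equals the `permutations` count computed in
  // getMapping() (e.g. 49 for 3-D coordinates), so each subcommunicator
  // groups exactly the ranks that tried different permutation variants of
  // the same mapping problem; a trailing smaller group is merged into the
  // previous one.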


  /*! \brief finds the lowest-cost mapping within the subcommunicator and
   *  broadcasts it from the rank that owns it.
   */
  void getBestMapping(){
    //create the sub group.
    RCP<Comm<int> > subComm = this->create_subCommunicator();
    //calculate the cost.
    double myCost = this->proc_task_comm->getCommunicationCostMetric();
    //std::cout << "me:" << this->comm->getRank() << " myCost:" << myCost << std::endl;
    double localCost[2], globalCost[2];

    localCost[0] = myCost;
    localCost[1] = double(subComm->getRank());

    globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
    Teuchos::Zoltan2_ReduceBestMapping<int,double> reduceBest;
    reduceAll<int, double>(*subComm, reduceBest,
        2, localCost, globalCost);

    int sender = int(globalCost[1]);

    /*
    if (this->comm->getRank() == 0){
      std::cout << "me:" << localCost[1] <<
          " localcost:" << localCost[0] <<
          " bestcost:" << globalCost[0] <<
          " Sender:" << sender <<
          " procDim:" << proc_task_comm->proc_coord_dim <<
          " taskDim:" << proc_task_comm->task_coord_dim << std::endl;
    }
    */
    //cout << "me:" << localCost[1] << " localcost:" << localCost[0] << " bestcost:" << globalCost[0] << endl;
    //cout << "me:" << localCost[1] << " proc:" << globalCost[1] << endl;
    broadcast(*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
    broadcast(*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
    broadcast(*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
  }



  //write the mapping to gnuPlot code to visualize it.
  void writeMapping(){
    std::ofstream gnuPlotCode("gnuPlot.plot", std::ofstream::out);

    int mindim = MINOF(proc_task_comm->proc_coord_dim, proc_task_comm->task_coord_dim);
    std::string ss = "";
    for (part_t i = 0; i < this->nprocs; ++i){

      std::string procFile = Teuchos::toString<int>(i) + "_mapping.txt";
      if (i == 0){
        gnuPlotCode << "plot \"" << procFile << "\"\n";
      }
      else {
        gnuPlotCode << "replot \"" << procFile << "\"\n";
      }

      std::ofstream inpFile(procFile.c_str(), std::ofstream::out);

      std::string gnuPlotArrow = "set arrow from ";
      for (int j = 0; j < mindim; ++j){
        if (j == mindim - 1){
          inpFile << proc_task_comm->proc_coords[j][i];
          gnuPlotArrow += Teuchos::toString<float>(proc_task_comm->proc_coords[j][i]);
        }
        else {
          inpFile << proc_task_comm->proc_coords[j][i] << " ";
          gnuPlotArrow += Teuchos::toString<float>(proc_task_comm->proc_coords[j][i]) + ",";
        }
      }
      gnuPlotArrow += " to ";

      inpFile << std::endl;
      ArrayView<part_t> a = this->getAssignedTasksForProc(i);
      for (int k = 0; k < a.size(); ++k){
        int j = a[k];
        //cout << "i:" << i << " j:" << j << endl;
        std::string gnuPlotArrow2 = gnuPlotArrow;
        for (int z = 0; z < mindim; ++z){
          if (z == mindim - 1){
            //cout << "z:" << z << " j:" << j << " " << proc_task_comm->task_coords[z][j] << endl;
            inpFile << proc_task_comm->task_coords[z][j];
            gnuPlotArrow2 += Teuchos::toString<float>(proc_task_comm->task_coords[z][j]);
          }
          else {
            inpFile << proc_task_comm->task_coords[z][j] << " ";
            gnuPlotArrow2 += Teuchos::toString<float>(proc_task_comm->task_coords[z][j]) + ",";
          }
        }
        ss += gnuPlotArrow2 + "\n";
        inpFile << std::endl;
      }
      inpFile.close();
    }
    gnuPlotCode << ss;
    gnuPlotCode << "\nreplot\n pause -1 \n";
    gnuPlotCode.close();
  }
1719 
1720 
1721  //write mapping to gnuPlot code to visualize.
1722  void writeMapping2(int myRank){
1723  std::string rankStr = Teuchos::toString<int>(myRank);
1724  std::string gnuPlots = "gnuPlot", extentionS = ".plot";
1725  std::string outF = gnuPlots + rankStr+ extentionS;
1726  std::ofstream gnuPlotCode(outF.c_str(), std::ofstream::out);
1727 
1730  //int mindim = MINOF(tmpproc_task_comm->proc_coord_dim, tmpproc_task_comm->task_coord_dim);
1731  int mindim = tmpproc_task_comm->proc_coord_dim;
1732  if (mindim != 3) {
1733  std::cerr << "Mapping Write is only good for 3 dim" << std::endl;
1734  return;
1735  }
1736  std::string ss = "";
1737  std::string procs = "";
1738 
1739  std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
1740  for(part_t origin_rank = 0; origin_rank < this->nprocs; ++origin_rank){
1741  ArrayView<part_t> a = this->getAssignedTasksForProc(origin_rank);
1742  if (a.size() == 0){
1743  continue;
1744  }
1745 
1746  std::string gnuPlotArrow = "set arrow from ";
1747  for(int j = 0; j < mindim; ++j){
1748  if (j == mindim - 1){
1749  gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1750  procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1751 
1752  }
1753  else {
1754  gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]) +",";
1755  procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank])+ " ";
1756  }
1757  }
1758  procs += "\n";
1759 
1760  gnuPlotArrow += " to ";
1761 
1762 
1763  for(int k = 0; k < a.size(); ++k){
1764  int origin_task = a[k];
1765 
1766  for (int nind = task_communication_xadj[origin_task]; nind < task_communication_xadj[origin_task + 1]; ++nind){
1767  int neighbor_task = task_communication_adj[nind];
1768 
1769  bool differentnode = false;
1770 
1771  int neighbor_rank = this->getAssignedProcForTask(neighbor_task);
1772 
1773  for(int j = 0; j < mindim; ++j){
1774  if (int(tmpproc_task_comm->proc_coords[j][origin_rank]) != int(tmpproc_task_comm->proc_coords[j][neighbor_rank])){
1775  differentnode = true; break;
1776  }
1777  }
1778  std::tuple<int,int,int, int, int, int> foo(
1779  int(tmpproc_task_comm->proc_coords[0][origin_rank]),
1780  int(tmpproc_task_comm->proc_coords[1][origin_rank]),
1781  int(tmpproc_task_comm->proc_coords[2][origin_rank]),
1782  int(tmpproc_task_comm->proc_coords[0][neighbor_rank]),
1783  int(tmpproc_task_comm->proc_coords[1][neighbor_rank]),
1784  int(tmpproc_task_comm->proc_coords[2][neighbor_rank]));
1785 
1786 
1787  if (differentnode && my_arrows.find(foo) == my_arrows.end()){
1788  my_arrows.insert(foo);
1789 
1790  std::string gnuPlotArrow2 = "";
1791  for(int j = 0; j < mindim; ++j){
1792  if(j == mindim - 1){
1793  gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]);
1794  }
1795  else{
1796  gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]) +",";
1797  }
1798  }
1799  ss += gnuPlotArrow + gnuPlotArrow2 + " nohead\n";
1800  }
1801  }
1802  }
1803 
1804  }
1805 
1806 
1807  std::ofstream procFile("procPlot.plot", std::ofstream::out);
1808  procFile << procs << "\n";
1809  procFile.close();
1810 
1811  //gnuPlotCode << ss;
1812  if(mindim == 2){
1813  gnuPlotCode << "plot \"procPlot.plot\" with points pointsize 3\n";
1814  } else {
1815  gnuPlotCode << "splot \"procPlot.plot\" with points pointsize 3\n";
1816  }
1817 
1818  gnuPlotCode << ss << "\nreplot\n pause -1 \n";
1819  gnuPlotCode.close();
1820  }
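The std::set of integer 6-tuples above is what keeps the generated plot readable: every pair of nodes that exchanges any number of messages contributes exactly one arrow. A minimal standalone sketch of that de-duplication, with invented endpoint coordinates:

#include <iostream>
#include <set>
#include <tuple>

int main() {
  std::set<std::tuple<int,int,int,int,int,int> > my_arrows;
  // Two task pairs that live on the same pair of nodes produce the same
  // endpoint tuple; only the first insertion succeeds and emits an arrow.
  int edges[2][6] = { {0,0,0, 1,0,0}, {0,0,0, 1,0,0} };
  for (int e = 0; e < 2; ++e) {
    std::tuple<int,int,int,int,int,int> key(
        edges[e][0], edges[e][1], edges[e][2],
        edges[e][3], edges[e][4], edges[e][5]);
    if (my_arrows.insert(key).second)  // true only for a new tuple
      std::cout << "set arrow from " << edges[e][0] << "," << edges[e][1]
                << "," << edges[e][2] << " to " << edges[e][3] << ","
                << edges[e][4] << "," << edges[e][5] << " nohead\n";
  }
  return 0;  // prints the arrow exactly once
}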
1821 
1822 
1823 // KDD Need to provide access to algorithm for getPartBoxes
1824 #ifdef gnuPlot
1825  void writeGnuPlot(
1826  const Teuchos::Comm<int> *comm_,
1827  const Zoltan2::PartitioningSolution<Adapter> *soln_,
1828  int coordDim,
1829  tcoord_t **partCenters
1830  ){
1831  std::string file = "gggnuPlot";
1832  std::string exten = ".plot";
1833  std::ofstream mm("2d.txt");
1834  file += Teuchos::toString<int>(comm_->getRank()) + exten;
1835  std::ofstream ff(file.c_str());
1836  //ff.seekg(0, ff.end);
1837  std::vector<Zoltan2::coordinateModelPartBox <tcoord_t, part_t> > outPartBoxes = ((Zoltan2::PartitioningSolution<Adapter> *)soln_)->getPartBoxesView();
1838 
1839  for (part_t i = 0; i < this->ntasks;++i){
1840  outPartBoxes[i].writeGnuPlot(ff, mm);
1841  }
1842  if (coordDim == 2){
1843  ff << "plot \"2d.txt\"" << std::endl;
1844  //ff << "\n pause -1" << endl;
1845  }
1846  else {
1847  ff << "splot \"2d.txt\"" << std::endl;
1848  //ff << "\n pause -1" << endl;
1849  }
1850  mm.close();
1851 
1852  ff << "set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
1853  for (part_t i = 0; i < this->ntasks;++i){
1854  part_t pb = task_communication_xadj[i];
1855  part_t pe = task_communication_xadj[i+1];
1856  for (part_t p = pb; p < pe; ++p){
1857  part_t n = task_communication_adj[p];
1858 
1859  //cout << "i:" << i << " n:" << n << endl;
1860  std::string arrowline = "set arrow from ";
1861  for (int j = 0; j < coordDim - 1; ++j){
1862  arrowline += Teuchos::toString<tcoord_t>(partCenters[j][n]) + ",";
1863  }
1864  arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][n]) + " to ";
1865 
1866 
1867  for (int j = 0; j < coordDim - 1; ++j){
1868  arrowline += Teuchos::toString<tcoord_t>(partCenters[j][i]) + ",";
1869  }
1870  arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][i]) + " as 5\n";
1871 
1872  //cout << "arrow:" << arrowline << endl;
1873  ff << arrowline;
1874  }
1875  }
1876 
1877  ff << "replot\n pause -1" << std::endl;
1878  ff.close();
1879  }
1880 #endif // gnuPlot
1881 
1882 public:
1883 
1884  void getProcTask(part_t* &proc_to_task_xadj_, part_t* &proc_to_task_adj_){
1885  proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
1886  proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
1887  }
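getProcTask() hands out raw views of the mapper-owned CSR arrays: proc_to_task_xadj has nprocs+1 entries, and the tasks of rank p occupy adj[xadj[p]] .. adj[xadj[p+1]-1]. A minimal traversal sketch, assuming part_t is int and the arrays were obtained from a live mapper:

#include <iostream>

// Walks a proc-to-task CSR pair obtained from getProcTask().
void printProcTask(const int *xadj, const int *adj, int numProcs) {
  for (int p = 0; p < numProcs; ++p)
    for (int t = xadj[p]; t < xadj[p + 1]; ++t)
      std::cout << "rank " << p << " <- task " << adj[t] << "\n";
}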
1888 
1889  virtual void map(const RCP<MappingSolution<Adapter> > &mappingsoln) {
1890 
1891  // Mapping was already computed in the constructor; we need to store it
1892  // in the solution.
1893  mappingsoln->setMap_RankForLocalElements(local_task_to_rank);
1894 
1895  // KDDKDD TODO: Algorithm is also creating task_to_proc, which maybe
1896  // KDDKDD is not needed once we use MappingSolution to answer queries
1897  // KDDKDD instead of this algorithm.
1898  // KDDKDD Ask Mehmet: what is the most efficient way to get the answer
1899  // KDDKDD out of CoordinateTaskMapper and into the MappingSolution?
1900  }
1901 
1902 
1903  virtual ~CoordinateTaskMapper(){
1904  //freeArray<part_t>(proc_to_task_xadj);
1905  //freeArray<part_t>(proc_to_task_adj);
1906  //freeArray<part_t>(task_to_proc);
1907  if(this->isOwnerofModel){
1908  delete this->proc_task_comm;
1909  }
1910  }
1911 
1912  void create_local_task_to_rank(
1913  const lno_t num_local_coords,
1914  const part_t *local_coord_parts,
1915  const ArrayRCP<part_t> task_to_proc_){
1916  local_task_to_rank = ArrayRCP <part_t>(num_local_coords);
1917 
1918  for (lno_t i = 0; i < num_local_coords; ++i){
1919  part_t local_coord_part = local_coord_parts[i];
1920  part_t rank_index = task_to_proc_[local_coord_part];
1921  local_task_to_rank[i] = rank_index;
1922  }
1923  }
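create_local_task_to_rank() is just the composition of two lookups, local coordinate -> part -> rank. A standalone illustration with invented arrays:

#include <iostream>

int main() {
  const int numLocalCoords = 5;
  int localCoordParts[] = {2, 0, 1, 2, 1};  // part id of each local coordinate
  int taskToProc[]      = {3, 1, 0};        // rank assigned to each part
  for (int i = 0; i < numLocalCoords; ++i)
    std::cout << "local coordinate " << i << " -> rank "
              << taskToProc[localCoordParts[i]] << "\n";
  return 0;
}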
1924 
1925 
1926 
1937  CoordinateTaskMapper(
1938  const Teuchos::RCP <const Teuchos::Comm<int> > comm_,
1939  const Teuchos::RCP <const MachineRepresentation<pcoord_t,part_t> > machine_,
1940  const Teuchos::RCP <const Adapter> input_adapter_,
1941  const Teuchos::RCP <const Zoltan2::PartitioningSolution<Adapter> > soln_,
1942  const Teuchos::RCP <const Environment> envConst,
1943  bool is_input_adapter_distributed = true,
1944  int num_ranks_per_node = 1,
1945  bool divide_to_prime_first = false, bool reduce_best_mapping = true):
1946  PartitionMapping<Adapter>(comm_, machine_, input_adapter_, soln_, envConst),
1947  proc_to_task_xadj(0),
1948  proc_to_task_adj(0),
1949  task_to_proc(0),
1950  isOwnerofModel(true),
1951  proc_task_comm(0),
1952  task_communication_xadj(0),
1953  task_communication_adj(0),
1954  task_communication_edge_weight(0){
1955  using namespace Teuchos;
1956  typedef typename Adapter::base_adapter_t ctm_base_adapter_t;
1957 
1958  RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
1959  RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
1960 
1961  RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
1962  RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
1963  if (!is_input_adapter_distributed){
1964  ia_comm = Teuchos::createSerialComm<int>();
1965  }
1966 
1967  RCP<const Environment> envConst_ = envConst;
1968 
1969  RCP<const ctm_base_adapter_t> baseInputAdapter_(
1970  rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()), false));
1971 
1972  modelFlag_t coordFlags_, graphFlags_;
1973 
1974  //create coordinate model
1975  //since this is the coordinate task mapper,
1976  //the adapter must provide coordinates
1977  coordinateModel_ = rcp(new CoordinateModel<ctm_base_adapter_t>(
1978  baseInputAdapter_, envConst_, ia_comm, coordFlags_));
1979 
1980  //if the adapter also has a graph model, we use the graph model
1981  //to calculate the mapping cost.
1982  BaseAdapterType inputType_ = input_adapter_->adapterType();
1983  if (inputType_ == MatrixAdapterType ||
1984  inputType_ == GraphAdapterType ||
1985  inputType_ == MeshAdapterType)
1986  {
1987  graph_model_ = rcp(new GraphModel<ctm_base_adapter_t>(
1988  baseInputAdapter_, envConst_, ia_comm,
1989  graphFlags_));
1990  }
1991 
1992  if (!machine_->hasMachineCoordinates()) {
1993  throw std::runtime_error("Existing machine does not provide coordinates "
1994  "for coordinate task mapping");
1995  }
1996 
1997  //if mapping type is 0 then it is coordinate mapping
1998  int procDim = machine_->getMachineDim();
1999  this->nprocs = machine_->getNumRanks();
2000 
2001  //get processor coordinates.
2002  pcoord_t **procCoordinates = NULL;
2003  if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2004  throw std::runtime_error("Existing machine does not implement "
2005  "getAllMachineCoordinatesView");
2006  }
2007 
2008  //get the machine extent.
2009  //if the machine extent is available and
2010  //the machine has wrap-around links, we shift the coordinates
2011  //so that the largest gap falls on the wrap-around link.
2012  std::vector<int> machine_extent_vec(procDim);
2013  //std::vector<bool> machine_extent_wrap_around_vec(procDim, 0);
2014  int *machine_extent = &(machine_extent_vec[0]);
2015  bool *machine_extent_wrap_around = new bool[procDim];
2016  for (int i = 0; i < procDim; ++i) machine_extent_wrap_around[i] = false;
2017  machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2018 
2019 
2020 
2021  // KDDKDD ASK MEHMET: SHOULD WE GET AND USE machine_dimension HERE IF IT
2022  // KDDKDD ASK MEHMET: IS PROVIDED BY THE MACHINE REPRESENTATION?
2023  // KDDKDD ASK MEHMET: IF NOT HERE, THEN WHERE?
2024  // MD: Yes, I ADDED BELOW:
2025  if (machine_->getMachineExtent(machine_extent)) {
2026  procCoordinates =
2027  this->shiftMachineCoordinates(
2028  procDim,
2029  machine_extent,
2030  machine_extent_wrap_around,
2031  this->nprocs,
2032  procCoordinates);
2033  }
2034 
2035 
2036  //get the tasks information, such as coordinate dimension,
2037  //number of parts.
2038  int coordDim = coordinateModel_->getCoordinateDim();
2039 
2040 
2041  this->ntasks = soln_->getActualGlobalNumberOfParts();
2042  if (part_t(soln_->getTargetGlobalNumberOfParts()) > this->ntasks){
2043  this->ntasks = soln_->getTargetGlobalNumberOfParts();
2044  }
2045  this->solution_parts = soln_->getPartListView();
2046 
2047  //we need to calculate the center of parts.
2048  tcoord_t **partCenters = NULL;
2049  partCenters = allocMemory<tcoord_t *>(coordDim);
2050  for (int i = 0; i < coordDim; ++i){
2051  partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2052  }
2053 
2054  typedef typename Adapter::scalar_t t_scalar_t;
2055 
2056 
2057  envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");
2058 
2059  //get centers for the parts.
2060  getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2061  envConst.getRawPtr(),
2062  ia_comm.getRawPtr(),
2063  coordinateModel_.getRawPtr(),
2064  this->solution_parts,
2065  //soln_->getPartListView();
2066  //this->soln.getRawPtr(),
2067  coordDim,
2068  ntasks,
2069  partCenters);
2070 
2071  envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");
2072 
2073 
2074  //create the part graph
2075  if (graph_model_.getRawPtr() != NULL){
2076  getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2077  envConst.getRawPtr(),
2078  ia_comm.getRawPtr(),
2079  graph_model_.getRawPtr(),
2080  this->ntasks,
2081  this->solution_parts,
2082  //soln_->getPartListView(),
2083  //this->soln.getRawPtr(),
2084  task_communication_xadj,
2085  task_communication_adj,
2086  task_communication_edge_weight
2087  );
2088  }
2089 
2090  //create coordinate communication model.
2091  this->proc_task_comm =
2092  new CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
2093  procDim,
2094  procCoordinates,
2095  coordDim,
2096  partCenters,
2097  this->nprocs,
2098  this->ntasks,
2099  machine_extent,
2100  machine_extent_wrap_around,
2101  machine_.getRawPtr());
2102 
2103  int myRank = comm_->getRank();
2104  this->proc_task_comm->num_ranks_per_node = num_ranks_per_node;
2105  this->proc_task_comm->divide_to_prime_first = divide_to_prime_first;
2106 
2107 
2108  envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
2109  this->doMapping(myRank, comm_);
2110  envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");
2111 
2112 
2113  envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
2114 
2115  /*soln_->getCommunicationGraph(task_communication_xadj,
2116  task_communication_adj);
2117  */
2118 
2119  envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");
2120  #ifdef gnuPlot1
2121  if (comm_->getRank() == 0){
2122 
2123  part_t taskCommCount = task_communication_xadj.size();
2124  std::cout << " TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
2125  part_t maxN = task_communication_xadj[0];
2126  for (part_t i = 1; i <= taskCommCount; ++i){
2127  part_t nc = task_communication_xadj[i] - task_communication_xadj[i-1];
2128  if (maxN < nc) maxN = nc;
2129  }
2130  std::cout << " maxNeighbor:" << maxN << std::endl;
2131  }
2132 
2133  this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2134  #endif
2135 
2136  envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
2137 
2138  if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2139  this->proc_task_comm->calculateCommunicationCost(
2140  task_to_proc.getRawPtr(),
2141  task_communication_xadj.getRawPtr(),
2142  task_communication_adj.getRawPtr(),
2143  task_communication_edge_weight.getRawPtr()
2144  );
2145  }
2146 
2147  //std::cout << "me: " << comm_->getRank() << " cost:" << this->proc_task_comm->getCommunicationCostMetric() << std::endl;
2148 
2149  envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");
2150 
2151  //processors are divided into groups of size procDim! * coordDim!
2152  //each processor in the group obtains a mapping with a different rotation
2153  //and the best one is broadcast to all processors.
2154  this->getBestMapping();
2155  this->create_local_task_to_rank(
2156  coordinateModel_->getLocalNumCoordinates(),
2157  this->solution_parts,
2158  this->task_to_proc);
2159  /*
2160  {
2161  if (task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr())
2162  this->proc_task_comm->calculateCommunicationCost(
2163  task_to_proc.getRawPtr(),
2164  task_communication_xadj.getRawPtr(),
2165  task_communication_adj.getRawPtr(),
2166  task_communication_edge_weight.getRawPtr()
2167  );
2168  std::cout << "me: " << comm_->getRank() << " cost:" << this->proc_task_comm->getCommunicationCostMetric() << std::endl;
2169  }
2170  */
2171 
2172 
2173  #ifdef gnuPlot
2174  this->writeMapping2(comm_->getRank());
2175  #endif
2176 
2177  delete []machine_extent_wrap_around;
2178  if (machine_->getMachineExtent(machine_extent)){
2179  for (int i = 0; i < procDim; ++i){
2180  delete [] procCoordinates[i];
2181  }
2182  delete [] procCoordinates;
2183  }
2184 
2185  for (int i = 0; i < coordDim; ++i){
2186  freeArray<tcoord_t>(partCenters[i]);
2187  }
2188  freeArray<tcoord_t *>(partCenters);
2189 
2190  }
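A hedged usage sketch for this constructor; the helper function and its Adapter placeholder are hypothetical, all handles are assumed to be built by the surrounding application, and the two template arguments (adapter type, part id type) follow the instantiation used by coordinateTaskMapperInterface() below:

#include "Zoltan2_TaskMapping.hpp"

// Hypothetical helper: the constructor itself performs the mapping, so
// query methods are usable as soon as the object exists.
template <typename Adapter, typename part_t, typename pcoord_t>
part_t rankOfFirstPart(
    const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
    const Teuchos::RCP<const Zoltan2::MachineRepresentation<pcoord_t, part_t> > &machine,
    const Teuchos::RCP<const Adapter> &adapter,
    const Teuchos::RCP<const Zoltan2::PartitioningSolution<Adapter> > &solution,
    const Teuchos::RCP<const Zoltan2::Environment> &env)
{
  Zoltan2::CoordinateTaskMapper<Adapter, part_t> mapper(
      comm, machine, adapter, solution, env);
  return mapper.getAssignedProcForTask(0);
}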
2191 
2192 
2203  CoordinateTaskMapper(
2204  const Teuchos::RCP <const Teuchos::Comm<int> > comm_,
2205  const Teuchos::RCP <const MachineRepresentation<pcoord_t,part_t> > machine_,
2206  const Teuchos::RCP <const Adapter> input_adapter_,
2207  const part_t num_parts_,
2208  const part_t *result_parts,
2209  const Teuchos::RCP <const Environment> envConst,
2210  bool is_input_adapter_distributed = true,
2211  int num_ranks_per_node = 1,
2212  bool divide_to_prime_first = false, bool reduce_best_mapping = true):
2213  PartitionMapping<Adapter>(comm_, machine_, input_adapter_, num_parts_, result_parts, envConst),
2214  proc_to_task_xadj(0),
2215  proc_to_task_adj(0),
2216  task_to_proc(0),
2217  isOwnerofModel(true),
2218  proc_task_comm(0),
2219  task_communication_xadj(0),
2220  task_communication_adj(0),
2221  task_communication_edge_weight(0){
2222  using namespace Teuchos;
2223  typedef typename Adapter::base_adapter_t ctm_base_adapter_t;
2224 
2225  RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
2226  RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
2227 
2228  RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
2229  RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
2230  if (!is_input_adapter_distributed){
2231  ia_comm = Teuchos::createSerialComm<int>();
2232  }
2233  RCP<const Environment> envConst_ = envConst;
2234 
2235  RCP<const ctm_base_adapter_t> baseInputAdapter_(
2236  rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()), false));
2237 
2238  modelFlag_t coordFlags_, graphFlags_;
2239 
2240  //create coordinate model
2241  //since this is the coordinate task mapper,
2242  //the adapter must provide coordinates
2243  coordinateModel_ = rcp(new CoordinateModel<ctm_base_adapter_t>(
2244  baseInputAdapter_, envConst_, ia_comm, coordFlags_));
2245 
2246  //if the adapter also has a graph model, we use the graph model
2247  //to calculate the mapping cost.
2248  BaseAdapterType inputType_ = input_adapter_->adapterType();
2249  if (inputType_ == MatrixAdapterType ||
2250  inputType_ == GraphAdapterType ||
2251  inputType_ == MeshAdapterType)
2252  {
2253  graph_model_ = rcp(new GraphModel<ctm_base_adapter_t>(
2254  baseInputAdapter_, envConst_, ia_comm,
2255  graphFlags_));
2256  }
2257 
2258  if (!machine_->hasMachineCoordinates()) {
2259  throw std::runtime_error("Existing machine does not provide coordinates "
2260  "for coordinate task mapping");
2261  }
2262 
2263  //if mapping type is 0 then it is coordinate mapping
2264  int procDim = machine_->getMachineDim();
2265  this->nprocs = machine_->getNumRanks();
2266 
2267  //get processor coordinates.
2268  pcoord_t **procCoordinates = NULL;
2269  if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2270  throw std::runtime_error("Existing machine does not implement "
2271  "getAllMachineCoordinatesView");
2272  }
2273 
2274  //get the machine extent.
2275  //if the machine extent is available and
2276  //the machine has wrap-around links, we shift the coordinates
2277  //so that the largest gap falls on the wrap-around link.
2278  std::vector<int> machine_extent_vec(procDim);
2279  //std::vector<bool> machine_extent_wrap_around_vec(procDim, 0);
2280  int *machine_extent = &(machine_extent_vec[0]);
2281  bool *machine_extent_wrap_around = new bool[procDim];
2282  machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2283 
2284 
2285 
2286  // KDDKDD ASK MEHMET: SHOULD WE GET AND USE machine_dimension HERE IF IT
2287  // KDDKDD ASK MEHMET: IS PROVIDED BY THE MACHINE REPRESENTATION?
2288  // KDDKDD ASK MEHMET: IF NOT HERE, THEN WHERE?
2289  // MD: Yes, I ADDED BELOW:
2290  if (machine_->getMachineExtent(machine_extent)) {
2291  procCoordinates =
2292  this->shiftMachineCoordinates(
2293  procDim,
2294  machine_extent,
2295  machine_extent_wrap_around,
2296  this->nprocs,
2297  procCoordinates);
2298  }
2299 
2300 
2301  //get the tasks information, such as coordinate dimension,
2302  //number of parts.
2303  int coordDim = coordinateModel_->getCoordinateDim();
2304 
2305 
2306  this->ntasks = num_parts_;
2307  this->solution_parts = result_parts;
2308 
2309  //we need to calculate the center of parts.
2310  tcoord_t **partCenters = NULL;
2311  partCenters = allocMemory<tcoord_t *>(coordDim);
2312  for (int i = 0; i < coordDim; ++i){
2313  partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2314  }
2315 
2316  typedef typename Adapter::scalar_t t_scalar_t;
2317 
2318 
2319  envConst->timerStart(MACRO_TIMERS, "Mapping - Solution Center");
2320 
2321  //get centers for the parts.
2322  getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2323  envConst.getRawPtr(),
2324  ia_comm.getRawPtr(),
2325  coordinateModel_.getRawPtr(),
2326  this->solution_parts,
2327  //soln_->getPartListView();
2328  //this->soln.getRawPtr(),
2329  coordDim,
2330  ntasks,
2331  partCenters);
2332 
2333  envConst->timerStop(MACRO_TIMERS, "Mapping - Solution Center");
2334 
2335  envConst->timerStart(MACRO_TIMERS, "GRAPHCREATE");
2336  //create the part graph
2337  if (graph_model_.getRawPtr() != NULL){
2338  getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2339  envConst.getRawPtr(),
2340  ia_comm.getRawPtr(),
2341  graph_model_.getRawPtr(),
2342  this->ntasks,
2343  this->solution_parts,
2344  //soln_->getPartListView(),
2345  //this->soln.getRawPtr(),
2346  task_communication_xadj,
2347  task_communication_adj,
2348  task_communication_edge_weight
2349  );
2350  }
2351  envConst->timerStop(MACRO_TIMERS, "GRAPHCREATE");
2352 
2353 
2354  envConst->timerStart(MACRO_TIMERS, "CoordinateCommunicationModel Create");
2355  //create coordinate communication model.
2356  this->proc_task_comm =
2357  new CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
2358  procDim,
2359  procCoordinates,
2360  coordDim,
2361  partCenters,
2362  this->nprocs,
2363  this->ntasks,
2364  machine_extent,
2365  machine_extent_wrap_around,
2366  machine_.getRawPtr());
2367 
2368  envConst->timerStop(MACRO_TIMERS, "CoordinateCommunicationModel Create");
2369 
2370 
2371  this->proc_task_comm->num_ranks_per_node = num_ranks_per_node;
2372  this->proc_task_comm->divide_to_prime_first = divide_to_prime_first;
2373 
2374  int myRank = comm_->getRank();
2375 
2376 
2377  envConst->timerStart(MACRO_TIMERS, "Mapping - Processor Task map");
2378  this->doMapping(myRank, comm_);
2379  envConst->timerStop(MACRO_TIMERS, "Mapping - Processor Task map");
2380 
2381 
2382  envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Graph");
2383 
2384  /*soln_->getCommunicationGraph(task_communication_xadj,
2385  task_communication_adj);
2386  */
2387 
2388  envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Graph");
2389  #ifdef gnuPlot1
2390  if (comm_->getRank() == 0){
2391 
2392  part_t taskCommCount = task_communication_xadj.size();
2393  std::cout << " TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
2394  part_t maxN = task_communication_xadj[0];
2395  for (part_t i = 1; i <= taskCommCount; ++i){
2397  if (maxN < nc) maxN = nc;
2398  }
2399  std::cout << " maxNeighbor:" << maxN << std::endl;
2400  }
2401 
2402  this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2403  #endif
2404 
2405  envConst->timerStart(MACRO_TIMERS, "Mapping - Communication Cost");
2406 
2407  if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2408  this->proc_task_comm->calculateCommunicationCost(
2409  task_to_proc.getRawPtr(),
2410  task_communication_xadj.getRawPtr(),
2411  task_communication_adj.getRawPtr(),
2412  task_communication_edge_weight.getRawPtr()
2413  );
2414  }
2415 
2416  //std::cout << "me: " << comm_->getRank() << " cost:" << this->proc_task_comm->getCommunicationCostMetric() << std::endl;
2417 
2418  envConst->timerStop(MACRO_TIMERS, "Mapping - Communication Cost");
2419 
2420  //processors are divided into groups of size procDim! * coordDim!
2421  //each processor in the group obtains a mapping with a different rotation
2422  //and the best one is broadcast to all processors.
2423  this->getBestMapping();
2424 
2425  this->create_local_task_to_rank(
2426  coordinateModel_->getLocalNumCoordinates(),
2427  this->solution_parts,
2428  this->task_to_proc);
2429  /*
2430 
2431  {
2432  if (task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr())
2433  this->proc_task_comm->calculateCommunicationCost(
2434  task_to_proc.getRawPtr(),
2435  task_communication_xadj.getRawPtr(),
2436  task_communication_adj.getRawPtr(),
2437  task_communication_edge_weight.getRawPtr()
2438  );
2439  std::cout << "me: " << comm_->getRank() << " cost:" << this->proc_task_comm->getCommunicationCostMetric() << std::endl;
2440  }
2441  */
2442 
2443 
2444 
2445  #ifdef gnuPlot
2446  this->writeMapping2(comm_->getRank());
2447  #endif
2448 
2449  delete []machine_extent_wrap_around;
2450  if (machine_->getMachineExtent(machine_extent)){
2451  for (int i = 0; i < procDim; ++i){
2452  delete [] procCoordinates[i];
2453  }
2454  delete [] procCoordinates;
2455  }
2456 
2457  for (int i = 0; i < coordDim; ++i){
2458  freeArray<tcoord_t>(partCenters[i]);
2459  }
2460  freeArray<tcoord_t *>(partCenters);
2461 
2462  }
2463 
2510  CoordinateTaskMapper(
2511  const Environment *env_const_,
2512  const Teuchos::Comm<int> *problemComm,
2513  int proc_dim,
2514  int num_processors,
2515  pcoord_t **machine_coords,
2516 
2517  int task_dim,
2518  part_t num_tasks,
2519  tcoord_t **task_coords,
2520  ArrayRCP<part_t>task_comm_xadj,
2521  ArrayRCP<part_t>task_comm_adj,
2522  pcoord_t *task_communication_edge_weight_,
2523  int recursion_depth,
2524  part_t *part_no_array,
2525  const part_t *machine_dimensions,
2526  int num_ranks_per_node = 1,
2527  bool divide_to_prime_first = false, bool reduce_best_mapping = true
2528  ): PartitionMapping<Adapter>(
2529  Teuchos::rcpFromRef<const Teuchos::Comm<int> >(*problemComm),
2530  Teuchos::rcpFromRef<const Environment>(*env_const_)),
2531  proc_to_task_xadj(0),
2532  proc_to_task_adj(0),
2533  task_to_proc(0),
2534  isOwnerofModel(true),
2535  proc_task_comm(0),
2536  task_communication_xadj(task_comm_xadj),
2537  task_communication_adj(task_comm_adj){
2538 
2539  //if mapping type is 0 then it is coordinate mapping
2540  pcoord_t ** virtual_machine_coordinates = machine_coords;
2541  bool *wrap_arounds = new bool [proc_dim];
2542  for (int i = 0; i < proc_dim; ++i) wrap_arounds[i] = true;
2543 
2544  if (machine_dimensions){
2545  virtual_machine_coordinates =
2546  this->shiftMachineCoordinates(
2547  proc_dim,
2548  machine_dimensions,
2549  wrap_arounds,
2550  num_processors,
2551  machine_coords);
2552  }
2553 
2554  this->nprocs = num_processors;
2555 
2556  int coordDim = task_dim;
2557  this->ntasks = num_tasks;
2558 
2559  //alloc memory for part centers.
2560  tcoord_t **partCenters = task_coords;
2561 
2562  //create coordinate communication model.
2563  this->proc_task_comm =
2564  new CoordinateCommunicationModel<pcoord_t, tcoord_t, part_t>(
2565  proc_dim,
2566  virtual_machine_coordinates,
2567  coordDim,
2568  partCenters,
2569  this->nprocs,
2570  this->ntasks, NULL, NULL
2571  );
2572 
2573  this->proc_task_comm->num_ranks_per_node = num_ranks_per_node;
2574  this->proc_task_comm->divide_to_prime_first = divide_to_prime_first;
2575 
2576  this->proc_task_comm->setPartArraySize(recursion_depth);
2577  this->proc_task_comm->setPartArray(part_no_array);
2578 
2579  int myRank = problemComm->getRank();
2580 
2581  this->doMapping(myRank, this->comm);
2582 #ifdef gnuPlot
2583  this->writeMapping2(myRank);
2584 #endif
2585 
2586  if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2587  this->proc_task_comm->calculateCommunicationCost(
2588  task_to_proc.getRawPtr(),
2589  task_communication_xadj.getRawPtr(),
2590  task_communication_adj.getRawPtr(),
2591  task_communication_edge_weight_
2592  );
2593 
2594 
2595  this->getBestMapping();
2596 
2597 /*
2598  if (myRank == 0){
2599  this->proc_task_comm->calculateCommunicationCost(
2600  task_to_proc.getRawPtr(),
2601  task_communication_xadj.getRawPtr(),
2602  task_communication_adj.getRawPtr(),
2603  task_communication_edge_weight_
2604  );
2605  cout << "me: " << problemComm->getRank() << " cost:" << this->proc_task_comm->getCommunicationCostMetric() << endl;
2606  }
2607 */
2608 
2609  }
2610  delete [] wrap_arounds;
2611 
2612  if (machine_dimensions){
2613  for (int i = 0; i < proc_dim; ++i){
2614  delete [] virtual_machine_coordinates[i];
2615  }
2616  delete [] virtual_machine_coordinates;
2617  }
2618 #ifdef gnuPlot
2619  if(problemComm->getRank() == 0)
2620  this->writeMapping2(-1);
2621 #endif
2622  }
2623 
2624 
2625  /*
2626  double getCommunicationCostMetric(){
2627  return this->proc_task_comm->getCommCost();
2628  }
2629  */
2630 
2631  /*! \brief Returns the number of parts to be assigned to this process.
2632  */
2633  virtual size_t getLocalNumberOfParts() const{
2634  return 0;
2635  }
2636 
2645  pcoord_t **shiftMachineCoordinates(
2646  int machine_dim,
2647  const part_t *machine_dimensions,
2648  bool *machine_extent_wrap_around,
2649  part_t numProcs,
2650  pcoord_t **mCoords){
2651  pcoord_t **result_machine_coords = NULL;
2652  result_machine_coords = new pcoord_t*[machine_dim];
2653  for (int i = 0; i < machine_dim; ++i){
2654  result_machine_coords[i] = new pcoord_t [numProcs];
2655  }
2656 
2657  for (int i = 0; i < machine_dim; ++i){
2658  part_t numMachinesAlongDim = machine_dimensions[i];
2659  part_t *machineCounts= new part_t[numMachinesAlongDim];
2660  memset(machineCounts, 0, sizeof(part_t) *numMachinesAlongDim);
2661 
2662  int *filledCoordinates= new int[numMachinesAlongDim];
2663 
2664  pcoord_t *coords = mCoords[i];
2665  for(part_t j = 0; j < numProcs; ++j){
2666  part_t mc = (part_t) coords[j];
2667  ++machineCounts[mc];
2668  }
2669 
2670  part_t filledCoordinateCount = 0;
2671  for(part_t j = 0; j < numMachinesAlongDim; ++j){
2672  if (machineCounts[j] > 0){
2673  filledCoordinates[filledCoordinateCount++] = j;
2674  }
2675  }
2676 
2677  part_t firstProcCoord = filledCoordinates[0];
2678  part_t firstProcCount = machineCounts[firstProcCoord];
2679 
2680  part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
2681  part_t lastProcCount = machineCounts[lastProcCoord];
2682 
2683  part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
2684  part_t firstLastGapProc = lastProcCount + firstProcCount;
2685 
2686  part_t leftSideProcCoord = firstProcCoord;
2687  part_t leftSideProcCount = firstProcCount;
2688  part_t biggestGap = 0;
2689  part_t biggestGapProc = numProcs;
2690 
2691  part_t shiftBorderCoordinate = -1;
2692  for(part_t j = 1; j < filledCoordinateCount; ++j){
2693  part_t rightSideProcCoord= filledCoordinates[j];
2694  part_t rightSideProcCount = machineCounts[rightSideProcCoord];
2695 
2696  part_t gap = rightSideProcCoord - leftSideProcCoord;
2697  part_t gapProc = rightSideProcCount + leftSideProcCount;
2698 
2699  /* Pick the largest gap in this dimension. Use the fewer processes on either
2700  side of the largest gap to break ties. An easy extension would be to
2701  weight the gap by the number of processes. */
2702  if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
2703  shiftBorderCoordinate = rightSideProcCoord;
2704  biggestGapProc = gapProc;
2705  biggestGap = gap;
2706  }
2707  leftSideProcCoord = rightSideProcCoord;
2708  leftSideProcCount = rightSideProcCount;
2709  }
2710 
2711 
2712  if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
2713  shiftBorderCoordinate = -1;
2714  }
2715 
2716  for(part_t j = 0; j < numProcs; ++j){
2717 
2718  if (machine_extent_wrap_around[i] && coords[j] < shiftBorderCoordinate){
2719  result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
2720 
2721  }
2722  else {
2723  result_machine_coords[i][j] = coords[j];
2724  }
2725  //cout << "I:" << i << "j:" << j << " coord:" << coords[j] << " now:" << result_machine_coords[i][j] << endl;
2726  }
2727  delete [] machineCounts;
2728  delete [] filledCoordinates;
2729  }
2730 
2731  return result_machine_coords;
2732 
2733  }
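A worked instance of the shift with invented numbers: extent 8, ranks at coordinates {0, 1, 6, 7}. The widest gap lies between 1 and 6, so the shift border becomes 6 and every smaller coordinate is raised by the extent, which turns the long interior gap into the wrap-around and leaves the occupied coordinates contiguous (6, 7, 8, 9):

#include <iostream>

int main() {
  const int extent = 8;        // machine_dimensions[i]
  const int shiftBorder = 6;   // right side of the largest gap
  int coords[] = {0, 1, 6, 7};
  for (int j = 0; j < 4; ++j) {
    int shifted = (coords[j] < shiftBorder) ? coords[j] + extent : coords[j];
    std::cout << coords[j] << " -> " << shifted << "\n";
  }
  return 0;  // prints 0 -> 8, 1 -> 9, 6 -> 6, 7 -> 7
}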
2734 
2741  virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *&procs) const{
2742  numProcs = 1;
2743  procs = this->task_to_proc.getRawPtr() + taskId;
2744  }
2748  part_t getAssignedProcForTask(part_t taskId){
2749  return this->task_to_proc[taskId];
2750  }
2757  virtual void getPartsForProc(int procId, part_t &numParts, part_t *&parts) const{
2758 
2759  part_t task_begin = this->proc_to_task_xadj[procId];
2760  part_t taskend = this->proc_to_task_xadj[procId+1];
2761  parts = this->proc_to_task_adj.getRawPtr() + task_begin;
2762  numParts = taskend - task_begin;
2763  }
2764 
2765  ArrayView<part_t> getAssignedTasksForProc(part_t procId){
2766  part_t task_begin = this->proc_to_task_xadj[procId];
2767  part_t taskend = this->proc_to_task_xadj[procId+1];
2768 
2769  /*
2770  cout << "part_t:" << procId << " taskCount:" << taskend - task_begin << endl;
2771  for(part_t i = task_begin; i < taskend; ++i){
2772  cout << "part_t:" << procId << " task:" << proc_to_task_adj[i] << endl;
2773  }
2774  */
2775  if (taskend - task_begin > 0){
2776  ArrayView <part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
2777  return assignedParts;
2778  }
2779  else {
2780  ArrayView <part_t> assignedParts;
2781  return assignedParts;
2782  }
2783  }
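A minimal consistency sketch over the query methods above (the helper is hypothetical and the mapper is assumed to be constructed already): every task listed for a rank maps back to that same rank through getAssignedProcForTask().

#include <cassert>
#include "Zoltan2_TaskMapping.hpp"

template <typename Adapter, typename part_t>
void checkRoundTrip(
    Zoltan2::CoordinateTaskMapper<Adapter, part_t> &mapper, part_t rank)
{
  Teuchos::ArrayView<part_t> tasks = mapper.getAssignedTasksForProc(rank);
  for (int k = 0; k < (int)tasks.size(); ++k)
    assert(mapper.getAssignedProcForTask(tasks[k]) == rank);
}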
2784 
2785 };
2786 
2787 
2788 
2857 template <typename part_t, typename pcoord_t, typename tcoord_t>
2858 void coordinateTaskMapperInterface(
2859  RCP<const Teuchos::Comm<int> > problemComm,
2860  int proc_dim,
2861  int num_processors,
2862  pcoord_t **machine_coords,
2863  int task_dim,
2864  part_t num_tasks,
2865  tcoord_t **task_coords,
2866  part_t *task_comm_xadj,
2867  part_t *task_comm_adj,
2868  pcoord_t *task_communication_edge_weight_, /*float-like; same size as task_communication_adj_; weight of the corresponding edge.*/
2869  part_t *proc_to_task_xadj, /*output*/
2870  part_t *proc_to_task_adj, /*output*/
2871  int recursion_depth,
2872  part_t *part_no_array,
2873  const part_t *machine_dimensions,
2874  int num_ranks_per_node = 1,
2875  bool divide_to_prime_first = false
2876 )
2877 {
2878 
2879  const Environment *envConst_ = new Environment(problemComm);
2880 
2881  // mfh 03 Mar 2015: It's OK to omit the Node template
2882  // parameter in Tpetra, if you're just going to use the
2883  // default Node.
2884  typedef Tpetra::MultiVector<tcoord_t, part_t, part_t> tMVector_t;
2885 
2886  Teuchos::ArrayRCP<part_t> task_communication_xadj(task_comm_xadj, 0, num_tasks+1, false);
2887 
2888  Teuchos::ArrayRCP<part_t> task_communication_adj;
2889  if (task_comm_xadj){
2890  Teuchos::ArrayRCP<part_t> tmp_task_communication_adj(task_comm_adj, 0, task_comm_xadj[num_tasks], false);
2891  task_communication_adj = tmp_task_communication_adj;
2892  }
2893 
2894 
2895  CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t> *ctm =
2896  new CoordinateTaskMapper<XpetraMultiVectorAdapter<tMVector_t>, part_t>(
2897  envConst_,
2898  problemComm.getRawPtr(),
2899  proc_dim,
2900  num_processors,
2901  machine_coords,//machine_coords_,
2902 
2903  task_dim,
2904  num_tasks,
2905  task_coords,
2906 
2907  task_communication_xadj,
2908  task_communication_adj,
2909  task_communication_edge_weight_,
2910  recursion_depth,
2911  part_no_array,
2912  machine_dimensions,
2913  num_ranks_per_node,
2914  divide_to_prime_first
2915  );
2916 
2917 
2918  part_t* proc_to_task_xadj_;
2919  part_t* proc_to_task_adj_;
2920 
2921  ctm->getProcTask(proc_to_task_xadj_, proc_to_task_adj_);
2922 
2923  for (part_t i = 0; i <= num_processors; ++i){
2924  proc_to_task_xadj[i] = proc_to_task_xadj_[i];
2925  }
2926  for (part_t i = 0; i < num_tasks; ++i){
2927  proc_to_task_adj[i] = proc_to_task_adj_[i];
2928  }
2929  delete ctm;
2930  delete envConst_;
2931 
2932 }
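A hedged end-to-end sketch of the interface function above: four tasks forming a 4-cycle are mapped onto a 2x2 processor mesh. All sizes, coordinates, and the recursion depth are invented for illustration; no edge weights and no wrap-around handling are requested.

#include <Teuchos_GlobalMPISession.hpp>
#include <Teuchos_DefaultComm.hpp>
#include "Zoltan2_TaskMapping.hpp"

typedef int zpart_t;
typedef double zcoord_t;

int main(int argc, char **argv) {
  Teuchos::GlobalMPISession session(&argc, &argv);
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
      Teuchos::DefaultComm<int>::getComm();

  const int procDim = 2, taskDim = 2;
  const zpart_t numProcs = 4, numTasks = 4;

  // Coordinates are stored dimension-major: coords[dim][rank].
  zcoord_t px[] = {0, 0, 1, 1}, py[] = {0, 1, 0, 1};
  zcoord_t *procCoords[] = {px, py};          // 2x2 processor mesh
  zcoord_t tx[] = {0, 0, 1, 1}, ty[] = {0, 1, 0, 1};
  zcoord_t *taskCoords[] = {tx, ty};          // four part centers

  // Task communication graph in CSR form: the 4-cycle 0-1-3-2-0.
  zpart_t commXadj[] = {0, 2, 4, 6, 8};
  zpart_t commAdj[]  = {1, 2, 0, 3, 0, 3, 1, 2};

  zpart_t procToTaskXadj[5];  // output, numProcs + 1 entries
  zpart_t procToTaskAdj[4];   // output, numTasks entries

  Zoltan2::coordinateTaskMapperInterface<zpart_t, zcoord_t, zcoord_t>(
      comm, procDim, numProcs, procCoords,
      taskDim, numTasks, taskCoords,
      commXadj, commAdj,
      NULL,              // no edge weights
      procToTaskXadj,    // rank -> task offsets (CSR)
      procToTaskAdj,     // task ids, in CSR order
      8,                 // recursion depth, assumed adequate here
      NULL,              // no part_no_array
      NULL);             // no machine_dimensions: skip gap shifting
  return 0;
}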
2933 
2934 template <typename proc_coord_t, typename v_lno_t>
2935 inline void visualize_mapping(int myRank,
2936  const int machine_coord_dim, const int num_ranks, proc_coord_t **machine_coords,
2937  const v_lno_t num_tasks, const v_lno_t *task_communication_xadj, const v_lno_t *task_communication_adj, const int *task_to_rank){
2938 
2939  std::string rankStr = Teuchos::toString<int>(myRank);
2940  std::string gnuPlots = "gnuPlot", extentionS = ".plot";
2941  std::string outF = gnuPlots + rankStr+ extentionS;
2942  std::ofstream gnuPlotCode( outF.c_str(), std::ofstream::out);
2943 
2944  if (machine_coord_dim != 3) {
2945  std::cerr << "Mapping write is supported only for 3-dimensional machine coordinates" << std::endl;
2946  return;
2947  }
2948  std::string ss = "";
2949  std::string procs = "";
2950 
2951  std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
2952  for(v_lno_t origin_task = 0; origin_task < num_tasks; ++origin_task){
2953  int origin_rank = task_to_rank[origin_task];
2954  std::string gnuPlotArrow = "set arrow from ";
2955 
2956  for(int j = 0; j < machine_coord_dim; ++j){
2957  if (j == machine_coord_dim - 1){
2958  gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2959  procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2960 
2961  }
2962  else {
2963  gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]) +",";
2964  procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank])+ " ";
2965  }
2966  }
2967  procs += "\n";
2968 
2969  gnuPlotArrow += " to ";
2970 
2971 
2972  for (int nind = task_communication_xadj[origin_task]; nind < task_communication_xadj[origin_task + 1]; ++nind){
2973  int neighbor_task = task_communication_adj[nind];
2974 
2975  bool differentnode = false;
2976  int neighbor_rank = task_to_rank[neighbor_task];
2977 
2978  for(int j = 0; j < machine_coord_dim; ++j){
2979  if (int(machine_coords[j][origin_rank]) != int(machine_coords[j][neighbor_rank])){
2980  differentnode = true; break;
2981  }
2982  }
2983  std::tuple<int,int,int, int, int, int> foo(
2984  (int)(machine_coords[0][origin_rank]),
2985  (int)(machine_coords[1][origin_rank]),
2986  (int)(machine_coords[2][origin_rank]),
2987  (int)(machine_coords[0][neighbor_rank]),
2988  (int)(machine_coords[1][neighbor_rank]),
2989  (int)(machine_coords[2][neighbor_rank]));
2990 
2991 
2992  if (differentnode && my_arrows.find(foo) == my_arrows.end()){
2993  my_arrows.insert(foo);
2994 
2995  std::string gnuPlotArrow2 = "";
2996  for(int j = 0; j < machine_coord_dim; ++j){
2997  if(j == machine_coord_dim - 1){
2998  gnuPlotArrow2 += Teuchos::toString<proc_coord_t>(machine_coords[j][neighbor_rank]);
2999  }
3000  else{
3001  gnuPlotArrow2 += Teuchos::toString<proc_coord_t>(machine_coords[j][neighbor_rank]) +",";
3002  }
3003  }
3004  ss += gnuPlotArrow + gnuPlotArrow2 + " nohead\n";
3005  }
3006  }
3007  }
3008 
3009  std::ofstream procFile("procPlot.plot", std::ofstream::out);
3010  procFile << procs << "\n";
3011  procFile.close();
3012 
3013  //gnuPlotCode << ss;
3014  if(machine_coord_dim == 2){
3015  gnuPlotCode << "plot \"procPlot.plot\" with points pointsize 3\n";
3016  } else {
3017  gnuPlotCode << "splot \"procPlot.plot\" with points pointsize 3\n";
3018  }
3019 
3020  gnuPlotCode << ss << "\nreplot\n pause -1\npause -1";
3021  gnuPlotCode.close();
3022 }
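A hedged call sketch for visualize_mapping(): two tasks on two ranks of a 3-dimensional machine (the only dimensionality the function accepts). It writes gnuPlot0.plot and procPlot.plot; the coordinates and the CSR task graph are invented.

#include "Zoltan2_TaskMapping.hpp"

int main() {
  const int numRanks = 2, numTasks = 2;
  double x[] = {0, 1}, y[] = {0, 0}, z[] = {0, 0};
  double *machineCoords[] = {x, y, z};  // must be 3-dimensional
  int xadj[] = {0, 1, 2};               // each task talks to the other
  int adj[]  = {1, 0};
  int taskToRank[] = {0, 1};

  Zoltan2::visualize_mapping<double, int>(
      0, 3, numRanks, machineCoords, numTasks, xadj, adj, taskToRank);
  return 0;
}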
3023 
3024 }// namespace Zoltan2
3025 
3026 #endif