1 #ifndef BP_MAXIMUM_MATCHING_H
2 #define BP_MAXIMUM_MATCHING_H
4 #include "../CombBLAS.h"
26 template <
class IT,
class DER>
30 IT procsPerRow = ri.commGrid->GetGridCols();
31 IT procsPerCol = ri.commGrid->GetGridRows();
34 IT global_nrow = ri.TotalLength();
35 IT global_ncol = ncol;
36 IT m_perprocrow = global_nrow / procsPerRow;
37 IT n_perproccol = global_ncol / procsPerCol;
44 std::vector< std::vector<IT> > rowid(procsPerRow);
45 std::vector< std::vector<IT> > colid(procsPerRow);
47 IT locvec = ri.arr.size();
48 IT roffset = ri.RowLenUntil();
49 for(
typename std::vector<IT>::size_type i=0; i< (unsigned)locvec; ++i)
51 if(ri.arr[i]>=0 && ri.arr[i]<ncol)
53 IT rowrec = (n_perproccol!=0) ? std::min(ri.arr[i] / n_perproccol, procsPerRow-1) : (procsPerRow-1);
55 rowid[rowrec].push_back( i + roffset);
56 colid[rowrec].push_back(ri.arr[i] - (rowrec * n_perproccol));
63 int * sendcnt =
new int[procsPerRow];
64 int * recvcnt =
new int[procsPerRow];
65 for(IT i=0; i<procsPerRow; ++i)
67 sendcnt[i] = rowid[i].size();
70 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, ri.commGrid->GetRowWorld());
72 int * sdispls =
new int[procsPerRow]();
73 int * rdispls =
new int[procsPerRow]();
74 partial_sum(sendcnt, sendcnt+procsPerRow-1, sdispls+1);
75 partial_sum(recvcnt, recvcnt+procsPerRow-1, rdispls+1);
76 IT p_nnz = accumulate(recvcnt,recvcnt+procsPerRow,
static_cast<IT
>(0));
79 IT * p_rows =
new IT[p_nnz];
80 IT * p_cols =
new IT[p_nnz];
81 IT * senddata =
new IT[locvec];
82 for(
int i=0; i<procsPerRow; ++i)
84 copy(rowid[i].begin(), rowid[i].end(), senddata+sdispls[i]);
85 std::vector<IT>().swap(rowid[i]);
87 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_rows, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
89 for(
int i=0; i<procsPerRow; ++i)
91 copy(colid[i].begin(), colid[i].end(), senddata+sdispls[i]);
92 std::vector<IT>().swap(colid[i]);
94 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_cols, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
97 std::tuple<IT,IT,bool> * p_tuples =
new std::tuple<IT,IT,bool>[p_nnz];
98 for(IT i=0; i< p_nnz; ++i)
100 p_tuples[i] = make_tuple(p_rows[i], p_cols[i], 1);
106 IT local_nrow = ri.MyRowLength();
107 int my_proccol = ri.commGrid->GetRankInProcRow();
108 IT local_ncol = (my_proccol<(procsPerCol-1))? (n_perproccol) : (global_ncol - (n_perproccol*(procsPerCol-1)));
112 DER_IT * PSeq =
new DER_IT();
113 PSeq->Create( p_nnz, local_nrow, local_ncol, p_tuples);
129 template <
typename IT>
133 IT nrow = mateRow2Col.TotalLength();
134 IT ncol = mateCol2Row.TotalLength();
139 while(col.getnnz()!=0)
143 row = EWiseApply<IT>(row, parentsRow,
144 [](IT root, IT parent){
return parent;},
145 [](IT root, IT parent){
return true;},
149 nextcol = EWiseApply<IT>(col, mateCol2Row,
150 [](IT child, IT mate){
return mate;},
151 [](IT child, IT mate){
return mate!=-1;},
153 mateRow2Col.
Set(row);
154 mateCol2Row.
Set(col);
168 template <
typename IT>
171 MPI_Win win_mateRow2Col, win_mateCol2Row, win_parentsRow;
172 MPI_Win_create((IT*)mateRow2Col.
GetLocArr(), mateRow2Col.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, mateRow2Col.commGrid->GetWorld(), &win_mateRow2Col);
173 MPI_Win_create((IT*)mateCol2Row.
GetLocArr(), mateCol2Row.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, mateCol2Row.commGrid->GetWorld(), &win_mateCol2Row);
174 MPI_Win_create((IT*)parentsRow.
GetLocArr(), parentsRow.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, parentsRow.commGrid->GetWorld(), &win_parentsRow);
177 IT* leaves_ptr = (IT*) leaves.
GetLocArr();
182 IT row, col=100, nextrow;
183 int owner_row, owner_col;
184 IT locind_row, locind_col;
187 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
193 row = *(leaves_ptr+i);
197 owner_row = mateRow2Col.Owner(row, locind_row);
198 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_parentsRow);
199 MPI_Get(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_parentsRow);
200 MPI_Win_unlock(owner_row, win_parentsRow);
202 owner_col = mateCol2Row.Owner(col, locind_col);
203 MPI_Win_lock(MPI_LOCK_SHARED, owner_col, 0, win_mateCol2Row);
204 MPI_Fetch_and_op(&row, &nextrow, MPIType<IT>(), owner_col, locind_col, MPI_REPLACE, win_mateCol2Row);
205 MPI_Win_unlock(owner_col, win_mateCol2Row);
207 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_mateRow2Col);
208 MPI_Put(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_mateRow2Col);
209 MPI_Win_unlock(owner_row, win_mateRow2Col);
219 MPI_Win_free(&win_mateRow2Col);
220 MPI_Win_free(&win_mateCol2Row);
221 MPI_Win_free(&win_parentsRow);
230 template <
typename IT,
typename NT,
typename DER>
241 nthreads = omp_get_num_threads();
246 double tstart = MPI_Wtime();
248 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
249 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
251 IT nrow =
A.getnrow();
252 IT ncol =
A.getncol();
258 std::vector<std::vector<double> > timing;
259 std::vector<int> layers;
260 std::vector<int64_t> phaseMatched;
261 double t1, time_search, time_augment, time_phase;
270 MPI_Win_create((IT*)leaves.
GetLocArr(), leaves.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL,
A.getcommgrid()->GetWorld(), &winLeaves);
275 time_phase = MPI_Wtime();
277 std::vector<double> phase_timing(8,0);
278 leaves.
Apply ( [](IT val){
return (IT) -1;});
281 fringeCol = EWiseApply<VertexType>(fringeCol, mateCol2Row,
283 [](
VertexType vtx, IT mate){
return mate==-1;},
297 numUnmatchedCol = fringeCol.
getnnz();
301 time_search = MPI_Wtime();
302 while(fringeCol.
getnnz() > 0)
309 SpMV<WeightMaxMMSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
311 SpMV<Select2ndMinSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
312 phase_timing[0] += MPI_Wtime()-t1;
319 fringeRow = EWiseApply<VertexType>(fringeRow, parentsRow,
321 [](
VertexType vtx, IT parent){
return parent==-1;},
331 umFringeRow = EWiseApply<IT>(fringeRow, mateRow2Col,
332 [](
VertexType vtx, IT mate){
return vtx.root;},
333 [](
VertexType vtx, IT mate){
return mate==-1;},
336 phase_timing[1] += MPI_Wtime()-t1;
339 IT nnz_umFringeRow = umFringeRow.
getnnz();
342 if(nnz_umFringeRow >0)
357 temp1 = umFringeRow.
Invert(ncol);
362 phase_timing[2] += MPI_Wtime()-t1;
368 fringeRow = EWiseApply<VertexType>(fringeRow, mateRow2Col,
370 [](
VertexType vtx, IT mate){
return mate!=-1;},
374 if(nnz_umFringeRow>0 &&
prune)
376 fringeRow.FilterByVal (umFringeRow,[](
VertexType vtx){
return vtx.root;},
false);
378 double tprune = MPI_Wtime()-t1;
379 phase_timing[3] += tprune;
384 fringeCol = fringeRow.Invert(ncol,
386 [](
VertexType& vtx,
const IT & index){
return vtx;},
388 phase_timing[4] += MPI_Wtime()-t1;
394 time_search = MPI_Wtime() - time_search;
395 phase_timing[5] += time_search;
397 IT numMatchedCol = leaves.
Count([](IT leaf){
return leaf!=-1;});
398 phaseMatched.push_back(numMatchedCol);
399 time_augment = MPI_Wtime();
400 if (numMatchedCol== 0) matched =
false;
404 if(numMatchedCol < (2* nprocs * nprocs))
405 AugmentPath(mateRow2Col, mateCol2Row,parentsRow, leaves);
407 AugmentLevel(mateRow2Col, mateCol2Row,parentsRow, leaves);
409 time_augment = MPI_Wtime() - time_augment;
410 phase_timing[6] += time_augment;
412 time_phase = MPI_Wtime() - time_phase;
413 phase_timing[7] += time_phase;
414 timing.push_back(phase_timing);
416 layers.push_back(layer);
421 MPI_Win_free(&winLeaves);
433 std::cout <<
"****** maximum matching runtime ********\n";
434 std::cout << std::endl;
435 std::cout <<
"========================================================================\n";
436 std::cout <<
" BFS Search \n";
437 std::cout <<
"===================== ==================================================\n";
438 std::cout <<
"Phase Layer Match SpMV EWOpp CmUqL Prun CmMC BFS Aug Total\n";
439 std::cout <<
"===================== ===================================================\n";
441 std::vector<double> totalTimes(timing[0].
size(),0);
442 int nphases = timing.size();
443 for(
int i=0; i<timing.size(); i++)
445 printf(
" %3d %3d %8lld ", i+1, layers[i], phaseMatched[i]);
446 for(
int j=0; j<timing[i].size(); j++)
448 totalTimes[j] += timing[i][j];
450 printf(
"%.2lf ", timing[i][j]);
456 std::cout <<
"-----------------------------------------------------------------------\n";
457 std::cout <<
"Phase Layer UnMat SpMV EWOpp CmUqL Prun CmMC BFS Aug Total \n";
458 std::cout <<
"-----------------------------------------------------------------------\n";
460 combTime = totalTimes.back();
461 printf(
" %3d %3d %8lld ", nphases, totalLayer/nphases, numUnmatchedCol);
462 for(
int j=0; j<totalTimes.size()-1; j++)
464 printf(
"%.2lf ", totalTimes[j]);
466 printf(
"%.2lf\n", combTime);
469 IT nrows=
A.getnrow();
470 IT matchedRow = mateRow2Col.
Count([](IT mate){
return mate!=-1;});
473 std::cout <<
"***Final Maximum Matching***\n";
474 std::cout <<
"***Total-Rows Matched-Rows Total Time***\n";
475 printf(
"%lld %lld %lf \n",nrows, matchedRow, combTime);
476 printf(
"matched rows: %lld , which is: %lf percent \n",matchedRow, 100*(
double)matchedRow/(nrows));
477 std::cout <<
"-------------------------------------------------------\n\n";