Fixes use in latest versions of SuperLU_DIST. See e.g. https://github.com/xiaoyeli/superlu_dist/issues/60 * gnu/packages/patches/combinatorial-blas-awpm.patch: Remove globals related to performance measurement. Declare non-template function inline.
		
			
				
	
	
		
			283 lines
		
	
	
	
		
			8.9 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			283 lines
		
	
	
	
		
			8.9 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| Install BipartiteMatchings headers for SuperLU_DIST.  Removes global variables
 | |
| and code related to performance measurement that is not useful when used in a
 | |
| library setting.
 | |
| 
 | |
| --- a/BipartiteMatchings/ApproxWeightPerfectMatching.h
 | |
| +++ b/BipartiteMatchings/ApproxWeightPerfectMatching.h
 | |
| @@ -9,7 +9,7 @@
 | |
|  #ifndef ApproxWeightPerfectMatching_h
 | |
|  #define ApproxWeightPerfectMatching_h
 | |
|  
 | |
| -#include "../CombBLAS.h"
 | |
| +#include "CombBLAS.h"
 | |
|  #include "BPMaximalMatching.h"
 | |
|  #include "BPMaximumMatching.h"
 | |
|  #include <parallel/algorithm>
 | |
| @@ -39,9 +39,6 @@
 | |
|      std::shared_ptr<CommGrid> commGrid;
 | |
|  };
 | |
|  
 | |
| -double t1Comp, t1Comm, t2Comp, t2Comm, t3Comp, t3Comm, t4Comp, t4Comm, t5Comp, t5Comm, tUpdateMateComp;
 | |
| -    
 | |
| -
 | |
|  template <class IT, class NT>
 | |
|  std::vector<std::tuple<IT,IT,NT>> ExchangeData(std::vector<std::vector<std::tuple<IT,IT,NT>>> & tempTuples, MPI_Comm World)
 | |
|  {
 | |
| @@ -391,7 +388,7 @@
 | |
|  
 | |
|  
 | |
|  
 | |
| -int ThreadBuffLenForBinning(int itemsize, int nbins)
 | |
| +inline int ThreadBuffLenForBinning(int itemsize, int nbins)
 | |
|  {
 | |
|      // 1MB shared cache (per 2 cores) in KNL
 | |
|  #ifndef L2_CACHE_SIZE
 | |
| @@ -417,7 +414,6 @@
 | |
|      
 | |
|      
 | |
|      
 | |
| -    double tstart = MPI_Wtime();
 | |
|      
 | |
|      
 | |
|      MPI_Comm World = param.commGrid->GetWorld();
 | |
| @@ -528,9 +524,6 @@
 | |
|          }
 | |
|      }
 | |
|      
 | |
| -    t1Comp = MPI_Wtime() - tstart;
 | |
| -    tstart = MPI_Wtime();
 | |
| -    
 | |
|      // Step 3: Communicate data
 | |
|      
 | |
|      std::vector<int> recvcnt (param.nprocs);
 | |
| @@ -548,7 +541,6 @@
 | |
|      std::vector< std::tuple<IT,IT,NT> > recvTuples1(totrecv);
 | |
|      MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World);
 | |
|      MPI_Type_free(&MPI_tuple);
 | |
| -    t1Comm = MPI_Wtime() - tstart;
 | |
|      return recvTuples1;
 | |
|  }
 | |
|  
 | |
| @@ -730,9 +722,6 @@
 | |
|  
 | |
|      // Step 4: Communicate data
 | |
|      
 | |
| -    t2Comp = MPI_Wtime() - tstart;
 | |
| -    tstart = MPI_Wtime();
 | |
| -    
 | |
|      std::vector<int> recvcnt (param.nprocs);
 | |
|      std::vector<int> rdispls (param.nprocs, 0);
 | |
|      
 | |
| @@ -748,7 +737,6 @@
 | |
|      std::vector< std::tuple<IT,IT,IT,NT> > recvTuples1(totrecv);
 | |
|      MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World);
 | |
|      MPI_Type_free(&MPI_tuple);
 | |
| -    t2Comm = MPI_Wtime() - tstart;
 | |
|      return recvTuples1;
 | |
|  }
 | |
|  
 | |
| @@ -836,7 +824,6 @@
 | |
|      param.myrank = myrank;
 | |
|      param.commGrid = commGrid;
 | |
|      
 | |
| -    double t1CompAll = 0, t1CommAll = 0, t2CompAll = 0, t2CommAll = 0, t3CompAll = 0, t3CommAll = 0, t4CompAll = 0, t4CommAll = 0, t5CompAll = 0, t5CommAll = 0, tUpdateMateCompAll = 0, tUpdateWeightAll = 0;
 | |
|  	
 | |
|  	// -----------------------------------------------------------
 | |
|  	// replicate mate vectors for mateCol2Row
 | |
| @@ -975,11 +962,7 @@
 | |
|  		}
 | |
|  		
 | |
|  		//vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1);
 | |
| -		double t3Comp = MPI_Wtime() - tstart;
 | |
| -		tstart = MPI_Wtime();
 | |
|  		recvTuples1 = ExchangeData1(tempTuples1, World);
 | |
| -		double t3Comm = MPI_Wtime() - tstart;
 | |
| -		tstart = MPI_Wtime();
 | |
|  		
 | |
|  		std::vector<std::tuple<IT,IT,IT,IT, NT>> bestTuplesPhase4 (lncol);
 | |
|  		// we could have used lnrow in both bestTuplesPhase3 and bestTuplesPhase4
 | |
| @@ -1041,14 +1024,9 @@
 | |
|  		
 | |
|  		
 | |
|  		//vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1);
 | |
| -		double t4Comp = MPI_Wtime() - tstart;
 | |
| -		tstart = MPI_Wtime();
 | |
|  		
 | |
|  		std::vector<std::tuple<IT,IT,IT,IT>> recvWinnerTuples = ExchangeData1(winnerTuples, World);
 | |
|  		
 | |
| -		double t4Comm = MPI_Wtime() - tstart;
 | |
| -		tstart = MPI_Wtime();
 | |
| -		
 | |
|  		// at the owner of (mj,j)
 | |
|  		std::vector<std::tuple<IT,IT>> rowBcastTuples(recvWinnerTuples.size()); //(mi,mj)
 | |
|  		std::vector<std::tuple<IT,IT>> colBcastTuples(recvWinnerTuples.size()); //(j,i)
 | |
| @@ -1065,15 +1043,10 @@
 | |
|  			colBcastTuples[k] = std::make_tuple(j,i);
 | |
|  			rowBcastTuples[k] = std::make_tuple(mj,mi);
 | |
|  		}
 | |
| -		double t5Comp = MPI_Wtime() - tstart;
 | |
| -		tstart = MPI_Wtime();
 | |
|  		
 | |
|  		std::vector<std::tuple<IT,IT>> updatedR2C = MateBcast(rowBcastTuples, RowWorld);
 | |
|  		std::vector<std::tuple<IT,IT>> updatedC2R = MateBcast(colBcastTuples, ColWorld);
 | |
|  		
 | |
| -		double t5Comm = MPI_Wtime() - tstart;
 | |
| -		tstart = MPI_Wtime();
 | |
| -		
 | |
|  #ifdef THREADED
 | |
|  #pragma omp parallel for
 | |
|  #endif
 | |
| @@ -1095,13 +1068,9 @@
 | |
|  		}
 | |
|  		
 | |
|  		
 | |
| -		double tUpdateMateComp = MPI_Wtime() - tstart;
 | |
| -		tstart = MPI_Wtime();
 | |
|  		// update weights of matched edges
 | |
|  		// we can do better than this since we are doing sparse updates
 | |
|  		ReplicateMateWeights(param, dcsc, colptr, RepMateC2R, RepMateWR2C, RepMateWC2R);
 | |
| -		double tUpdateWeight = MPI_Wtime() - tstart;
 | |
| -		
 | |
|  		
 | |
|  		weightPrev = weightCur;
 | |
|  		weightCur = MatchingWeight(RepMateWC2R, RowWorld, minw);
 | |
| @@ -1110,32 +1079,8 @@
 | |
|  		//UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R);
 | |
|  		//CheckMatching(mateRow2Col,mateCol2Row);
 | |
|  		
 | |
| -		if(myrank==0)
 | |
| -		{
 | |
| -			std::cout  <<  t1Comp << " " << t1Comm << " "<< t2Comp << " " << t2Comm << " " << t3Comp << " " << t3Comm << " " << t4Comp << " " << t4Comm << " " << t5Comp << " " << t5Comm << " " << tUpdateMateComp << " " << tUpdateWeight << std::endl;
 | |
| -            
 | |
| -            t1CompAll += t1Comp;
 | |
| -            t1CommAll += t1Comm;
 | |
| -            t2CompAll += t2Comp;
 | |
| -            t2CommAll += t2Comm;
 | |
| -            t3CompAll += t3Comp;
 | |
| -            t3CommAll += t3Comm;
 | |
| -            t4CompAll += t4Comp;
 | |
| -            t4CommAll += t4Comm;
 | |
| -            t5CompAll += t5Comp;
 | |
| -            t5CommAll += t5Comm;
 | |
| -            tUpdateMateCompAll += tUpdateMateComp;
 | |
| -            tUpdateWeightAll += tUpdateWeight;
 | |
| -            
 | |
| -		}
 | |
|  	}
 | |
|  	
 | |
| -    if(myrank==0)
 | |
| -    {
 | |
| -        std::cout << "=========== overal timing ==========" << std::endl;
 | |
| -        std::cout  <<  t1CompAll << " " << t1CommAll << " " << t2CompAll << " " << t2CommAll << " " << t3CompAll << " " << t3CommAll << " " << t4CompAll << " " << t4CommAll << " " << t5CompAll << " " << t5CommAll << " " << tUpdateMateCompAll << " " << tUpdateWeightAll << std::endl;
 | |
| -    }
 | |
| -	
 | |
|  	// update the distributed mate vectors from replicated mate vectors
 | |
|  	UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R);
 | |
|  	//weightCur = MatchingWeight(RepMateWC2R, RowWorld);
 | |
| --- a/BipartiteMatchings/BPMaximalMatching.h
 | |
| +++ b/BipartiteMatchings/BPMaximalMatching.h
 | |
| @@ -1,7 +1,7 @@
 | |
|  #ifndef BP_MAXIMAL_MATCHING_H
 | |
|  #define BP_MAXIMAL_MATCHING_H
 | |
|  
 | |
| -#include "../CombBLAS.h"
 | |
| +#include "CombBLAS.h"
 | |
|  #include <iostream>
 | |
|  #include <functional>
 | |
|  #include <algorithm>
 | |
| @@ -14,8 +14,6 @@
 | |
|  #define GREEDY 1
 | |
|  #define KARP_SIPSER 2
 | |
|  #define DMD 3
 | |
| -MTRand GlobalMT(123); // for reproducible result
 | |
| -double tTotalMaximal;
 | |
|  
 | |
|  namespace combblas {
 | |
|  
 | |
| @@ -25,7 +25,7 @@
 | |
|  void MaximalMatching(Par_DCSC_Bool & A, Par_DCSC_Bool & AT, FullyDistVec<IT, IT>& mateRow2Col,
 | |
|              FullyDistVec<IT, IT>& mateCol2Row, FullyDistVec<IT, IT>& degColRecv, int type, bool rand=true)
 | |
|  {
 | |
| -
 | |
| +    static MTRand GlobalMT(123); // for reproducible result
 | |
|  	typedef VertexTypeML < IT, IT> VertexType;
 | |
|      int nprocs, myrank;
 | |
|      MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
 | |
| @@ -354,8 +354,6 @@
 | |
|  		
 | |
|  	}
 | |
|  	
 | |
| -    tTotalMaximal = MPI_Wtime() - tStart;
 | |
| -    
 | |
|  	IT cardinality = mateRow2Col.Count([](IT mate){return mate!=-1;});
 | |
|  	std::vector<double> totalTimes(timing[0].size(),0);
 | |
|  	for(int i=0; i<timing.size(); i++)
 | |
| --- a/BipartiteMatchings/BPMaximumMatching.h
 | |
| +++ b/BipartiteMatchings/BPMaximumMatching.h
 | |
| @@ -1,7 +1,7 @@
 | |
|  #ifndef BP_MAXIMUM_MATCHING_H
 | |
|  #define BP_MAXIMUM_MATCHING_H
 | |
|  
 | |
| -#include "../CombBLAS.h"
 | |
| +#include "CombBLAS.h"
 | |
|  #include <mpi.h>
 | |
|  #include <sys/time.h>
 | |
|  #include <iostream>
 | |
| @@ -11,7 +11,6 @@
 | |
|  #include <string>
 | |
|  #include <sstream>
 | |
|  #include "MatchingDefs.h"
 | |
| -double tTotalMaximum;
 | |
|  
 | |
|  namespace combblas {
 | |
|  
 | |
| @@ -231,7 +231,7 @@
 | |
|  void maximumMatching(SpParMat < IT, NT, DER > & A, FullyDistVec<IT, IT>& mateRow2Col,
 | |
|                       FullyDistVec<IT, IT>& mateCol2Row, bool prune=true, bool randMM = false, bool maximizeWeight = false)
 | |
|  {
 | |
| -	
 | |
| +    static MTRand GlobalMT(123); // for reproducible result	
 | |
|  	typedef VertexTypeMM <IT> VertexType;
 | |
|  	
 | |
|      int nthreads=1;
 | |
| @@ -420,8 +420,6 @@
 | |
|      
 | |
|      MPI_Win_free(&winLeaves);
 | |
|      
 | |
| -    tTotalMaximum = MPI_Wtime() - tstart;
 | |
| -    
 | |
|      //isMaximalmatching(A, mateRow2Col, mateCol2Row, unmatchedRow, unmatchedCol);
 | |
|      //isMatching(mateCol2Row, mateRow2Col); //todo there is a better way to check this
 | |
|      
 | |
| --- a/BipartiteMatchings/MatchingDefs.h
 | |
| +++ b/BipartiteMatchings/MatchingDefs.h
 | |
| @@ -9,7 +9,7 @@
 | |
|  #ifndef MatchingDefs_h
 | |
|  #define MatchingDefs_h
 | |
|  
 | |
| -#include "../CombBLAS.h"
 | |
| +#include "CombBLAS.h"
 | |
|  #include <iostream>
 | |
|  
 | |
|  namespace combblas {
 | |
| --- a/BipartiteMatchings/Utility.h
 | |
| +++ b/BipartiteMatchings/Utility.h
 | |
| @@ -1,7 +1,7 @@
 | |
|  #ifndef BP_UTILITY_H
 | |
|  #define BP_UTILITY_H
 | |
|  
 | |
| -#include "../CombBLAS.h"
 | |
| +#include "CombBLAS.h"
 | |
|  
 | |
|  namespace combblas {
 | |
|  
 | |
| --- a/CMakeLists.txt
 | |
| +++ b/CMakeLists.txt
 | |
| @@ -68,6 +68,7 @@ set_property(TARGET CombBLAS PROPERTY VERSION ${CombBLAS_VERSION})
 | |
|  # installation
 | |
|  install(DIRECTORY include/ DESTINATION include)
 | |
|  install(DIRECTORY psort-1.0/include/ DESTINATION include)
 | |
| +install(DIRECTORY BipartiteMatchings DESTINATION include FILES_MATCHING PATTERN "*.h")
 | |
|  install(TARGETS CombBLAS EXPORT CombBLASTargets
 | |
|    LIBRARY DESTINATION lib
 | |
|    ARCHIVE DESTINATION lib
 |