Commit | Line | Data |
---|---|---|
38dd27e8 EB |
1 | Install BipartiteMatchings headers for SuperLU_DIST. Remove global variables |
2 | and code related to performance measurement that are not useful when used in a | |
3 | library setting. | |
a6b9ebc2 EB |
4 | |
5 | --- a/BipartiteMatchings/ApproxWeightPerfectMatching.h | |
6 | +++ b/BipartiteMatchings/ApproxWeightPerfectMatching.h | |
7 | @@ -9,7 +9,7 @@ | |
8 | #ifndef ApproxWeightPerfectMatching_h | |
9 | #define ApproxWeightPerfectMatching_h | |
10 | ||
11 | -#include "../CombBLAS.h" | |
12 | +#include "CombBLAS.h" | |
13 | #include "BPMaximalMatching.h" | |
14 | #include "BPMaximumMatching.h" | |
15 | #include <parallel/algorithm> | |
38dd27e8 EB |
16 | @@ -39,9 +39,6 @@ |
17 | std::shared_ptr<CommGrid> commGrid; | |
18 | }; | |
19 | ||
20 | -double t1Comp, t1Comm, t2Comp, t2Comm, t3Comp, t3Comm, t4Comp, t4Comm, t5Comp, t5Comm, tUpdateMateComp; | |
21 | - | |
22 | - | |
23 | template <class IT, class NT> | |
24 | std::vector<std::tuple<IT,IT,NT>> ExchangeData(std::vector<std::vector<std::tuple<IT,IT,NT>>> & tempTuples, MPI_Comm World) | |
25 | { | |
26 | @@ -391,7 +388,7 @@ | |
27 | ||
28 | ||
29 | ||
30 | -int ThreadBuffLenForBinning(int itemsize, int nbins) | |
31 | +inline int ThreadBuffLenForBinning(int itemsize, int nbins) | |
32 | { | |
33 | // 1MB shared cache (per 2 cores) in KNL | |
34 | #ifndef L2_CACHE_SIZE | |
35 | @@ -417,7 +414,6 @@ | |
36 | ||
37 | ||
38 | ||
39 | - double tstart = MPI_Wtime(); | |
40 | ||
41 | ||
42 | MPI_Comm World = param.commGrid->GetWorld(); | |
43 | @@ -528,9 +524,6 @@ | |
44 | } | |
45 | } | |
46 | ||
47 | - t1Comp = MPI_Wtime() - tstart; | |
48 | - tstart = MPI_Wtime(); | |
49 | - | |
50 | // Step 3: Communicate data | |
51 | ||
52 | std::vector<int> recvcnt (param.nprocs); | |
53 | @@ -548,7 +541,6 @@ | |
54 | std::vector< std::tuple<IT,IT,NT> > recvTuples1(totrecv); | |
55 | MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World); | |
56 | MPI_Type_free(&MPI_tuple); | |
57 | - t1Comm = MPI_Wtime() - tstart; | |
58 | return recvTuples1; | |
59 | } | |
60 | ||
61 | @@ -730,9 +722,6 @@ | |
62 | ||
63 | // Step 4: Communicate data | |
64 | ||
65 | - t2Comp = MPI_Wtime() - tstart; | |
66 | - tstart = MPI_Wtime(); | |
67 | - | |
68 | std::vector<int> recvcnt (param.nprocs); | |
69 | std::vector<int> rdispls (param.nprocs, 0); | |
70 | ||
71 | @@ -748,7 +737,6 @@ | |
72 | std::vector< std::tuple<IT,IT,IT,NT> > recvTuples1(totrecv); | |
73 | MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World); | |
74 | MPI_Type_free(&MPI_tuple); | |
75 | - t2Comm = MPI_Wtime() - tstart; | |
76 | return recvTuples1; | |
77 | } | |
78 | ||
79 | @@ -836,7 +824,6 @@ | |
80 | param.myrank = myrank; | |
81 | param.commGrid = commGrid; | |
82 | ||
83 | - double t1CompAll = 0, t1CommAll = 0, t2CompAll = 0, t2CommAll = 0, t3CompAll = 0, t3CommAll = 0, t4CompAll = 0, t4CommAll = 0, t5CompAll = 0, t5CommAll = 0, tUpdateMateCompAll = 0, tUpdateWeightAll = 0; | |
84 | ||
85 | // ----------------------------------------------------------- | |
86 | // replicate mate vectors for mateCol2Row | |
87 | @@ -975,11 +962,7 @@ | |
88 | } | |
89 | ||
90 | //vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1); | |
91 | - double t3Comp = MPI_Wtime() - tstart; | |
92 | - tstart = MPI_Wtime(); | |
93 | recvTuples1 = ExchangeData1(tempTuples1, World); | |
94 | - double t3Comm = MPI_Wtime() - tstart; | |
95 | - tstart = MPI_Wtime(); | |
96 | ||
97 | std::vector<std::tuple<IT,IT,IT,IT, NT>> bestTuplesPhase4 (lncol); | |
98 | // we could have used lnrow in both bestTuplesPhase3 and bestTuplesPhase4 | |
99 | @@ -1041,14 +1024,9 @@ | |
100 | ||
101 | ||
102 | //vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1); | |
103 | - double t4Comp = MPI_Wtime() - tstart; | |
104 | - tstart = MPI_Wtime(); | |
105 | ||
106 | std::vector<std::tuple<IT,IT,IT,IT>> recvWinnerTuples = ExchangeData1(winnerTuples, World); | |
107 | ||
108 | - double t4Comm = MPI_Wtime() - tstart; | |
109 | - tstart = MPI_Wtime(); | |
110 | - | |
111 | // at the owner of (mj,j) | |
112 | std::vector<std::tuple<IT,IT>> rowBcastTuples(recvWinnerTuples.size()); //(mi,mj) | |
113 | std::vector<std::tuple<IT,IT>> colBcastTuples(recvWinnerTuples.size()); //(j,i) | |
114 | @@ -1065,15 +1043,10 @@ | |
115 | colBcastTuples[k] = std::make_tuple(j,i); | |
116 | rowBcastTuples[k] = std::make_tuple(mj,mi); | |
117 | } | |
118 | - double t5Comp = MPI_Wtime() - tstart; | |
119 | - tstart = MPI_Wtime(); | |
120 | ||
121 | std::vector<std::tuple<IT,IT>> updatedR2C = MateBcast(rowBcastTuples, RowWorld); | |
122 | std::vector<std::tuple<IT,IT>> updatedC2R = MateBcast(colBcastTuples, ColWorld); | |
123 | ||
124 | - double t5Comm = MPI_Wtime() - tstart; | |
125 | - tstart = MPI_Wtime(); | |
126 | - | |
127 | #ifdef THREADED | |
128 | #pragma omp parallel for | |
129 | #endif | |
130 | @@ -1095,13 +1068,9 @@ | |
131 | } | |
132 | ||
133 | ||
134 | - double tUpdateMateComp = MPI_Wtime() - tstart; | |
135 | - tstart = MPI_Wtime(); | |
136 | // update weights of matched edges | |
137 | // we can do better than this since we are doing sparse updates | |
138 | ReplicateMateWeights(param, dcsc, colptr, RepMateC2R, RepMateWR2C, RepMateWC2R); | |
139 | - double tUpdateWeight = MPI_Wtime() - tstart; | |
140 | - | |
141 | ||
142 | weightPrev = weightCur; | |
143 | weightCur = MatchingWeight(RepMateWC2R, RowWorld, minw); | |
144 | @@ -1110,32 +1079,8 @@ | |
145 | //UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R); | |
146 | //CheckMatching(mateRow2Col,mateCol2Row); | |
147 | ||
148 | - if(myrank==0) | |
149 | - { | |
150 | - std::cout << t1Comp << " " << t1Comm << " "<< t2Comp << " " << t2Comm << " " << t3Comp << " " << t3Comm << " " << t4Comp << " " << t4Comm << " " << t5Comp << " " << t5Comm << " " << tUpdateMateComp << " " << tUpdateWeight << std::endl; | |
151 | - | |
152 | - t1CompAll += t1Comp; | |
153 | - t1CommAll += t1Comm; | |
154 | - t2CompAll += t2Comp; | |
155 | - t2CommAll += t2Comm; | |
156 | - t3CompAll += t3Comp; | |
157 | - t3CommAll += t3Comm; | |
158 | - t4CompAll += t4Comp; | |
159 | - t4CommAll += t4Comm; | |
160 | - t5CompAll += t5Comp; | |
161 | - t5CommAll += t5Comm; | |
162 | - tUpdateMateCompAll += tUpdateMateComp; | |
163 | - tUpdateWeightAll += tUpdateWeight; | |
164 | - | |
165 | - } | |
166 | } | |
167 | ||
168 | - if(myrank==0) | |
169 | - { | |
170 | - std::cout << "=========== overal timing ==========" << std::endl; | |
171 | - std::cout << t1CompAll << " " << t1CommAll << " " << t2CompAll << " " << t2CommAll << " " << t3CompAll << " " << t3CommAll << " " << t4CompAll << " " << t4CommAll << " " << t5CompAll << " " << t5CommAll << " " << tUpdateMateCompAll << " " << tUpdateWeightAll << std::endl; | |
172 | - } | |
173 | - | |
174 | // update the distributed mate vectors from replicated mate vectors | |
175 | UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R); | |
176 | //weightCur = MatchingWeight(RepMateWC2R, RowWorld); | |
a6b9ebc2 EB |
177 | --- a/BipartiteMatchings/BPMaximalMatching.h |
178 | +++ b/BipartiteMatchings/BPMaximalMatching.h | |
179 | @@ -1,7 +1,7 @@ | |
180 | #ifndef BP_MAXIMAL_MATCHING_H | |
181 | #define BP_MAXIMAL_MATCHING_H | |
182 | ||
183 | -#include "../CombBLAS.h" | |
184 | +#include "CombBLAS.h" | |
185 | #include <iostream> | |
186 | #include <functional> | |
187 | #include <algorithm> | |
38dd27e8 EB |
188 | @@ -14,8 +14,6 @@ |
189 | #define GREEDY 1 | |
190 | #define KARP_SIPSER 2 | |
191 | #define DMD 3 | |
192 | -MTRand GlobalMT(123); // for reproducible result | |
193 | -double tTotalMaximal; | |
194 | ||
195 | namespace combblas { | |
196 | ||
197 | @@ -25,7 +25,7 @@ | |
198 | void MaximalMatching(Par_DCSC_Bool & A, Par_DCSC_Bool & AT, FullyDistVec<IT, IT>& mateRow2Col, | |
199 | FullyDistVec<IT, IT>& mateCol2Row, FullyDistVec<IT, IT>& degColRecv, int type, bool rand=true) | |
200 | { | |
201 | - | |
202 | + static MTRand GlobalMT(123); // for reproducible result | |
203 | typedef VertexTypeML < IT, IT> VertexType; | |
204 | int nprocs, myrank; | |
205 | MPI_Comm_size(MPI_COMM_WORLD,&nprocs); | |
206 | @@ -354,8 +354,6 @@ | |
207 | ||
208 | } | |
209 | ||
210 | - tTotalMaximal = MPI_Wtime() - tStart; | |
211 | - | |
212 | IT cardinality = mateRow2Col.Count([](IT mate){return mate!=-1;}); | |
213 | std::vector<double> totalTimes(timing[0].size(),0); | |
214 | for(int i=0; i<timing.size(); i++) | |
a6b9ebc2 EB |
215 | --- a/BipartiteMatchings/BPMaximumMatching.h |
216 | +++ b/BipartiteMatchings/BPMaximumMatching.h | |
217 | @@ -1,7 +1,7 @@ | |
218 | #ifndef BP_MAXIMUM_MATCHING_H | |
219 | #define BP_MAXIMUM_MATCHING_H | |
220 | ||
221 | -#include "../CombBLAS.h" | |
222 | +#include "CombBLAS.h" | |
223 | #include <mpi.h> | |
224 | #include <sys/time.h> | |
225 | #include <iostream> | |
38dd27e8 EB |
226 | @@ -11,7 +11,6 @@ |
227 | #include <string> | |
228 | #include <sstream> | |
229 | #include "MatchingDefs.h" | |
230 | -double tTotalMaximum; | |
231 | ||
232 | namespace combblas { | |
233 | ||
234 | @@ -231,7 +231,7 @@ | |
235 | void maximumMatching(SpParMat < IT, NT, DER > & A, FullyDistVec<IT, IT>& mateRow2Col, | |
236 | FullyDistVec<IT, IT>& mateCol2Row, bool prune=true, bool randMM = false, bool maximizeWeight = false) | |
237 | { | |
238 | - | |
239 | + static MTRand GlobalMT(123); // for reproducible result | |
240 | typedef VertexTypeMM <IT> VertexType; | |
241 | ||
242 | int nthreads=1; | |
243 | @@ -420,8 +420,6 @@ | |
244 | ||
245 | MPI_Win_free(&winLeaves); | |
246 | ||
247 | - tTotalMaximum = MPI_Wtime() - tstart; | |
248 | - | |
249 | //isMaximalmatching(A, mateRow2Col, mateCol2Row, unmatchedRow, unmatchedCol); | |
250 | //isMatching(mateCol2Row, mateRow2Col); //todo there is a better way to check this | |
251 | ||
a6b9ebc2 EB |
252 | --- a/BipartiteMatchings/MatchingDefs.h |
253 | +++ b/BipartiteMatchings/MatchingDefs.h | |
254 | @@ -9,7 +9,7 @@ | |
255 | #ifndef MatchingDefs_h | |
256 | #define MatchingDefs_h | |
257 | ||
258 | -#include "../CombBLAS.h" | |
259 | +#include "CombBLAS.h" | |
260 | #include <iostream> | |
261 | ||
262 | namespace combblas { | |
263 | --- a/BipartiteMatchings/Utility.h | |
264 | +++ b/BipartiteMatchings/Utility.h | |
265 | @@ -1,7 +1,7 @@ | |
266 | #ifndef BP_UTILITY_H | |
267 | #define BP_UTILITY_H | |
268 | ||
269 | -#include "../CombBLAS.h" | |
270 | +#include "CombBLAS.h" | |
271 | ||
272 | namespace combblas { | |
273 | ||
274 | --- a/CMakeLists.txt | |
275 | +++ b/CMakeLists.txt | |
276 | @@ -68,6 +68,7 @@ set_property(TARGET CombBLAS PROPERTY VERSION ${CombBLAS_VERSION}) | |
277 | # installation | |
278 | install(DIRECTORY include/ DESTINATION include) | |
279 | install(DIRECTORY psort-1.0/include/ DESTINATION include) | |
280 | +install(DIRECTORY BipartiteMatchings DESTINATION include FILES_MATCHING PATTERN "*.h") | |
281 | install(TARGETS CombBLAS EXPORT CombBLASTargets | |
282 | LIBRARY DESTINATION lib | |
283 | ARCHIVE DESTINATION lib |