Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/MetricMatchingTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/MetricMatchingTraining.java	(revision 136)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/MetricMatchingTraining.java	(revision 137)
@@ -27,6 +27,4 @@
 import java.util.logging.Level;
 
-import javax.management.RuntimeErrorException;
-
 import java.util.Random;
 
@@ -46,5 +44,12 @@
 import weka.core.Instances;
 
-
+/**
+ * Implements Heterogenous Defect Prediction after Nam et al.
+ * 
+ * TODO:
+ * - spacing, coding conventions
+ * - we depend on having exactly one class attribute on multiple locations
+ * - 
+ */
 public class MetricMatchingTraining extends WekaBaseTraining implements ISetWiseTestdataAwareTrainingStrategy {
 
@@ -78,6 +83,6 @@
 
 	/**
-	 * We need the testdata instances to do a metric matching, so in this special case we get this data
-	 * before evaluation
+	 * We need the test data instances to do a metric matching, so in this special case we get this data
+	 * before evaluation.
 	 */
 	@Override
@@ -85,5 +90,5 @@
 		this.traindataSet = traindataSet;
 
-		int rank = 0; // we want at least 5 matching attributes
+		double score = 0; // custom ranking score to select the best training data from the set
 		int num = 0;
 		int biggest_num = 0;
@@ -92,4 +97,5 @@
 		for (Instances traindata : this.traindataSet) {
 			num++;
+
 			tmp = new MetricMatch(traindata, testdata);
 
@@ -97,4 +103,5 @@
 			try {
 				tmp.attributeSelection();
+				tmp.matchAttributes(this.method, this.threshold);
 			}catch(Exception e) {
 				e.printStackTrace();
@@ -102,20 +109,7 @@
 			}
 			
-			if (this.method.equals("spearman")) {
-			    tmp.spearmansRankCorrelation(this.threshold);
-			}
-			else if (this.method.equals("kolmogorov")) {
-			    tmp.kolmogorovSmirnovTest(this.threshold);
-			}
-			else if( this.method.equals("percentile") ) {
-			    tmp.percentiles(this.threshold);
-			}
-			else {
-			    throw new RuntimeException("unknown method");
-			}
-
 			// we only select the training data from our set with the most matching attributes
-			if (tmp.getRank() > rank) {
-				rank = tmp.getRank();
+			if (tmp.getScore() > score) {
+				score = tmp.getScore();
 				biggest = tmp;
 				biggest_num = num;
@@ -127,10 +121,12 @@
 		}
 
-		// we use the best match
-		
+		// we use the best match according to our matching score
 		this.mm = biggest;
 		Instances ilist = this.mm.getMatchedTrain();
-		Console.traceln(Level.INFO, "Chosing the trainingdata set num "+biggest_num +" with " + rank + " matching attributes, " + ilist.size() + " instances out of a possible set of " + traindataSet.size() + " sets");
-		
+		Console.traceln(Level.INFO, "Chosing the trainingdata set num "+biggest_num +" with " + score + " matching score, " + ilist.size() + " instances, and " + biggest.attributes.size() + " matched attributes out of a possible set of " + traindataSet.size() + " sets");
+		
+		for(int i = 0; i < this.mm.attributes.size(); i++) {
+		    Console.traceln(Level.INFO, "Matched Attribute: " + this.mm.train.attribute(i).name() + " with " + this.mm.test.attribute((int)this.mm.attributes.get(i)).name());
+		}
 		// replace traindataSEt
 		//traindataSet = new SetUniqueList<Instances>();
@@ -156,5 +152,6 @@
 	
 	/**
-	 * encapsulates the classifier configured with WekaBase
+	 * Encapsulates the classifier configured with WekaBase within but use metric matching.
+	 * This allows us to use any Weka classifier with Heterogenous Defect Prediction.
 	 */
 	public class MetricMatchingClassifier extends AbstractClassifier {
@@ -197,40 +194,62 @@
 	 */
     public class MetricMatch {
-		 Instances train;
-		 Instances test;
-		 
-		 HashMap<Integer, Integer> attributes = new HashMap<Integer,Integer>();
-		 
-		 ArrayList<double[]> train_values;
-		 ArrayList<double[]> test_values;
-		 
-		 // todo: this constructor does not work
-		 public MetricMatch() {
-		 }
-		 
-		 public MetricMatch(Instances train, Instances test) {
-			 this.train = new Instances(train);  // expensive! but we are dropping the attributes
-			 this.test = new Instances(test);
+	    Instances train;
+		Instances test;
+		
+		// used to sum up the matching values of all attributes
+		double p_sum = 0;
+		
+		// attribute matching, train -> test
+		HashMap<Integer, Integer> attributes = new HashMap<Integer,Integer>();
+		//double[][] weights;  /* weight matrix, needed to find maximum weighted bipartite matching */
+		 
+		ArrayList<double[]> train_values;
+		ArrayList<double[]> test_values;
+
+		// todo: this constructor does not work
+		public MetricMatch() {
+		}
+		 
+		public MetricMatch(Instances train, Instances test) {
+		    // expensive! but we are dropping the attributes so we have to copy all of the data
+		    this.train = new Instances(train);
+			this.test = new Instances(test);
 			 
-			 // 1. convert metrics of testdata and traindata to later use in test
-			 this.train_values = new ArrayList<double[]>();
-			 for (int i = 0; i < this.train.numAttributes()-1; i++) {
-		 		this.train_values.add(train.attributeToDoubleArray(i));
-	 		 }
+			// 1. convert metrics of testdata and traindata to later use in test
+			this.train_values = new ArrayList<double[]>();
+			for (int i = 0; i < this.train.numAttributes()-1; i++) {
+			    this.train_values.add(train.attributeToDoubleArray(i));
+			}
 			
-			 this.test_values = new ArrayList<double[]>();
-			 for (int i=0; i < this.test.numAttributes()-1; i++) {
-				this.test_values.add(this.test.attributeToDoubleArray(i));
-			 }
-		 }
-		 
-		 /**
-		  * returns the number of matched attributes
-		  * as a way of scoring traindata sets individually
-		  * 
-		  * @return
-		  */
-		 public int getRank() {
-			 return this.attributes.size();
+			this.test_values = new ArrayList<double[]>();
+			for (int i=0; i < this.test.numAttributes()-1; i++) {
+			    this.test_values.add(this.test.attributeToDoubleArray(i));
+			}
+		}
+		 
+		/**
+		 * We have a lot of matching possibilities.
+		 * Here we try to determine the best one.
+		 * 
+		 * @return double matching score
+		 */
+	    public double getScore() {
+	        int as = this.attributes.size();
+	        
+	        // we use thresholding ranking approach for numInstances to influence the matching score
+	        int instances = this.train.numInstances();
+	        int inst_rank = 0;
+	        if(instances > 100) {
+	            inst_rank = 1;
+	        }
+	        if(instances > 500) {
+                inst_rank = 2;
+            }
+            
+	        return this.p_sum + as + inst_rank;
+	    }
+		 
+		 public HashMap<Integer, Integer> getAttributes() {
+		     return this.attributes;
 		 }
 		 
@@ -260,6 +279,5 @@
 			 
 			 ni.setClassValue(test.value(test.classAttribute()));
-			 
-			 //System.out.println(ni);
+
 			 return ni;
 		 }
@@ -283,7 +301,9 @@
 		 }
 		 
+		 // todo: there must be a better way
 		 // https://weka.wikispaces.com/Programmatic+Use
 		 private Instances getMatchedInstances(String name, Instances data) {
-			 // construct our new attributes
+			 //Console.traceln(Level.INFO, "Constructing instances from: " + name);
+		     // construct our new attributes
 			 Attribute[] attrs = new Attribute[this.attributes.size()+1];
 			 FastVector fwTrain = new FastVector(this.attributes.size());
@@ -301,4 +321,6 @@
 			 newTrain.setClassIndex(newTrain.numAttributes()-1);
 			 
+			 //Console.traceln(Level.INFO, "data attributes: " + data.numAttributes() + ", this.attributes: "+this.attributes.size());
+			 
 			 for (int i=0; i < data.size(); i++) {
 				 Instance ni = new DenseInstance(this.attributes.size()+1);
@@ -314,4 +336,5 @@
 						 value = (int)values.getKey();
 					 }
+					 //Console.traceln(Level.INFO, "setting attribute " + j + " with data from instance: " + i);
 					 ni.setValue(newTrain.attribute(j), data.instance(i).value(value));
 					 j++;
@@ -328,5 +351,8 @@
 		/**
 		 * performs the attribute selection
-		 * we perform attribute significance tests and drop attributes
+		 * we perform attribute significance tests and drop attributes 
+		 * 
+		 * attribute selection is only performed on the source dataset
+		 * we retain the top 15% attributes (if 15% is a float we just use the integer part)
 		 */
 		public void attributeSelection() throws Exception {
@@ -335,5 +361,5 @@
 			//Console.traceln(Level.INFO, "-----");
 			//Console.traceln(Level.INFO, "Attribute Selection on Test Attributes ("+this.test.numAttributes()+")");
-			this.attributeSelection(this.test);
+			//this.attributeSelection(this.test);
 			//Console.traceln(Level.INFO, "-----");
 		}
@@ -358,6 +384,6 @@
 			HashMap<String, Double> sorted = sortByValues(saeval);
 			
-			// die letzen 15% wollen wir haben
-			float last = ((float)saeval.size() / 100) * 15;
+			// die besten 15% wollen wir haben
+			double last = ((double)saeval.size() / 100.0) * 15.0;
 			int drop_first = saeval.size() - (int)last;
 			
@@ -380,8 +406,6 @@
 	    		}
 		    	drop_first-=1;
-		 
-		    
 		    }
-//		    //Console.traceln(Level.INFO, "Now we have " + which.numAttributes() + " attributes left (incl. class attribute!)");
+		    //Console.traceln(Level.INFO, "Now we have " + which.numAttributes() + " attributes left (incl. class attribute!)");
 		}
 		
@@ -406,4 +430,34 @@
 		}
 		 
+		
+        
+		public void matchAttributes(String type, double cutoff) {
+		    
+
+		    MWBMatchingAlgorithm mwbm = new MWBMatchingAlgorithm(this.train.numAttributes(), this.test.numAttributes());
+		    
+		    if (type.equals("spearman")) {
+		        this.spearmansRankCorrelation(cutoff, mwbm);
+		    }else if(type.equals("ks")) {
+		        this.kolmogorovSmirnovTest(cutoff, mwbm);
+		    }else if(type.equals("percentile")) {
+		        this.percentiles(cutoff, mwbm);
+		    }else {
+		        throw new RuntimeException("unknown matching method");
+		    }
+		    
+		    // resulting maximal match
+            int[] result = mwbm.getMatching();
+            for( int i = 0; i < result.length; i++) {
+                
+                // -1 means that it is not in the set of maximal matching
+                if( i != -1 && result[i] != -1) {
+                    //Console.traceln(Level.INFO, "Found maximal bipartite match between: "+ i + " and " + result[i]);
+                    this.attributes.put(i, result[i]);
+                }
+            }
+        }
+        
+        
 		/**
 		 * Calculates the Percentiles of the source and target metrics.
@@ -411,43 +465,45 @@
 		 * @param cutoff
 		 */
-		public void percentiles(double cutoff) {
-		    for( int i = 0; i < this.train.numAttributes()-1; i++ ) {
-                for( int j = 0; j < this.test.numAttributes()-1; j++ ) {
+		public void percentiles(double cutoff, MWBMatchingAlgorithm mwbm) {
+		    for( int i = 0; i < this.train.numAttributes(); i++ ) {
+                for( int j = 0; j < this.test.numAttributes(); j++ ) {
+                    // negative infinity counts as not present, we do this so we don't have to map between attribute indexs in weka
+                    // and the result of the mwbm computation
+                    mwbm.setWeight(i, j, Double.NEGATIVE_INFINITY);
+                    
                     // class attributes are not relevant 
-                    if( this.train.classIndex() == i ) {
+                    if (this.test.classIndex() == j) {
                         continue;
                     }
-                    if( this.test.classIndex() == j ) {
+                    if (this.train.classIndex() == i) {
                         continue;
                     }
+
+                    // get percentiles
+                    double train[] = this.train_values.get(i);
+                    double test[] = this.test_values.get(j);
                     
+                    Arrays.sort(train);
+                    Arrays.sort(test);
                     
-                    if( !this.attributes.containsKey(i) ) {
-                        // get percentiles
-                        double train[] = this.train_values.get(i);
-                        double test[] = this.test_values.get(j);
-                        
-                        Arrays.sort(train);
-                        Arrays.sort(test);
-                        
-                        // percentiles
-                        double train_p;
-                        double test_p;
-                        double score = 0.0;
-                        for( double p=0.1; p < 1; p+=0.1 ) {
-                            train_p = train[(int)Math.ceil(train.length * p)];
-                            test_p = test[(int)Math.ceil(test.length * p)];
-                        
-                            if( train_p > test_p ) {
-                                score += test_p / train_p;
-                            }else {
-                                score += train_p / test_p;
-                            }
-                        }
-                        
-                        if( score > cutoff ) {
-                            this.attributes.put(i, j);
+                    // percentiles
+                    double train_p;
+                    double test_p;
+                    double score = 0.0;
+                    for( int p=1; p <= 9; p++ ) {
+                        train_p = train[(int)Math.ceil(train.length * (p/100))];
+                        test_p = test[(int)Math.ceil(test.length * (p/100))];
+                    
+                        if( train_p > test_p ) {
+                            score += test_p / train_p;
+                        }else {
+                            score += train_p / test_p;
                         }
                     }
+                    
+                    if( score > cutoff ) {
+                        this.p_sum += score;
+                        mwbm.setWeight(i, j, score);
+                    }
                 }
             }
@@ -455,10 +511,13 @@
 		 
 		 /**
-		  * calculate Spearmans rank correlation coefficient as matching score
+		  * Calculate Spearmans rank correlation coefficient as matching score
+		  * The number of instances for the source and target needs to be the same so we randomly sample from the bigger one.
 		  * 
 		  * @param cutoff
+		  * @param mwbmatching
 		  */
-		 public void spearmansRankCorrelation(double cutoff) {
-			 double p = 0;			 
+		 public void spearmansRankCorrelation(double cutoff, MWBMatchingAlgorithm mwbm) {
+			 double p = 0;
+
 			 SpearmansCorrelation t = new SpearmansCorrelation();
 
@@ -469,28 +528,34 @@
 			     this.sample(this.test, this.train, this.test_values);
 			 }
-			 
-			 // try out possible attribute combinations
-            for (int i=0; i < this.train.numAttributes()-1; i++) {
-                for (int j=0; j < this.test.numAttributes()-1; j++) {
+			
+            // try out possible attribute combinations
+            for (int i=0; i < this.train.numAttributes(); i++) {
+
+                for (int j=0; j < this.test.numAttributes(); j++) {
+                    // negative infinity counts as not present, we do this so we don't have to map between attribute indexs in weka
+                    // and the result of the mwbm computation
+                    mwbm.setWeight(i, j, Double.NEGATIVE_INFINITY);
+                    
                     // class attributes are not relevant 
+                    if (this.test.classIndex() == j) {
+                        continue;
+                    }
                     if (this.train.classIndex() == i) {
                         continue;
                     }
-                    if (this.test.classIndex() == j) {
-                        continue;
-                    }
                     
-                    
-					if (!this.attributes.containsKey(i)) {
-						p = t.correlation(this.train_values.get(i), this.test_values.get(j));
-						if (p > cutoff) {
-							this.attributes.put(i, j);
-						}
+					p = t.correlation(this.train_values.get(i), this.test_values.get(j));
+					if (p > cutoff) {
+					    this.p_sum += p;
+                        mwbm.setWeight(i, j, p);
+					    //Console.traceln(Level.INFO, "Found match: p-val: " + p);
 					}
 				}
 		    }
-        }
-
-		
+
+            //Console.traceln(Level.INFO, "Found " + this.attributes.size() + " matching attributes");
+        }
+
+		 
         public void sample(Instances bigger, Instances smaller, ArrayList<double[]> values) {
             // we want to at keep the indices we select the same
@@ -535,37 +600,443 @@
 		 * @return p-val
 		 */
-		public void kolmogorovSmirnovTest(double cutoff) {
+		public void kolmogorovSmirnovTest(double cutoff, MWBMatchingAlgorithm mwbm) {
 			double p = 0;
-			
+            
 			KolmogorovSmirnovTest t = new KolmogorovSmirnovTest();
 
-			// todo: this should be symmetrical we don't have to compare i to j and then j to i 
-			// todo: this relies on the last attribute being the class, 
 			//Console.traceln(Level.INFO, "Starting Kolmogorov-Smirnov test for traindata size: " + this.train.size() + " attributes("+this.train.numAttributes()+") and testdata size: " + this.test.size() + " attributes("+this.test.numAttributes()+")");
-			for (int i=0; i < this.train.numAttributes()-1; i++) {
-				for ( int j=0; j < this.test.numAttributes()-1; j++) {
-					//p = t.kolmogorovSmirnovTest(this.train_values.get(i), this.test_values.get(j));
-					//p = t.kolmogorovSmirnovTest(this.train_values.get(i), this.test_values.get(j));
+			for (int i=0; i < this.train.numAttributes(); i++) {
+				for ( int j=0; j < this.test.numAttributes(); j++) {
+                    // negative infinity counts as not present, we do this so we don't have to map between attribute indexs in weka
+                    // and the result of the mwbm computation
+                    mwbm.setWeight(i, j, Double.NEGATIVE_INFINITY);
+                    
                     // class attributes are not relevant 
-                    if ( this.train.classIndex() == i ) {
+                    if (this.test.classIndex() == j) {
                         continue;
                     }
-                    if ( this.test.classIndex() == j ) {
+                    if (this.train.classIndex() == i) {
                         continue;
                     }
-					// PRoblem: exactP is forced for small sample sizes and it never finishes
-					if (!this.attributes.containsKey(i)) {
-						
-						// todo: output the values and complain on the math.commons mailinglist
-						p = t.kolmogorovSmirnovTest(this.train_values.get(i), this.test_values.get(j));
-						if (p > cutoff) {
-							this.attributes.put(i, j);
-						}
+                    
+                    // this may invoke exactP on small sample sizes which will not terminate in all cases
+					//p = t.kolmogorovSmirnovTest(this.train_values.get(i), this.test_values.get(j), false);
+					p = t.approximateP(t.kolmogorovSmirnovStatistic(this.train_values.get(i), this.test_values.get(j)), this.train_values.get(i).length, this.test_values.get(j).length);
+					if (p > cutoff) {
+                        this.p_sum += p;
+                        mwbm.setWeight(i, j, p);
 					}
 				}
 			}
-
-			//Console.traceln(Level.INFO, "Found " + this.attmatch.size() + " matching attributes");
-		}
-	 }
+			//Console.traceln(Level.INFO, "Found " + this.attributes.size() + " matching attributes");
+	    }
+    }
+
+    /*
+     * Copyright (c) 2007, Massachusetts Institute of Technology
+     * Copyright (c) 2005-2006, Regents of the University of California
+     * All rights reserved.
+     * 
+     * Redistribution and use in source and binary forms, with or without
+     * modification, are permitted provided that the following conditions
+     * are met:
+     *
+     * * Redistributions of source code must retain the above copyright
+     *   notice, this list of conditions and the following disclaimer.
+     *
+     * * Redistributions in binary form must reproduce the above copyright
+     *   notice, this list of conditions and the following disclaimer in
+     *   the documentation and/or other materials provided with the
+     *   distribution.  
+     *
+     * * Neither the name of the University of California, Berkeley nor
+     *   the names of its contributors may be used to endorse or promote
+     *   products derived from this software without specific prior 
+     *   written permission.
+     *
+     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+     * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+     * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+     * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+     * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+     * OF THE POSSIBILITY OF SUCH DAMAGE.
+     */
+
+
+
+    /**
+     * An engine for finding the maximum-weight matching in a complete
+     * bipartite graph.  Suppose we have two sets <i>S</i> and <i>T</i>,
+     * both of size <i>n</i>.  For each <i>i</i> in <i>S</i> and <i>j</i>
+     * in <i>T</i>, we have a weight <i>w<sub>ij</sub></i>.  A perfect
+     * matching <i>X</i> is a subset of <i>S</i> x <i>T</i> such that each
+     * <i>i</i> in <i>S</i> occurs in exactly one element of <i>X</i>, and
+     * each <i>j</i> in <i>T</i> occurs in exactly one element of
+     * <i>X</i>.  Thus, <i>X</i> can be thought of as a one-to-one
+     * function from <i>S</i> to <i>T</i>.  The weight of <i>X</i> is the
+     * sum, over (<i>i</i>, <i>j</i>) in <i>X</i>, of
+     * <i>w<sub>ij</sub></i>.  A BipartiteMatcher takes the number
+     * <i>n</i> and the weights <i>w<sub>ij</sub></i>, and finds a perfect
+     * matching of maximum weight.
+     *
+     * It uses the Hungarian algorithm of Kuhn (1955), as improved and
+     * presented by E. L. Lawler in his book <cite>Combinatorial
+     * Optimization: Networks and Matroids</cite> (Holt, Rinehart and
+     * Winston, 1976, p. 205-206).  The running time is
+     * O(<i>n</i><sup>3</sup>).  The weights can be any finite real
+     * numbers; Lawler's algorithm assumes positive weights, so if
+     * necessary we add a constant <i>c</i> to all the weights before
+     * running the algorithm.  This increases the weight of every perfect
+     * matching by <i>nc</i>, which doesn't change which perfect matchings
+     * have maximum weight.
+     *
+     * If a weight is set to Double.NEGATIVE_INFINITY, then the algorithm will 
+     * behave as if that edge were not in the graph.  If all the edges incident on 
+     * a given node have weight Double.NEGATIVE_INFINITY, then the final result 
+     * will not be a perfect matching, and an exception will be thrown.  
+     */
+     class MWBMatchingAlgorithm {
+        /**
+         * Creates a BipartiteMatcher without specifying the graph size.  Calling 
+         * any other method before calling reset will yield an 
+         * IllegalStateException.
+         */
+        
+         /**
+         * Tolerance for comparisons to zero, to account for
+         * floating-point imprecision.  We consider a positive number to
+         * be essentially zero if it is strictly less than TOL.
+         */
+        private static final double TOL = 1e-10;
+        //Number of left side nodes
+        int n;
+
+        //Number of right side nodes
+        int m;
+
+        double[][] weights;
+        double minWeight;
+        double maxWeight;
+
+        // If (i, j) is in the mapping, then sMatches[i] = j and tMatches[j] = i.  
+        // If i is unmatched, then sMatches[i] = -1 (and likewise for tMatches). 
+        int[] sMatches;
+        int[] tMatches;
+
+        static final int NO_LABEL = -1;
+        static final int EMPTY_LABEL = -2;
+
+        int[] sLabels;
+        int[] tLabels;
+
+        double[] u;
+        double[] v;
+        
+        double[] pi;
+
+        List<Integer> eligibleS = new ArrayList<Integer>();
+        List<Integer> eligibleT = new ArrayList<Integer>(); 
+        
+        
+        public MWBMatchingAlgorithm() {
+        n = -1;
+        m = -1;
+        }
+
+        /**
+         * Creates a BipartiteMatcher and prepares it to run on an n x m graph.  
+         * All the weights are initially set to 1.  
+         */
+        public MWBMatchingAlgorithm(int n, int m) {
+        reset(n, m);
+        }
+
+        /**
+         * Resets the BipartiteMatcher to run on an n x m graph.  The weights are 
+         * all reset to 1.
+         */
+        private void reset(int n, int m) {
+            if (n < 0 || m < 0) {
+                throw new IllegalArgumentException("Negative num nodes: " + n + " or " + m);
+            }
+            this.n = n;
+            this.m = m;
+
+            weights = new double[n][m];
+            for (int i = 0; i < n; i++) {
+                for (int j = 0; j < m; j++) {
+                weights[i][j] = 1;
+                }
+            }
+            minWeight = 1;
+            maxWeight = Double.NEGATIVE_INFINITY;
+
+            sMatches = new int[n];
+            tMatches = new int[m];
+            sLabels = new int[n];
+            tLabels = new int[m];
+            u = new double[n];
+            v = new double[m];
+            pi = new double[m];
+            
+        }
+        /**
+         * Sets the weight w<sub>ij</sub> to the given value w. 
+         *
+         * @throws IllegalArgumentException if i or j is outside the range [0, n).
+         */
+        public void setWeight(int i, int j, double w) {
+        if (n == -1 || m == -1) {
+            throw new IllegalStateException("Graph size not specified.");
+        }
+        if ((i < 0) || (i >= n)) {
+            throw new IllegalArgumentException("i-value out of range: " + i);
+        }
+        if ((j < 0) || (j >= m)) {
+            throw new IllegalArgumentException("j-value out of range: " + j);
+        }
+        if (Double.isNaN(w)) {
+            throw new IllegalArgumentException("Illegal weight: " + w);
+        }
+
+        weights[i][j] = w;
+        if ((w > Double.NEGATIVE_INFINITY) && (w < minWeight)) {
+            minWeight = w;
+        }
+        if (w > maxWeight) {
+            maxWeight = w;
+        }
+        }
+
+        /**
+         * Returns a maximum-weight perfect matching relative to the weights 
+         * specified with setWeight.  The matching is represented as an array arr 
+         * of length n, where arr[i] = j if (i,j) is in the matching.
+         */
+        public int[] getMatching() {
+        if (n == -1 || m == -1 ) {
+            throw new IllegalStateException("Graph size not specified.");
+        }
+        if (n == 0) {
+            return new int[0];
+        }
+        ensurePositiveWeights();
+
+        // Step 0: Initialization
+        eligibleS.clear();
+        eligibleT.clear();
+        for (Integer i = 0; i < n; i++) {
+            sMatches[i] = -1;
+
+            u[i] = maxWeight; // ambiguous on p. 205 of Lawler, but see p. 202
+
+            // this is really first run of Step 1.0
+            sLabels[i] = EMPTY_LABEL; 
+            eligibleS.add(i);
+        }
+
+        for (int j = 0; j < m; j++) {
+            tMatches[j] = -1;
+
+            v[j] = 0;
+            pi[j] = Double.POSITIVE_INFINITY;
+
+            // this is really first run of Step 1.0
+            tLabels[j] = NO_LABEL;
+        }
+        
+        while (true) {
+            // Augment the matching until we can't augment any more given the 
+            // current settings of the dual variables.  
+            while (true) {
+            // Steps 1.1-1.4: Find an augmenting path
+            int lastNode = findAugmentingPath();
+            if (lastNode == -1) {
+                break; // no augmenting path
+            }
+                    
+            // Step 2: Augmentation
+            flipPath(lastNode);
+            for (int i = 0; i < n; i++)
+                sLabels[i] = NO_LABEL;
+            
+            for (int j = 0; j < m; j++) {
+                pi[j] = Double.POSITIVE_INFINITY;
+                tLabels[j] = NO_LABEL;
+            }
+            
+            
+            
+            // This is Step 1.0
+            eligibleS.clear();
+            for (int i = 0; i < n; i++) {
+                if (sMatches[i] == -1) {
+                sLabels[i] = EMPTY_LABEL;
+                eligibleS.add(new Integer(i));
+                }
+            }
+
+            
+            eligibleT.clear();
+            }
+
+            // Step 3: Change the dual variables
+
+            // delta1 = min_i u[i]
+            double delta1 = Double.POSITIVE_INFINITY;
+            for (int i = 0; i < n; i++) {
+            if (u[i] < delta1) {
+                delta1 = u[i];
+            }
+            }
+
+            // delta2 = min_{j : pi[j] > 0} pi[j]
+            double delta2 = Double.POSITIVE_INFINITY;
+            for (int j = 0; j < m; j++) {
+            if ((pi[j] >= TOL) && (pi[j] < delta2)) {
+                delta2 = pi[j];
+            }
+            }
+
+            if (delta1 < delta2) {
+            // In order to make another pi[j] equal 0, we'd need to 
+            // make some u[i] negative.  
+            break; // we have a maximum-weight matching
+            }
+                
+            changeDualVars(delta2);
+        }
+
+        int[] matching = new int[n];
+        for (int i = 0; i < n; i++) {
+            matching[i] = sMatches[i];
+        }
+        return matching;
+        }
+
+        /**
+         * Tries to find an augmenting path containing only edges (i,j) for which 
+         * u[i] + v[j] = weights[i][j].  If it succeeds, returns the index of the 
+         * last node in the path.  Otherwise, returns -1.  In any case, updates 
+         * the labels and pi values.
+         */
+        int findAugmentingPath() {
+        while ((!eligibleS.isEmpty()) || (!eligibleT.isEmpty())) {
+            if (!eligibleS.isEmpty()) {
+            int i = ((Integer) eligibleS.get(eligibleS.size() - 1)).
+                intValue();
+            eligibleS.remove(eligibleS.size() - 1);
+            for (int j = 0; j < m; j++) {
+                // If pi[j] has already been decreased essentially
+                // to zero, then j is already labeled, and we
+                // can't decrease pi[j] any more.  Omitting the 
+                // pi[j] >= TOL check could lead us to relabel j
+                // unnecessarily, since the diff we compute on the
+                // next line may end up being less than pi[j] due
+                // to floating point imprecision.
+                if ((tMatches[j] != i) && (pi[j] >= TOL)) {
+                double diff = u[i] + v[j] - weights[i][j];
+                if (diff < pi[j]) {
+                    tLabels[j] = i;
+                    pi[j] = diff;
+                    if (pi[j] < TOL) {
+                    eligibleT.add(new Integer(j));
+                    }
+                }
+                }
+            }
+            } else {
+            int j = ((Integer) eligibleT.get(eligibleT.size() - 1)).
+                intValue();
+            eligibleT.remove(eligibleT.size() - 1);
+            if (tMatches[j] == -1) {
+                return j; // we've found an augmenting path
+            } 
+
+            int i = tMatches[j];
+            sLabels[i] = j;
+            eligibleS.add(new Integer(i)); // ok to add twice
+            }
+        }
+
+        return -1;
+        }
+
+        /**
+         * Given an augmenting path ending at lastNode, "flips" the path.  This 
+         * means that an edge on the path is in the matching after the flip if 
+         * and only if it was not in the matching before the flip.  An augmenting 
+         * path connects two unmatched nodes, so the result is still a matching. 
+         */ 
+        void flipPath(int lastNode) {
+            while (lastNode != EMPTY_LABEL) {
+                int parent = tLabels[lastNode];
+    
+                // Add (parent, lastNode) to matching.  We don't need to 
+                // explicitly remove any edges from the matching because: 
+                //  * We know at this point that there is no i such that 
+                //    sMatches[i] = lastNode.  
+                //  * Although there might be some j such that tMatches[j] =
+                //    parent, that j must be sLabels[parent], and will change 
+                //    tMatches[j] in the next time through this loop.  
+                sMatches[parent] = lastNode;
+                tMatches[lastNode] = parent;
+                            
+                lastNode = sLabels[parent];
+            }
+        }
+
+        void changeDualVars(double delta) {
+            for (int i = 0; i < n; i++) {
+                if (sLabels[i] != NO_LABEL) {
+                u[i] -= delta;
+                }
+            }
+                
+            for (int j = 0; j < m; j++) {
+                if (pi[j] < TOL) {
+                v[j] += delta;
+                } else if (tLabels[j] != NO_LABEL) {
+                pi[j] -= delta;
+                if (pi[j] < TOL) {
+                    eligibleT.add(new Integer(j));
+                }
+                }
+            }
+        }
+
+        /**
+         * Ensures that all weights are either Double.NEGATIVE_INFINITY, 
+         * or strictly greater than zero.
+         */
+        private void ensurePositiveWeights() {
+            // minWeight is the minimum non-infinite weight
+            if (minWeight < TOL) {
+                for (int i = 0; i < n; i++) {
+                for (int j = 0; j < m; j++) {
+                    weights[i][j] = weights[i][j] - minWeight + 1;
+                }
+                }
+    
+                maxWeight = maxWeight - minWeight + 1;
+                minWeight = 1;
+            }
+        }
+
+        @SuppressWarnings("unused")
+        private void printWeights() {
+            for (int i = 0; i < n; i++) {
+                for (int j = 0; j < m; j++) {
+                System.out.print(weights[i][j] + " ");
+                }
+                System.out.println("");
+            }
+        }
+    }
 }