Changeset 140
- Timestamp:
- 08/22/16 12:02:26 (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/MetricMatchingTraining.java
r139 r140 49 49 * This also means we can use any Weka Classifier not just LogisticRegression. 50 50 * 51 * Config: <setwisetestdataawaretrainer name="MetricMatchingTraining" param="Logistic weka.classifiers.functions.Logistic" threshold="0.05" method="spearman"/> 52 * Instead of spearman method it also takes ks, percentile. 53 * Instead of Logistic every other weka classifier can be chosen. 54 * 55 * Future work: 56 * implement chisquare test in addition to significance for attribute selection 57 * http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/stat/inference/ChiSquareTest.html 58 * use chiSquareTestDataSetsComparison 51 59 */ 52 60 public class MetricMatchingTraining extends WekaBaseTraining implements ISetWiseTestdataAwareTrainingStrategy { 53 61 54 62 private MetricMatch mm = null; 55 private Classifier classifier = new MetricMatchingClassifier();63 private Classifier classifier = null; 56 64 57 65 private String method; … … 89 97 @Override 90 98 public void apply(SetUniqueList<Instances> traindataSet, Instances testdata) { 91 99 // reset these for each run 100 this.mm = null; 101 this.classifier = null; 102 92 103 double score = 0; // matching score to select the best matching training data from the set 93 104 int num = 0; … … 116 127 } 117 128 118 // if we have found a matching instance we use it 129 // if we have found a matching instance we use it, log information about the match for additional eval later 119 130 Instances ilist = null; 120 131 if (this.mm != null) { … … 131 142 try { 132 143 if(this.mm != null) { 144 this.classifier = new MetricMatchingClassifier(); 133 145 this.classifier.buildClassifier(ilist); 134 146 ((MetricMatchingClassifier) this.classifier).setMetricMatching(this.mm); … … 257 269 return this.train_values.get(0).length; 258 270 } 259 260 /** 261 * This creates a new Instance out of the passed Instance and the previously matched attributes. 
262 * We do this because the evaluation phase requires an original Instance with every attribute. 271 272 273 /** 274 * The test instance must be of the same dataset as the train data, otherwise WekaEvaluation will die. 275 * This means we have to force the dataset of this.train (after matching) and only 276 * set the values for the attributes we matched but with the index of the traindata attributes we matched. 263 277 * 264 * @param test instance265 * @return new instance278 * @param test 279 * @return 266 280 */ 267 281 public Instance getMatchedTestInstance(Instance test) { 268 //create new instance with our matched number of attributes + 1 (the class attribute) 269 Instances testdata = this.getMatchedTest(); 270 271 Instance ni = new DenseInstance(this.attributes.size()+1); 272 ni.setDataset(testdata); 273 274 for(Map.Entry<Integer, Integer> attmatch : this.attributes.entrySet()) { 275 ni.setValue(testdata.attribute(attmatch.getKey()), test.value(attmatch.getValue())); 276 } 277 278 ni.setClassValue(test.value(test.classAttribute())); 279 280 return ni; 281 } 282 282 Instance ni = new DenseInstance(this.attributes.size()+1); 283 284 Instances inst = this.getMatchedTrain(); 285 286 ni.setDataset(inst); 287 288 // assign only the matched attributes to new indexes 289 double val; 290 int k = 0; 291 for(Map.Entry<Integer, Integer> attmatch : this.attributes.entrySet()) { 292 // get value from matched attribute 293 val = test.value(attmatch.getValue()); 294 295 // set it to new index, the order of the attributes is the same 296 ni.setValue(k, val); 297 k++; 298 } 299 ni.setClassValue(test.value(test.classAttribute())); 300 301 return ni; 302 } 303 304 283 305 /** 284 306 * returns a new instances array with the metric matched training data … … 494 516 // -1 means that it is not in the set of maximal matching 495 517 if( i != -1 && result[i] != -1) { 496 //Console.traceln(Level.INFO, "Found maximal bipartite match between: "+ i + " and " + result[i]);518 this.p_sum 
+= mwbm.weights[i][result[i]]; // we add the weight of the returned matching for scoring the complete match later 497 519 this.attributes.put(i, result[i]); 498 520 } … … 544 566 545 567 if( score > cutoff ) { 546 this.p_sum += score;547 568 mwbm.setWeight(i, j, score); 548 569 } … … 587 608 p = t.correlation(this.train_values.get(i), this.test_values.get(j)); 588 609 if (p > cutoff) { 589 this.p_sum += p;590 610 mwbm.setWeight(i, j, p); 591 611 } … … 667 687 p = t.approximateP(t.kolmogorovSmirnovStatistic(this.train_values.get(i), this.test_values.get(j)), this.train_values.get(i).length, this.test_values.get(j).length); 668 688 if (p > cutoff) { 669 this.p_sum += p;670 689 mwbm.setWeight(i, j, p); 671 690 }
Note: See TracChangeset
for help on using the changeset viewer.