Changeset 47
- Timestamp:
- 12/12/15 10:57:31 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/MetricMatchingTraining.java
r45 r47 16 16 17 17 import java.util.ArrayList; 18 import java.util.Arrays; 19 import java.util.Collections; 20 import java.util.Comparator; 18 21 import java.util.HashMap; 19 22 import java.util.Iterator; 23 import java.util.LinkedHashMap; 24 import java.util.LinkedList; 25 import java.util.List; 20 26 import java.util.Map; 21 27 import java.util.logging.Level; 28 29 import javax.management.RuntimeErrorException; 30 22 31 import java.util.Random; 23 32 … … 28 37 29 38 import de.ugoe.cs.util.console.Console; 39 import weka.attributeSelection.SignificanceAttributeEval; 30 40 import weka.classifiers.AbstractClassifier; 31 41 import weka.classifiers.Classifier; … … 35 45 import weka.core.Instance; 36 46 import weka.core.Instances; 47 37 48 38 49 public class MetricMatchingTraining extends WekaBaseTraining implements ISetWiseTestdataAwareTrainingStrategy { … … 53 64 return this.classifier; 54 65 } 55 56 57 @Override58 public String getName() {59 return "MetricMatching_" + classifierName;60 }61 66 62 67 … … 90 95 //tmp.kolmogorovSmirnovTest(0.05); 91 96 92 if( this.method.equals("spearman") ) { 97 try { 98 tmp.attributeSelection(); 99 }catch(Exception e) { 100 101 } 102 103 if (this.method.equals("spearman")) { 93 104 tmp.spearmansRankCorrelation(this.threshold); 94 105 } 95 else if ( this.method.equals("kolmogorov")) {106 else if (this.method.equals("kolmogorov")) { 96 107 tmp.kolmogorovSmirnovTest(this.threshold); 97 108 } … … 101 112 102 113 // we only select the training data from our set with the most matching attributes 103 if (tmp.getRank() > rank) {114 if (tmp.getRank() > rank) { 104 115 rank = tmp.getRank(); 105 116 biggest = tmp; … … 108 119 } 109 120 110 if ( biggest == null) {121 if (biggest == null) { 111 122 throw new RuntimeException("not enough matching attributes found"); 112 123 } … … 117 128 Instances ilist = this.mm.getMatchedTrain(); 118 129 Console.traceln(Level.INFO, "Chosing the trainingdata set num "+biggest_num +" with " + rank + " matching attributs, " + ilist.size() + " instances out of a possible set of " + traindataSet.size() + " sets"); 130 131 // replace traindataSEt 132 //traindataSet = new SetUniqueList<Instances>(); 133 traindataSet.clear(); 134 traindataSet.add(ilist); 119 135 120 136 // we have to build the classifier here: … … 122 138 123 139 // 124 if ( this.classifier == null) {140 if (this.classifier == null) { 125 141 Console.traceln(Level.SEVERE, "Classifier is null"); 126 142 } … … 200 216 201 217 this.test_values = new ArrayList<double[]>(); 202 for ( int i=0; i < this.test.numAttributes()-1; i++) {218 for (int i=0; i < this.test.numAttributes()-1; i++) { 203 219 this.test_values.add(this.test.attributeToDoubleArray(i)); 204 220 } … … 232 248 Iterator it = this.attributes.entrySet().iterator(); 233 249 int j = 0; 234 while (it.hasNext()) {250 while (it.hasNext()) { 235 251 Map.Entry values = (Map.Entry)it.next(); 236 252 ni.setValue(testdata.attribute(j), test.value((int)values.getValue())); … … 268 284 Attribute[] attrs = new Attribute[this.attributes.size()+1]; 269 285 FastVector fwTrain = new FastVector(this.attributes.size()); 270 for (int i=0; i < this.attributes.size(); i++) {286 for (int i=0; i < this.attributes.size(); i++) { 271 287 attrs[i] = new Attribute(String.valueOf(i)); 272 288 fwTrain.addElement(attrs[i]); … … 281 297 newTrain.setClassIndex(newTrain.numAttributes()-1); 282 298 283 for (int i=0; i < data.size(); i++) {299 for (int i=0; i < data.size(); i++) { 284 300 Instance ni = new DenseInstance(this.attributes.size()+1); 285 301 286 302 Iterator it = this.attributes.entrySet().iterator(); 287 303 int j = 0; 288 while (it.hasNext()) {304 while (it.hasNext()) { 289 305 Map.Entry values = (Map.Entry)it.next(); 290 306 int value = (int)values.getValue(); 291 307 292 308 // key ist traindata 293 if (name.equals("train")) {309 if (name.equals("train")) { 294 310 value = (int)values.getKey(); 295 311 } … … 302 318 } 303 319 304 return newTrain; 305 } 320 return newTrain; 321 } 322 323 324 /** 325 * performs the attribute selection 326 * we perform attribute significance tests and drop attributes 327 */ 328 public void attributeSelection() throws Exception { 329 this.attributeSelection(this.train); 330 this.attributeSelection(this.test); 331 } 332 333 private void attributeSelection(Instances which) throws Exception { 334 // 1. step we have to categorize the attributes 335 //http://weka.sourceforge.net/doc.packages/probabilisticSignificanceAE/weka/attributeSelection/SignificanceAttributeEval.html 336 337 SignificanceAttributeEval et = new SignificanceAttributeEval(); 338 et.buildEvaluator(which); 339 //double tmp[] = new double[this.train.numAttributes()]; 340 HashMap<Integer,Double> saeval = new HashMap<Integer,Double>(); 341 // evaluate all training attributes 342 // select top 15% of metrics 343 for(int i=0; i < which.numAttributes() - 1; i++) { 344 //tmp[i] = et.evaluateAttribute(i); 345 saeval.put(i, et.evaluateAttribute(i)); 346 //Console.traceln(Level.SEVERE, "Significance Attribute Eval: " + tmp); 347 } 348 349 HashMap<Integer, Double> sorted = sortByValues(saeval); 350 351 // die letzen 15% wollen wir haben 352 int last = (saeval.size() / 100) * 15; 353 int drop_first = saeval.size() - last; 354 355 // drop attributes above last 356 Iterator it = sorted.entrySet().iterator(); 357 while (it.hasNext()) { 358 Map.Entry pair = (Map.Entry)it.next(); 359 if(drop_first > 0) { 360 which.deleteAttributeAt((int)pair.getKey()); 361 } 362 drop_first--; 363 } 364 } 365 366 private HashMap sortByValues(HashMap map) { 367 List list = new LinkedList(map.entrySet()); 368 // Defined Custom Comparator here 369 Collections.sort(list, new Comparator() { 370 public int compare(Object o1, Object o2) { 371 return ((Comparable) ((Map.Entry) (o1)).getValue()) 372 .compareTo(((Map.Entry) (o2)).getValue()); 373 } 374 }); 375 376 // Here I am copying the sorted list in HashMap 377 // using LinkedHashMap to preserve the insertion order 378 HashMap sortedHashMap = new LinkedHashMap(); 379 for (Iterator it = list.iterator(); it.hasNext();) { 380 Map.Entry entry = (Map.Entry) it.next(); 381 sortedHashMap.put(entry.getKey(), entry.getValue()); 382 } 383 return sortedHashMap; 384 } 385 306 386 307 387 /** … … 315 395 316 396 // size has to be the same so we randomly sample the number of the smaller sample from the big sample 317 if ( this.train.size() > this.test.size()) {397 if (this.train.size() > this.test.size()) { 318 398 this.sample(this.train, this.test, this.train_values); 319 }else if ( this.test.size() > this.train.size()) {399 }else if (this.test.size() > this.train.size()) { 320 400 this.sample(this.test, this.train, this.test_values); 321 401 } 322 402 323 403 // try out possible attribute combinations 324 for ( int i=0; i < this.train.numAttributes()-1; i++) {325 for ( int j=0; j < this.test.numAttributes()-1; j++) {404 for (int i=0; i < this.train.numAttributes()-1; i++) { 405 for (int j=0; j < this.test.numAttributes()-1; j++) { 326 406 // class attributes are not relevant 327 if ( this.train.classIndex() == i) {407 if (this.train.classIndex() == i) { 328 408 continue; 329 409 } 330 if ( this.test.classIndex() == j) {410 if (this.test.classIndex() == j) { 331 411 continue; 332 412 } 333 413 334 414 335 if ( !this.attributes.containsKey(i)) {415 if (!this.attributes.containsKey(i)) { 336 416 p = t.correlation(this.train_values.get(i), this.test_values.get(j)); 337 if ( p > cutoff) {417 if (p > cutoff) { 338 418 this.attributes.put(i, j); 339 419 } … … 349 429 ArrayList<Integer> indices = new ArrayList<Integer>(); 350 430 Random rand = new Random(); 351 while (indices_to_draw > 0) {431 while (indices_to_draw > 0) { 352 432 353 433 int index = rand.nextInt(bigger.size()-1); 354 434 355 if ( !indices.contains(index)) {435 if (!indices.contains(index)) { 356 436 indices.add(index); 357 437 indices_to_draw--; … … 360 440 361 441 // now reduce our values to the indices we choose above for every attribute 362 for (int att=0; att < bigger.numAttributes()-1; att++) {442 for (int att=0; att < bigger.numAttributes()-1; att++) { 363 443 364 444 // get double for the att … … 367 447 368 448 int i = 0; 369 for ( Iterator<Integer> it = indices.iterator(); it.hasNext();) {449 for (Iterator<Integer> it = indices.iterator(); it.hasNext();) { 370 450 new_vals[i] = vals[it.next()]; 371 451 i++; … … 394 474 // todo: this relies on the last attribute being the class, 395 475 //Console.traceln(Level.INFO, "Starting Kolmogorov-Smirnov test for traindata size: " + this.train.size() + " attributes("+this.train.numAttributes()+") and testdata size: " + this.test.size() + " attributes("+this.test.numAttributes()+")"); 396 for ( int i=0; i < this.train.numAttributes()-1; i++) {476 for (int i=0; i < this.train.numAttributes()-1; i++) { 397 477 for ( int j=0; j < this.test.numAttributes()-1; j++) { 398 478 //p = t.kolmogorovSmirnovTest(this.train_values.get(i), this.test_values.get(j)); … … 406 486 } 407 487 // PRoblem: exactP is forced for small sample sizes and it never finishes 408 if ( !this.attributes.containsKey(i)) {488 if (!this.attributes.containsKey(i)) { 409 489 410 490 // todo: output the values and complain on the math.commons mailinglist 411 491 p = t.kolmogorovSmirnovTest(this.train_values.get(i), this.test_values.get(j)); 412 if ( p > cutoff) {492 if (p > cutoff) { 413 493 this.attributes.put(i, j); 414 494 }
Note: See TracChangeset
for help on using the changeset viewer.