Changeset 108

Timestamp: 05/27/16 14:28:08
File:      1 edited

Legend:
      (r106 and r108 line numbers)   Unmodified
  +   (r108 line number only)        Added
  -   (r106 line number only)        Removed
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java
r106 r108

   61   61        private int tournamentSize = 7;
   62   62        private int maxGenerations = 50;
-  63             private double errorType2Weight = 1;
-  64             private int numberRuns = 200; // 200 in the paper
+       63        private double errorType2Weight = 15;
+       64        private int numberRuns = 1; // 200 in the paper
   65   65        private int maxDepth = 20; // max depth within one program
   66   66        private int maxNodes = 100; // max nodes within one program
… …
   68   68        @Override
   69   69        public void setParameter(String parameters) {
+       70
+       71            // todo: split parameters to get classifier and the configuration variables for the gprun
   70   72            if(parameters.equals("GPVVClassifier")) {
   71   73                this.classifier = new GPVVClassifier();
… …
  125  127
  126  128    /**
- 127          * One Run of a GP Classifier
- 128          * we want several runs to mitigate problems with local maxima/minima
+      129     * One Run executed by a GP Classifier
  129  130     */
  130  131    public class GPRun extends AbstractClassifier {
… …
  311  312        }
  312  313
+      314        /**
+      315         * This is the fitness function
+      316         *
+      317         * Our fitness is best if we have the less wrong classifications, this includes a weight for type2 errors
+      318         */
  313  319        @Override
  314  320        protected double evaluate(final IGPProgram program) {
… …
  329  335
  330  336                // value gives us a double, if < 0.5 we set this instance as faulty
- 331                     value = program.execute_double(0, this.x); // todo: test with this.x
+      337                value = program.execute_double(0, this.x);
  332  338
  333  339                if(value < 0.5) {
… …
  344  350                // now calc pfitness
  345  351                pfitness = (this.errorType1 + this.errorType2Weight * this.errorType2) / this.instances.length;
- 346
- 347                     //System.out.println("pfitness: " + pfitness);
  348  352
  349  353                // number of nodes in the programm, if lower then 10 we assign sFitness of 10
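The hunks above raise errorType2Weight from 1 to 15, drop numberRuns from 200 (the value from the paper) to 1, and document the fitness as a weighted misclassification count. As a reading aid, here is a minimal, self-contained sketch of the weighted-error fitness in the spirit of the pfitness line. The class and parameter names are hypothetical, and the mapping of type-1/type-2 errors to false positives/false negatives is an assumption, since the hunks do not show where the two counters are incremented.

    // Hypothetical sketch of the pfitness computation shown in the hunk above.
    // Assumption: errorType1 counts clean instances predicted faulty, errorType2
    // counts faulty instances predicted clean; r108 only shows the combination.
    public final class WeightedErrorFitnessSketch {

        public static double pFitness(double[] programOutputs, boolean[] actuallyFaulty,
                                      double errorType2Weight) {
            double errorType1 = 0;
            double errorType2 = 0;
            for (int i = 0; i < programOutputs.length; i++) {
                // as in GPRun.evaluate: an output < 0.5 marks the instance as faulty
                boolean predictedFaulty = programOutputs[i] < 0.5;
                if (predictedFaulty && !actuallyFaulty[i]) {
                    errorType1++;
                } else if (!predictedFaulty && actuallyFaulty[i]) {
                    errorType2++;
                }
            }
            // the formula from r108: weight type-2 errors, normalize by instance count
            return (errorType1 + errorType2Weight * errorType2) / programOutputs.length;
        }
    }

If type 2 indeed counts missed defective instances, the new weight of 15 makes one miss as costly as fifteen false alarms, biasing the evolved programs toward finding defects.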
… …
  413  417     * GP Multiple Data Sets Validation-Voting Classifier
  414  418     *
- 415          * As the GP Multiple Data Sets Validation Classifier
+      419     * Basically the same as the GP Multiple Data Sets Validation Classifier.
  416  420     * But here we do keep a model candidate for each training set which may later vote
  417  421     *
… …
  428  432        }
  429  433
+      434        /** Build the GP Multiple Data Sets Validation-Voting Classifier
+      435         *
+      436         * This is according to Section 6 of the Paper by Liu et al.
+      437         * It is basically the Multiple Data Sets Validation Classifier but here we keep the best models an let them vote.
+      438         *
+      439         * @param traindataSet
+      440         * @throws Exception
+      441         */
  430  442        public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
  431  443
… …
  450  462            for(int j=0; j < traindataSet.size(); j++) {
  451  463                if(j != i) {
- 452                         // if type1 and type2 errors are < 0.5 we allow the model in the final voting
+      464                    // if type1 and type2 errors are < 0.5 we allow the model in the candidates
  453  465                    errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
  454  466                    if((errors[0] < 0.5) && (errors[0] < 0.5)) {
… …
  481  493
  482  494        /**
- 483              * Use the remaining classifiers for our voting
+      495         * Use the best classifiers for each training data in a majority voting
  484  496         */
  485  497        @Override
… …
  515  527     * GP Multiple Data Sets Validation Classifier
  516  528     *
- 517          *
- 518          * for one test data set:
- 519          *   for one in X possible training data sets:
- 520          *     For Y GP Runs:
- 521          *       train one Classifier with this training data
- 522          *       then evaluate the classifier with the remaining project
- 523          *       if the candidate model performs bad (error type1 or type2 > 50%) discard it
- 524          *   for the remaining model candidates the best one is used
- 525          *
+      529     * We train a Classifier with one training project $numberRun times.
+      530     * Then we evaluate the classifier on the rest of the training projects and keep the best classifier.
+      531     * After that we have for each training project the best classifier as per the evaluation on the rest of the data set.
+      532     * Then we determine the best classifier from these candidates and keep it to be used later.
  526  533     */
  527  534    public class GPVClassifier extends AbstractClassifier {
… …
  566  573        /** Build the GP Multiple Data Sets Validation Classifier
  567  574         *
- 568              * - Traindata one of the Instances of the Set (which one? The firsT? as it is a list?)
- 569              * - Testdata one other Instances of the Set (the next one? chose randomly?)
- 570              * - Evaluation the rest of the instances
+      575         * This is according to Section 6 of the Paper by Liu et al. except for the selection of the best model.
+      576         * Section 4 describes a slightly different approach.
  571  577         *
  572  578         * @param traindataSet
… …
  582  588            LinkedList<Classifier> candidates = new LinkedList<>();
  583  589
- 584                 // 200 runs
+      590            // numberRuns full GPRuns, we generate numberRuns models for each traindata
  585  591            for(int k=0; k < this.numberRuns; k++) {
  586  592                Classifier classifier = new GPRun();
  587  593                ((GPRun)classifier).configure(this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.maxGenerations, this.errorType2Weight, this.maxDepth, this.maxNodes);
  588  594
- 589                     // one project is training data
  590  595                classifier.buildClassifier(traindataSet.get(i));
  591  596
  592  597                double[] errors;
  593  598
  594  599                // rest of the set is evaluation data, we evaluate now
  595  600                for(int j=0; j < traindataSet.size(); j++) {
  596  601                    if(j != i) {
- 597                             // if type1 and type2 errors are < 0.5 we allow the model in the final voting
+      602                        // if type1 and type2 errors are < 0.5 we allow the model in the candidate list
  598  603                        errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
  599  604                        if((errors[0] < 0.5) && (errors[0] < 0.5)) {
- 600                                 candidates.add(classifier);
+      605                            candidates.add(classifier);
  601  606                        }
  602  607                    }
… …
  604  609            }
  605  610
- 606                 // now after the evaluation we do a model selection where only one model remains per training data set
- 607                 // from that we chose the best model
- 608
- 609                 // now after the evaluation we do a model selection where only one model remains for the given training data
+      611            // after the numberRuns we have < numberRuns candidate models for this trainData
+      612            // we now evaluate the candidates
+      613            // finding the best model is not really described in the paper we go with least errors
  610  614            double smallest_error_count = Double.MAX_VALUE;
  611  615            double[] errors;
… …
  625  629            // now we have the best classifier for this training data
  626  630            classifiers.add(best);
- 627             }
- 628
- 629             // now determine the best classifier for all training data
+      631        } /* endfor trainData */
+      632
+      633        // now we have one best classifier for each trainData
+      634        // we evaluate again to find the best classifier of all time
+      635        // this selection is now according to section 4 of the paper and not 6 where an average of the 6 models is build
  630  636        double smallest_error_count = Double.MAX_VALUE;
  631  637        double error_count;
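The validation and selection scheme spread over the hunks above is easier to see in one piece. The following is a condensed, hypothetical sketch: train numberRuns models per training project, admit only models whose error rates stay below 0.5 on the other projects, then pick the candidate with the fewest summed errors. Two caveats: r108 literally tests (errors[0] < 0.5) && (errors[0] < 0.5), while the sketch checks both array entries as the surrounding comments suggest; and the sketch admits a model only if it passes on every evaluation project, a simplification of the committed loop, which adds the classifier once per passing project.

    // Condensed, hypothetical sketch of the multiple-data-sets validation and
    // model selection described in the hunks above. Model and the error layout
    // { type1Rate, type2Rate } mirror this.evaluate(...) only by assumption.
    import java.util.ArrayList;
    import java.util.List;

    final class ValidationSelectionSketch {

        interface Model {
            /** @return { type1ErrorRate, type2ErrorRate } on evalData */
            double[] evaluate(Object evalData);
        }

        static Model selectBest(List<Model> runs, List<Object> otherProjects) {
            // step 1: discard models that perform badly on any other project
            List<Model> candidates = new ArrayList<>();
            for (Model model : runs) {
                boolean keep = true;
                for (Object evalData : otherProjects) {
                    double[] errors = model.evaluate(evalData);
                    if (errors[0] >= 0.5 || errors[1] >= 0.5) {
                        keep = false;
                    }
                }
                if (keep) {
                    candidates.add(model);
                }
            }
            // step 2: "finding the best model is not really described in the
            // paper we go with least errors" (comment added in r108)
            Model best = null;
            double smallestErrorCount = Double.MAX_VALUE;
            for (Model model : candidates) {
                double errorCount = 0;
                for (Object evalData : otherProjects) {
                    double[] errors = model.evaluate(evalData);
                    errorCount += errors[0] + errors[1];
                }
                if (errorCount < smallestErrorCount) {
                    smallestErrorCount = errorCount;
                    best = model;
                }
            }
            return best; // may be null if no run survives the 0.5 filter
        }
    }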
… …
  653  659        }
  654  660
+      661        /**
+      662         * Evaluation of the Classifier
+      663         *
+      664         * We evaluate the classifier with the Instances of the evalData.
+      665         * It basically assigns the instance attribute values to the variables of the s-expression-tree and
+      666         * then counts the missclassifications.
+      667         *
+      668         * @param classifier
+      669         * @param evalData
+      670         * @return
+      671         */
  655  672        public double[] evaluate(GPRun classifier, Instances evalData) {
  656  673            GPGenotype gp = classifier.getGp();
… …
  667  684            for(Instance instance: evalData) {
  668  685
- 669                     for(int i = 0; i < instance.numAttributes()-1; i++) {
- 670                         vars[i].set(instance.value(i));
+      686                // assign instance attribute values to the variables of the s-expression-tree
+      687                double[] tmp = WekaUtils.instanceValues(instance);
+      688                for(int i = 0; i < tmp.length; i++) {
+      689                    vars[i].set(tmp[i]);
  671  690                }
  672  691
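The last hunk swaps the manual attribute loop, which assumed the class attribute to be the last one, for WekaUtils.instanceValues(instance). The helper's implementation is not part of this changeset; the sketch below is a guess at its semantics (copy all independent attribute values, skipping the class attribute) based on how the result is bound to the GP variables, and the class name is hypothetical.

    import weka.core.Instance;

    // Hypothetical re-implementation of WekaUtils.instanceValues(...); the real
    // helper lives elsewhere in CrossPare. Assumed semantics: all attribute
    // values except the class attribute, in file order.
    final class InstanceValuesSketch {

        static double[] instanceValues(Instance instance) {
            double[] values = new double[instance.numAttributes() - 1];
            int j = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i != instance.classIndex()) {
                    values[j++] = instance.value(i);
                }
            }
            return values;
        }
    }

Under these assumptions the binding no longer depends on the class attribute being last, and tmp.length matches the number of GP variables regardless of attribute order.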