Changeset 103 for trunk/CrossPare/src/de/ugoe
- Timestamp: 05/19/16 13:56:30 (9 years ago)
- File: 1 edited
Legend:
- Unmodified (no prefix)
- Added (+)
- Removed (-)
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java
r93 → r103

  package de.ugoe.cs.cpdp.training;
+
+ import java.util.List;

  import org.apache.commons.collections4.list.SetUniqueList;
…
  import weka.core.Instances;
  import org.apache.commons.lang3.ArrayUtils;
  import org.jgap.Configuration;
  import org.jgap.InvalidConfigurationException;
  import org.jgap.gp.CommandGene;
…
  import org.jgap.util.ICloneable;

+ import de.ugoe.cs.cpdp.util.WekaUtils;
+
  import org.jgap.gp.impl.ProgramChromosome;
  import org.jgap.util.CloneException;
…
  public class GPTraining implements ISetWiseTrainingStrategy, IWekaCompatibleTrainer {

-     private final GPClassifier classifier = new GPClassifier();
+     private GPVClassifier classifier = new GPVClassifier();

      private int populationSize = 1000;
…
      @Override
      public void setParameter(String parameters) {
-         System.out.println("setParameters");
+         // todo, which type of classifier? GPV, GPVV?
+         // more config population size, etc.
+         // todo: voting for gpvv only 3 votes necessary?
      }

      @Override
      public void apply(SetUniqueList<Instances> traindataSet) {
-         System.out.println("apply");
-         for (Instances traindata : traindataSet) {
-             try {
-                 classifier.buildClassifier(traindata);
-             }catch(Exception e) {
-                 throw new RuntimeException(e);
-             }
+         try {
+             classifier.buildClassifier(traindataSet);
+         }catch(Exception e) {
+             throw new RuntimeException(e);
          }
      }
…
      @Override
      public String getName() {
-         System.out.println("getName");
          return "GPTraining";
      }
…
      @Override
      public Classifier getClassifier() {
-         System.out.println("getClassifier");
          return this.classifier;
      }
…
        public InstanceData(Instances instances) {
            this.instances_x = new double[instances.numInstances()][instances.numAttributes()-1];
            this.instances_y = new boolean[instances.numInstances()];
+
            Instance current;
            for(int i=0; i < this.instances_x.length; i++) {
                current = instances.get(i);
-               for(int j=0; j < this.instances_x[0].length; j++) {
-                   this.instances_x[i][j] = current.value(j);
-               }
-
-               this.instances_y[i] = current.stringValue(instances.classIndex()).equals("Y");
+               this.instances_x[i] = WekaUtils.instanceValues(current);
+               this.instances_y[i] = 1.0 == current.classValue();
            }
        }
…
    }

-   public class GPClassifier extends AbstractClassifier {
-
-       private static final long serialVersionUID = 3708714057579101522L;
+   // one gprun, we want several for voting
+   public class GPRun extends AbstractClassifier {
+       private static final long serialVersionUID = -4250422550107888789L;

        private int populationSize = 1000;
…
        }

-       @Override
-       public void buildClassifier(Instances instances) throws Exception {
-           // load instances into double[][] and boolean[]
-           InstanceData train = new InstanceData(instances);
-           this.problem = new CrossPareGP(train.getX(), train.getY(), this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize);
-
-           this.gp = problem.create();
-           this.gp.evolve(this.maxGenerations);
-       }
-
-       @Override
-       public double classifyInstance(Instance instance) {
-           Variable[] vars = ((CrossPareGP)this.problem).getVariables();
-
-           double[][] x = new double[1][instance.numAttributes()-1];
-           boolean[] y = new boolean[1];
-
-           for(int i = 0; i < instance.numAttributes()-1; i++) {
-               x[0][i] = instance.value(i);
-           }
-           y[0] = instance.stringValue(instance.classIndex()).equals("Y");
-
-           CrossPareFitness test = new CrossPareFitness(vars, x, y);
-           IGPProgram fitest = gp.getAllTimeBest();
-
-           double sfitness = test.evaluate(fitest);
-
-           // are we correct when we have a low fitness?
-           if(sfitness < 0.5) {
-               return 1.0;
-           }
-           return 0;
-
-       }
-
+       public GPGenotype getGp() {
+           return this.gp;
+       }
+
+       public Variable[] getVariables() {
+           return ((CrossPareGP)this.problem).getVariables();
+       }
+
+       public void setEvaldata(Instances testdata) {
+
+       }
+
        /**
         * GPProblem implementation
…
        class CrossPareGP extends GPProblem {

-           private static final long serialVersionUID = 7526472295622776147L;
+           //private static final long serialVersionUID = 7526472295622776147L;

            private double[][] instances;
…
            public CrossPareGP(double[][] instances, boolean[] output, int populationSize, int minInitDept, int maxInitDepth, int tournamentSize) throws InvalidConfigurationException {
                super(new GPConfiguration());

                this.instances = instances;
                this.output = output;

+               Configuration.reset();
                GPConfiguration config = this.getGPConfiguration();
                //config.reset();
+
                this.x = new Variable[this.instances[0].length];

+
                for(int j=0; j < this.x.length; j++) {
                    this.x[j] = Variable.create(config, "X"+j, CommandGene.DoubleClass);
…
                Class[][] argTypes = { {} };

-               // variables + functions
+               // variables + functions, we set the variables with the values of the instances here
                CommandGene[] vars = new CommandGene[this.instances[0].length];
                for(int j=0; j < this.instances[0].length; j++) {
…
                }
            }
+

        /**
…
                }

-               // value gives us a double, if >0.5 we set this instance as faulty
-               value = program.execute_double(0, NO_ARGS);
+               // value gives us a double, if < 0.5 we set this instance as faulty
+               value = program.execute_double(0, NO_ARGS); // todo: test with this.x

                if(value < 0.5) {
…

                // number of nodes in the programm, if lower then 10 we assign sFitness of 10
+               // we can set metadata with setProgramData to save this
                if(program.getChromosome(0).getSize(0) < 10) {
+                   program.setApplicationData(10.0f);
                    this.sfitness = 10.0f;
                    //System.out.println("wenige nodes: "+program.getChromosome(0).getSize(0));
…

                return pfitness;
+           }
+       }
+
+       @Override
+       public void buildClassifier(Instances traindata) throws Exception {
+           InstanceData train = new InstanceData(traindata);
+           this.problem = new CrossPareGP(train.getX(), train.getY(), this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize);
+           this.gp = problem.create();
+           this.gp.evolve(this.maxGenerations);
+       }
+   }
+
+   /**
+    * GP Multiple Data Sets Validation-Voting Classifier
+    *
+    *
+    */
+   public class GPVVClassifier extends GPVClassifier {
+
+       private List<Classifier> classifiers = null;
+
+       @Override
+       public void buildClassifier(Instances arg0) throws Exception {
+           // TODO Auto-generated method stub
+
+       }
+
+       public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
+
+           // each classifier is trained with one project from the set
+           // then is evaluated on the rest
+           for(int i=0; i < traindataSet.size(); i++) {
+               Classifier classifier = new GPRun();
+
+               // one project is training data
+               classifier.buildClassifier(traindataSet.get(i));
+
+               double[] errors;
+
+               // rest of the set is evaluation data, we evaluate now
+               for(int j=0; j < traindataSet.size(); j++) {
+                   if(j != i) {
+                       // if type1 and type2 errors are < 0.5 we allow the model in the final voting
+                       errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
+                       if((errors[0] / traindataSet.get(j).numInstances()) < 0.5 && (errors[0] / traindataSet.get(j).numInstances()) < 0.5) {
+                           classifiers.add(classifier);
+                       }
+                   }
+               }
+           }
+       }
+
+       /**
+        * Use the remaining classifiers for our voting
+        */
+       @Override
+       public double classifyInstance(Instance instance) {
+
+           int vote_positive = 0;
+           int vote_negative = 0;
+
+           for (int i = 0; i < classifiers.size(); i++) {
+               Classifier classifier = classifiers.get(i);
+
+               GPGenotype gp = ((GPRun)classifier).getGp();
+               Variable[] vars = ((GPRun)classifier).getVariables();
+
+               IGPProgram fitest = gp.getAllTimeBest(); // all time fitest
+               for(int j = 0; j < instance.numAttributes()-1; j++) {
+                   vars[j].set(instance.value(j));
+               }
+
+               if(fitest.execute_double(0, vars) < 0.5) {
+                   vote_positive += 1;
+               }else {
+                   vote_negative += 1;
+               }
+           }
+
+           if(vote_positive >= 3) {
+               return 1.0;
+           }else {
+               return 0.0;
+           }
+       }
+   }
+
+   /**
+    * GP Multiple Data Sets Validation Classifier
+    *
+    *
+    * for one test data set:
+    *   for one in 6 possible training data sets:
+    *     For 200 GP Runs:
+    *       train one Classifier with this training data
+    *       then evaluate the classifier with the remaining project
+    *       if the candidate model performs bad (error type1 or type2 > 50%) discard it
+    * for the remaining model candidates the best one is used
+    *
+    */
+   public class GPVClassifier extends AbstractClassifier {
+
+       private Classifier best = null;
+
+       private static final long serialVersionUID = 3708714057579101522L;
+
+
+       /** Build the GP Multiple Data Sets Validation Classifier
+        *
+        * - Traindata one of the Instances of the Set (which one? The first? as it is a list?)
+        * - Testdata one other Instances of the Set (the next one? chosen randomly?)
+        * - Evaluation the rest of the instances
+        *
+        * @param traindataSet
+        * @throws Exception
+        */
+       public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
+
+           // each classifier is trained with one project from the set
+           // then is evaluated on the rest
+           for(int i=0; i < traindataSet.size(); i++) {
+               Classifier classifier = new GPRun();
+
+               // one project is training data
+               classifier.buildClassifier(traindataSet.get(i));
+
+               // rest of the set is evaluation data, we evaluate now
+               double smallest_error_count = Double.MAX_VALUE;
+               double[] errors;
+               for(int j=0; j < traindataSet.size(); j++) {
+                   if(j != i) {
+                       errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
+                       if(errors[0]+errors[1] < smallest_error_count) {
+                           this.best = classifier;
+                       }
+                   }
+               }
+           }
+       }
+
+       @Override
+       public void buildClassifier(Instances traindata) throws Exception {
+           final Classifier classifier = new GPRun();
+           classifier.buildClassifier(traindata);
+           best = classifier;
+       }
+
+       public double[] evaluate(GPRun classifier, Instances evalData) {
+           GPGenotype gp = classifier.getGp();
+           Variable[] vars = classifier.getVariables();
+
+           IGPProgram fitest = gp.getAllTimeBest(); // selects the fitest of all not just the last generation
+
+           double classification;
+           int error_type1 = 0;
+           int error_type2 = 0;
+           int number_instances = evalData.numInstances();
+
+           for(Instance instance: evalData) {
+
+               for(int i = 0; i < instance.numAttributes()-1; i++) {
+                   vars[i].set(instance.value(i));
+               }
+
+               classification = fitest.execute_double(0, vars);
+
+               // classification < 0.5 we say defective
+               if(classification < 0.5) {
+                   if(instance.classValue() != 1.0) {
+                       error_type1 += 1;
+                   }
+               }else {
+                   if(instance.classValue() == 1.0) {
+                       error_type2 += 1;
+                   }
+               }
+           }
+
+           double et1_per = error_type1 / number_instances;
+           double et2_per = error_type2 / number_instances;
+
+           // return some kind of error rate?
+           //return (error_type1 + error_type2) / number_instances;
+           return new double[]{error_type1, error_type2};
+       }
+
+       /**
+        * Use only the best classifier from our evaluation phase
+        */
+       @Override
+       public double classifyInstance(Instance instance) {
+           GPGenotype gp = ((GPRun)best).getGp();
+           Variable[] vars = ((GPRun)best).getVariables();
+
+           IGPProgram fitest = gp.getAllTimeBest(); // all time fitest
+           for(int i = 0; i < instance.numAttributes()-1; i++) {
+               vars[i].set(instance.value(i));
+           }
+
+           double classification = fitest.execute_double(0, vars);
+
+           if(classification < 0.5) {
+               return 1.0;
+           }else {
+               return 0.0;
            }
        }
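The javadoc of the new GPVClassifier and GPVVClassifier describes the selection scheme only in prose: each candidate GP model is trained on one project, evaluated on the remaining projects, kept only when both its type-1 and type-2 error rates on a held-out project stay below 0.5, and the survivors are then either reduced to the single best candidate (GPVClassifier) or combined by voting (GPVVClassifier). The following sketch illustrates that validate-then-vote rule in isolation. It is not CrossPare code; all class and method names in it are hypothetical stand-ins for the Weka and JGAP machinery used in the changeset.

// Minimal, self-contained sketch of the validation-voting idea from this changeset.
// All names are hypothetical and independent of CrossPare, Weka, and JGAP; the point
// is only the selection rule: keep a candidate model when its type-1 and type-2 error
// rates on a held-out project both stay below 0.5, then let the survivors vote.
import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

public class ValidationVotingSketch {

    /** A trained candidate model: maps a feature vector to "defective yes/no". */
    interface Model extends Predicate<double[]> { }

    /** Type-1 errors: predicted defective but actually clean;
     *  type-2 errors: predicted clean but actually defective. */
    static double[] errorRates(Model model, double[][] x, boolean[] y) {
        int type1 = 0, type2 = 0;
        for (int i = 0; i < x.length; i++) {
            boolean predictedDefective = model.test(x[i]);
            if (predictedDefective && !y[i]) type1++;
            if (!predictedDefective && y[i]) type2++;
        }
        return new double[] { (double) type1 / x.length, (double) type2 / x.length };
    }

    /** Keep only candidates whose type-1 AND type-2 error rates stay below 0.5
     *  on the evaluation project (the discard rule described in the javadoc above). */
    static List<Model> validate(List<Model> candidates, double[][] evalX, boolean[] evalY) {
        List<Model> kept = new ArrayList<>();
        for (Model m : candidates) {
            double[] e = errorRates(m, evalX, evalY);
            if (e[0] < 0.5 && e[1] < 0.5) {
                kept.add(m);
            }
        }
        return kept;
    }

    /** Majority vote of the surviving models. */
    static boolean vote(List<Model> models, double[] instance) {
        int positive = 0;
        for (Model m : models) {
            if (m.test(instance)) positive++;
        }
        return positive * 2 > models.size();
    }

    public static void main(String[] args) {
        // Toy data: one feature, "defective" when the feature is large.
        double[][] evalX = { { 0.1 }, { 0.9 }, { 0.8 }, { 0.2 } };
        boolean[] evalY = { false, true, true, false };

        List<Model> candidates = new ArrayList<>();
        candidates.add(v -> v[0] > 0.5);   // good candidate, survives validation
        candidates.add(v -> true);         // degenerate candidate, discarded (type-1 rate 0.5)
        List<Model> kept = validate(candidates, evalX, evalY);

        System.out.println("kept models: " + kept.size());
        System.out.println("vote for {0.95}: " + vote(kept, new double[] { 0.95 }));
    }
}

The sketch uses a plain majority vote; the changeset's GPVVClassifier.classifyInstance instead requires a fixed minimum of three positive votes, an open point flagged by the todo comment in setParameter.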