Context Navigation

source: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java @ 129

Last change on this file since 129 was 125, checked in by sherbold, 8 years ago
fixed bug in GPTraining that led to the possibility that no classifier is generated in case of bad performance on the training data
File size: 31.0 KB

Line
1	package de.ugoe.cs.cpdp.training;
2
3	import java.util.LinkedList;
4	import java.util.List;
5
6	import org.apache.commons.collections4.list.SetUniqueList;
7
8	import weka.classifiers.AbstractClassifier;
9	import weka.classifiers.Classifier;
10	import weka.core.Instance;
11	import weka.core.Instances;
12	import org.apache.commons.lang3.ArrayUtils;
13	import org.jgap.Configuration;
14	import org.jgap.InvalidConfigurationException;
15	import org.jgap.gp.CommandGene;
16	import org.jgap.gp.GPProblem;
17
18	import org.jgap.gp.function.Add;
19	import org.jgap.gp.function.Multiply;
20	import org.jgap.gp.function.Log;
21	import org.jgap.gp.function.Subtract;
22	import org.jgap.gp.function.Divide;
23	import org.jgap.gp.function.Sine;
24	import org.jgap.gp.function.Cosine;
25	import org.jgap.gp.function.Max;
26	import org.jgap.gp.function.Exp;
27
28	import org.jgap.gp.impl.DeltaGPFitnessEvaluator;
29	import org.jgap.gp.impl.GPConfiguration;
30	import org.jgap.gp.impl.GPGenotype;
31	import org.jgap.gp.impl.TournamentSelector;
32	import org.jgap.gp.terminal.Terminal;
33	import org.jgap.gp.GPFitnessFunction;
34	import org.jgap.gp.IGPProgram;
35	import org.jgap.gp.terminal.Variable;
36	import org.jgap.gp.MathCommand;
37	import org.jgap.util.ICloneable;
38
39	import de.ugoe.cs.cpdp.util.WekaUtils;
40
41	import org.jgap.gp.impl.ProgramChromosome;
42	import org.jgap.util.CloneException;
43
44	/**
45	* Genetic Programming Trainer
46	*
47	* Implementation (mostly) according to Liu et al. Evolutionary Optimization of Software Quality Modeling with Multiple Repositories.
48	*
49	* - GPRun is a Run of a complete Genetic Programm Evolution, we want several complete runs.
50	* - GPVClassifier is the Validation Classifier
51	* - GPVVClassifier is the Validation-Voting Classifier
52	*
53	* config: <setwisetrainer name="GPTraining" param="populationSize:1000,numberRuns:10" />
54	*/
55	public class GPTraining implements ISetWiseTrainingStrategy, IWekaCompatibleTrainer {
56
57	private GPVVClassifier classifier = null;
58
59	// default values from the paper
60	private int populationSize = 1000;
61	private int initMinDepth = 2;
62	private int initMaxDepth = 6;
63	private int tournamentSize = 7;
64	private int maxGenerations = 50;
65	private double errorType2Weight = 15;
66	private int numberRuns = 20; // im paper 20 per errorType2Weight then additional 20
67	private int maxDepth = 20; // max depth within one program
68	private int maxNodes = 100; // max nodes within one program
69
70	@Override
71	public void setParameter(String parameters) {
72
73	String[] params = parameters.split(",");
74	String[] keyvalue = new String[2];
75
76	for(int i=0; i < params.length; i++) {
77	keyvalue = params[i].split(":");
78
79	switch(keyvalue[0]) {
80	case "populationSize":
81	this.populationSize = Integer.parseInt(keyvalue[1]);
82	break;
83
84	case "initMinDepth":
85	this.initMinDepth = Integer.parseInt(keyvalue[1]);
86	break;
87
88	case "tournamentSize":
89	this.tournamentSize = Integer.parseInt(keyvalue[1]);
90	break;
91
92	case "maxGenerations":
93	this.maxGenerations = Integer.parseInt(keyvalue[1]);
94	break;
95
96	case "errorType2Weight":
97	this.errorType2Weight = Double.parseDouble(keyvalue[1]);
98	break;
99
100	case "numberRuns":
101	this.numberRuns = Integer.parseInt(keyvalue[1]);
102	break;
103
104	case "maxDepth":
105	this.maxDepth = Integer.parseInt(keyvalue[1]);
106	break;
107
108	case "maxNodes":
109	this.maxNodes = Integer.parseInt(keyvalue[1]);
110	break;
111	}
112	}
113
114	this.classifier = new GPVVClassifier();
115	((GPVClassifier)this.classifier).configure(populationSize, initMinDepth, initMaxDepth, tournamentSize, maxGenerations, errorType2Weight, numberRuns, maxDepth, maxNodes);
116	}
117
118	@Override
119	public void apply(SetUniqueList<Instances> traindataSet) {
120	try {
121	classifier.buildClassifier(traindataSet);
122	}catch(Exception e) {
123	throw new RuntimeException(e);
124	}
125	}
126
127	@Override
128	public String getName() {
129	return "GPTraining";
130	}
131
132	@Override
133	public Classifier getClassifier() {
134	return this.classifier;
135	}
136
137	public class InstanceData {
138	private double[][] instances_x;
139	private boolean[] instances_y;
140
141	public InstanceData(Instances instances) {
142	this.instances_x = new double[instances.numInstances()][instances.numAttributes()-1];
143	this.instances_y = new boolean[instances.numInstances()];
144
145	Instance current;
146	for(int i=0; i < this.instances_x.length; i++) {
147	current = instances.get(i);
148	this.instances_x[i] = WekaUtils.instanceValues(current);
149	this.instances_y[i] = 1.0 == current.classValue();
150	}
151	}
152
153	public double[][] getX() {
154	return instances_x;
155	}
156	public boolean[] getY() {
157	return instances_y;
158	}
159	}
160
161	/**
162	* One Run executed by a GP Classifier
163	*/
164	public class GPRun extends AbstractClassifier {
165	private static final long serialVersionUID = -4250422550107888789L;
166
167	private int populationSize;
168	private int initMinDepth;
169	private int initMaxDepth;
170	private int tournamentSize;
171	private int maxGenerations;
172	private double errorType2Weight;
173	private int maxDepth;
174	private int maxNodes;
175
176	private GPGenotype gp;
177	private GPProblem problem;
178
179	public void configure(int populationSize, int initMinDepth, int initMaxDepth, int tournamentSize, int maxGenerations, double errorType2Weight, int maxDepth, int maxNodes) {
180	this.populationSize = populationSize;
181	this.initMinDepth = initMinDepth;
182	this.initMaxDepth = initMaxDepth;
183	this.tournamentSize = tournamentSize;
184	this.maxGenerations = maxGenerations;
185	this.errorType2Weight = errorType2Weight;
186	this.maxDepth = maxDepth;
187	this.maxNodes = maxNodes;
188	}
189
190	public GPGenotype getGp() {
191	return this.gp;
192	}
193
194	public Variable[] getVariables() {
195	return ((CrossPareGP)this.problem).getVariables();
196	}
197
198	@Override
199	public void buildClassifier(Instances traindata) throws Exception {
200	InstanceData train = new InstanceData(traindata);
201	this.problem = new CrossPareGP(train.getX(), train.getY(), this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.errorType2Weight, this.maxDepth, this.maxNodes);
202	this.gp = problem.create();
203	this.gp.evolve(this.maxGenerations);
204	}
205
206	/**
207	* GPProblem implementation
208	*/
209	class CrossPareGP extends GPProblem {
210	private double[][] instances;
211	private boolean[] output;
212
213	private int maxDepth;
214	private int maxNodes;
215
216	private Variable[] x;
217
218	public CrossPareGP(double[][] instances, boolean[] output, int populationSize, int minInitDept, int maxInitDepth, int tournamentSize, double errorType2Weight, int maxDepth, int maxNodes) throws InvalidConfigurationException {
219	super(new GPConfiguration());
220
221	this.instances = instances;
222	this.output = output;
223	this.maxDepth = maxDepth;
224	this.maxNodes = maxNodes;
225
226	Configuration.reset();
227	GPConfiguration config = this.getGPConfiguration();
228
229	this.x = new Variable[this.instances[0].length];
230
231	for(int j=0; j < this.x.length; j++) {
232	this.x[j] = Variable.create(config, "X"+j, CommandGene.DoubleClass);
233	}
234
235	config.setGPFitnessEvaluator(new DeltaGPFitnessEvaluator()); // smaller fitness is better
236	//config.setGPFitnessEvaluator(new DefaultGPFitnessEvaluator()); // bigger fitness is better
237
238	config.setMinInitDepth(minInitDept);
239	config.setMaxInitDepth(maxInitDepth);
240
241	config.setCrossoverProb((float)0.60);
242	config.setReproductionProb((float)0.10);
243	config.setMutationProb((float)0.30);
244
245	config.setSelectionMethod(new TournamentSelector(tournamentSize));
246
247	config.setPopulationSize(populationSize);
248
249	config.setMaxCrossoverDepth(4);
250	config.setFitnessFunction(new CrossPareFitness(this.x, this.instances, this.output, errorType2Weight));
251	config.setStrictProgramCreation(true);
252	}
253
254	// used for running the fitness function again for testing
255	public Variable[] getVariables() {
256	return this.x;
257	}
258
259
260	public GPGenotype create() throws InvalidConfigurationException {
261	GPConfiguration config = this.getGPConfiguration();
262
263	// return type
264	Class[] types = {CommandGene.DoubleClass};
265
266	// Arguments of result-producing chromosome: none
267	Class[][] argTypes = { {} };
268
269	// variables + functions, we set the variables with the values of the instances here
270	CommandGene[] vars = new CommandGene[this.instances[0].length];
271	for(int j=0; j < this.instances[0].length; j++) {
272	vars[j] = this.x[j];
273	}
274	CommandGene[] funcs = {
275	new Add(config, CommandGene.DoubleClass),
276	new Subtract(config, CommandGene.DoubleClass),
277	new Multiply(config, CommandGene.DoubleClass),
278	new Divide(config, CommandGene.DoubleClass),
279	new Sine(config, CommandGene.DoubleClass),
280	new Cosine(config, CommandGene.DoubleClass),
281	new Exp(config, CommandGene.DoubleClass),
282	new Log(config, CommandGene.DoubleClass),
283	new GT(config, CommandGene.DoubleClass),
284	new Max(config, CommandGene.DoubleClass),
285	new Terminal(config, CommandGene.DoubleClass, -100.0, 100.0, true), // min, max, whole numbers
286	};
287
288	CommandGene[] comb = (CommandGene[])ArrayUtils.addAll(vars, funcs);
289	CommandGene[][] nodeSets = {
290	comb,
291	};
292
293	// we only have one chromosome so this suffices
294	int minDepths[] = {config.getMinInitDepth()};
295	int maxDepths[] = {this.maxDepth};
296	GPGenotype result = GPGenotype.randomInitialGenotype(config, types, argTypes, nodeSets, minDepths, maxDepths, this.maxNodes, false); // 40 = maxNodes, true = verbose output
297
298	return result;
299	}
300	}
301
302
303	/**
304	* Fitness function
305	*/
306	class CrossPareFitness extends GPFitnessFunction {
307
308	private static final long serialVersionUID = 75234832484387L;
309
310	private Variable[] x;
311
312	private double[][] instances;
313	private boolean[] output;
314
315	private double errorType2Weight = 1.0;
316
317	// needed in evaluate
318	//private Object[] NO_ARGS = new Object[0];
319
320	private double sfitness = 0.0f;
321	private int errorType1 = 0;
322	private int errorType2 = 0;
323
324	public CrossPareFitness(Variable[] x, double[][] instances, boolean[] output, double errorType2Weight) {
325	this.x = x;
326	this.instances = instances;
327	this.output = output;
328	this.errorType2Weight = errorType2Weight;
329	}
330
331	public int getErrorType1() {
332	return this.errorType1;
333	}
334
335	public int getErrorType2() {
336	return this.errorType2;
337	}
338
339	public double getSecondFitness() {
340	return this.sfitness;
341	}
342
343	public int getNumInstances() {
344	return this.instances.length;
345	}
346
347	/**
348	* This is the fitness function
349	*
350	* Our fitness is best if we have the less wrong classifications, this includes a weight for type2 errors
351	*/
352	@Override
353	protected double evaluate(final IGPProgram program) {
354	double pfitness = 0.0f;
355	this.sfitness = 0.0f;
356	double value = 0.0f;
357
358	// count classification errors
359	this.errorType1 = 0;
360	this.errorType2 = 0;
361
362	for(int i=0; i < this.instances.length; i++) {
363
364	// requires that we have a variable for each column of our dataset (attribute of instance)
365	for(int j=0; j < this.x.length; j++) {
366	this.x[j].set(this.instances[i][j]);
367	}
368
369	// value gives us a double, if < 0.5 we set this instance as faulty
370	value = program.execute_double(0, this.x);
371
372	if(value < 0.5) {
373	if(this.output[i] != true) {
374	this.errorType1 += 1;
375	}
376	}else {
377	if(this.output[i] == true) {
378	this.errorType2 += 1;
379	}
380	}
381	}
382
383	// now calc pfitness
384	pfitness = (this.errorType1 + this.errorType2Weight * this.errorType2) / this.instances.length;
385
386	// number of nodes in the programm, if lower then 10 we assign sFitness of 10
387	// we can set metadata with setProgramData to save this
388	if(program.getChromosome(0).getSize(0) < 10) {
389	program.setApplicationData(10.0f);
390	}
391
392	return pfitness;
393	}
394	}
395
396	/**
397	* Custom GT implementation used in the GP Algorithm.
398	*/
399	public class GT extends MathCommand implements ICloneable {
400
401	private static final long serialVersionUID = 113454184817L;
402
403	public GT(final GPConfiguration a_conf, java.lang.Class a_returnType) throws InvalidConfigurationException {
404	super(a_conf, 2, a_returnType);
405	}
406
407	public String toString() {
408	return "GT(&1, &2)";
409	}
410
411	public String getName() {
412	return "GT";
413	}
414
415	public float execute_float(ProgramChromosome c, int n, Object[] args) {
416	float f1 = c.execute_float(n, 0, args);
417	float f2 = c.execute_float(n, 1, args);
418
419	float ret = 1.0f;
420	if(f1 > f2) {
421	ret = 0.0f;
422	}
423
424	return ret;
425	}
426
427	public double execute_double(ProgramChromosome c, int n, Object[] args) {
428	double f1 = c.execute_double(n, 0, args);
429	double f2 = c.execute_double(n, 1, args);
430
431	double ret = 1;
432	if(f1 > f2) {
433	ret = 0;
434	}
435	return ret;
436	}
437
438	public Object clone() {
439	try {
440	GT result = new GT(getGPConfiguration(), getReturnType());
441	return result;
442	}catch(Exception ex) {
443	throw new CloneException(ex);
444	}
445	}
446	}
447	}
448
449	/**
450	* GP Multiple Data Sets Validation-Voting Classifier
451	*
452	* Basically the same as the GP Multiple Data Sets Validation Classifier.
453	* But here we do keep a model candidate for each training set which may later vote
454	*
455	*/
456	public class GPVVClassifier extends GPVClassifier {
457
458	private static final long serialVersionUID = -654710583852839901L;
459	private List<Classifier> classifiers = null;
460
461	@Override
462	public void buildClassifier(Instances arg0) throws Exception {
463	// TODO Auto-generated method stub
464
465	}
466
467	/** Build the GP Multiple Data Sets Validation-Voting Classifier
468	*
469	* This is according to Section 6 of the Paper by Liu et al.
470	* It is basically the Multiple Data Sets Validation Classifier but here we keep the best models an let them vote.
471	*
472	* @param traindataSet
473	* @throws Exception
474	*/
475	public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
476
477	// each classifier is trained with one project from the set
478	// then is evaluated on the rest
479	classifiers = new LinkedList<>();
480	for(int i=0; i < traindataSet.size(); i++) {
481
482	// candidates we get out of evaluation
483	LinkedList<Classifier> candidates = new LinkedList<>();
484
485	// number of runs, yields the best of these
486	double smallest_error_count_train = Double.MAX_VALUE;
487	Classifier bestTrain = null;
488	for(int k=0; k < this.numberRuns; k++) {
489	double[] errors_eval = {0.0, 0.0};
490	Classifier classifier = new GPRun();
491	((GPRun)classifier).configure(this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.maxGenerations, this.errorType2Weight, this.maxDepth, this.maxNodes);
492
493	// one project is training data
494	classifier.buildClassifier(traindataSet.get(i));
495
496	double[] errors;
497	// rest of the set is evaluation data, we evaluate now
498	for(int j=0; j < traindataSet.size(); j++) {
499	if(j != i) {
500	// if type1 and type2 errors are < 0.5 we allow the model in the candidates
501	errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
502	errors_eval[0] += errors[0];
503	errors_eval[1] += errors[1];
504	if((errors[0] < 0.5) && (errors[1] < 0.5)) {
505	candidates.add(classifier);
506	}
507	}
508	}
509
510	// if the candidate made fewer errors it is now the best
511	if(errors_eval[0] + errors_eval[1] < smallest_error_count_train) {
512	bestTrain = classifier;
513	smallest_error_count_train = errors_eval[0] + errors_eval[1];
514	}
515	}
516
517	// now after the evaluation we do a model selection where only one model remains for the given training data
518	// we select the model which is best on all evaluation data
519	double smallest_error_count = Double.MAX_VALUE;
520	double[] errors;
521	Classifier best = null;
522	for(int ii=0; ii < candidates.size(); ii++) {
523	double[] errors_eval = {0.0, 0.0};
524
525	// we add the errors the candidate makes over the evaldata
526	for(int j=0; j < traindataSet.size(); j++) {
527	if(j != i) {
528	errors = this.evaluate((GPRun)candidates.get(ii), traindataSet.get(j));
529	errors_eval[0] += errors[0];
530	errors_eval[1] += errors[1];
531	}
532	}
533
534	// if the candidate made fewer errors it is now the best
535	if(errors_eval[0] + errors_eval[1] < smallest_error_count) {
536	best = candidates.get(ii);
537	smallest_error_count = errors_eval[0] + errors_eval[1];
538	}
539	}
540
541	if( best==null ) {
542	best = bestTrain;
543	}
544	// now we have the best classifier for this training data
545	classifiers.add(best);
546	}
547	}
548
549	/**
550	* Use the best classifiers for each training data in a majority voting
551	*/
552	@Override
553	public double classifyInstance(Instance instance) {
554
555	int vote_positive = 0;
556
557	for (int i = 0; i < classifiers.size(); i++) {
558	Classifier classifier = classifiers.get(i);
559
560	GPGenotype gp = ((GPRun)classifier).getGp();
561	Variable[] vars = ((GPRun)classifier).getVariables();
562
563	IGPProgram fitest = gp.getAllTimeBest(); // all time fitest
564	for(int j = 0; j < instance.numAttributes()-1; j++) {
565	vars[j].set(instance.value(j));
566	}
567
568	if(fitest.execute_double(0, vars) < 0.5) {
569	vote_positive += 1;
570	}
571	}
572
573	if(vote_positive >= (classifiers.size()/2)) {
574	return 1.0;
575	}else {
576	return 0.0;
577	}
578	}
579	}
580
581	/**
582	* GP Multiple Data Sets Validation Classifier
583	*
584	* We train a Classifier with one training project $numberRun times.
585	* Then we evaluate the classifier on the rest of the training projects and keep the best classifier.
586	* After that we have for each training project the best classifier as per the evaluation on the rest of the data set.
587	* Then we determine the best classifier from these candidates and keep it to be used later.
588	*/
589	public class GPVClassifier extends AbstractClassifier {
590
591	private List<Classifier> classifiers = null;
592	private Classifier best = null;
593
594	private static final long serialVersionUID = 3708714057579101522L;
595
596	protected int populationSize;
597	protected int initMinDepth;
598	protected int initMaxDepth;
599	protected int tournamentSize;
600	protected int maxGenerations;
601	protected double errorType2Weight;
602	protected int numberRuns;
603	protected int maxDepth;
604	protected int maxNodes;
605
606	/**
607	* Configure the GP Params and number of Runs
608	*
609	* @param populationSize
610	* @param initMinDepth
611	* @param initMaxDepth
612	* @param tournamentSize
613	* @param maxGenerations
614	* @param errorType2Weight
615	*/
616	public void configure(int populationSize, int initMinDepth, int initMaxDepth, int tournamentSize, int maxGenerations, double errorType2Weight, int numberRuns, int maxDepth, int maxNodes) {
617	this.populationSize = populationSize;
618	this.initMinDepth = initMinDepth;
619	this.initMaxDepth = initMaxDepth;
620	this.tournamentSize = tournamentSize;
621	this.maxGenerations = maxGenerations;
622	this.errorType2Weight = errorType2Weight;
623	this.numberRuns = numberRuns;
624	this.maxDepth = maxDepth;
625	this.maxNodes = maxNodes;
626	}
627
628	/** Build the GP Multiple Data Sets Validation Classifier
629	*
630	* This is according to Section 6 of the Paper by Liu et al. except for the selection of the best model.
631	* Section 4 describes a slightly different approach.
632	*
633	* @param traindataSet
634	* @throws Exception
635	*/
636	public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
637
638	// each classifier is trained with one project from the set
639	// then is evaluated on the rest
640	for(int i=0; i < traindataSet.size(); i++) {
641
642	// candidates we get out of evaluation
643	LinkedList<Classifier> candidates = new LinkedList<>();
644
645	// numberRuns full GPRuns, we generate numberRuns models for each traindata
646	for(int k=0; k < this.numberRuns; k++) {
647	Classifier classifier = new GPRun();
648	((GPRun)classifier).configure(this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.maxGenerations, this.errorType2Weight, this.maxDepth, this.maxNodes);
649
650	classifier.buildClassifier(traindataSet.get(i));
651
652	double[] errors;
653
654	// rest of the set is evaluation data, we evaluate now
655	for(int j=0; j < traindataSet.size(); j++) {
656	if(j != i) {
657	// if type1 and type2 errors are < 0.5 we allow the model in the candidate list
658	errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
659	if((errors[0] < 0.5) && (errors[1] < 0.5)) {
660	candidates.add(classifier);
661	}
662	}
663	}
664	}
665
666	// now after the evaluation we do a model selection where only one model remains for the given training data
667	// we select the model which is best on all evaluation data
668	double smallest_error_count = Double.MAX_VALUE;
669	double[] errors;
670	Classifier best = null;
671	for(int ii=0; ii < candidates.size(); ii++) {
672	double[] errors_eval = {0.0, 0.0};
673
674	// we add the errors the candidate makes over the evaldata
675	for(int j=0; j < traindataSet.size(); j++) {
676	if(j != i) {
677	errors = this.evaluate((GPRun)candidates.get(ii), traindataSet.get(j));
678	errors_eval[0] += errors[0];
679	errors_eval[1] += errors[1];
680	}
681	}
682
683	// if the candidate made fewer errors it is now the best
684	if(errors_eval[0] + errors_eval[1] < smallest_error_count) {
685	best = candidates.get(ii);
686	smallest_error_count = errors_eval[0] + errors_eval[1];
687	}
688	}
689
690
691	// now we have the best classifier for this training data
692	classifiers.add(best);
693
694	} /* endfor trainData */
695
696	// now we have one best classifier for each trainData
697	// we evaluate again to find the best classifier of all time
698	// this selection is now according to section 4 of the paper and not 6 where an average of the 6 models is build
699	double smallest_error_count = Double.MAX_VALUE;
700	double error_count;
701	double errors[];
702	for(int j=0; j < classifiers.size(); j++) {
703	error_count = 0;
704	Classifier current = classifiers.get(j);
705	for(int i=0; i < traindataSet.size(); i++) {
706	errors = this.evaluate((GPRun)current, traindataSet.get(i));
707	error_count = errors[0] + errors[1];
708	}
709
710	if(error_count < smallest_error_count) {
711	best = current;
712	}
713	}
714	}
715
716	@Override
717	public void buildClassifier(Instances traindata) throws Exception {
718	final Classifier classifier = new GPRun();
719	((GPRun)classifier).configure(populationSize, initMinDepth, initMaxDepth, tournamentSize, maxGenerations, errorType2Weight, this.maxDepth, this.maxNodes);
720	classifier.buildClassifier(traindata);
721	classifiers.add(classifier);
722	}
723
724	/**
725	* Evaluation of the Classifier
726	*
727	* We evaluate the classifier with the Instances of the evalData.
728	* It basically assigns the instance attribute values to the variables of the s-expression-tree and
729	* then counts the missclassifications.
730	*
731	* @param classifier
732	* @param evalData
733	* @return
734	*/
735	public double[] evaluate(GPRun classifier, Instances evalData) {
736	GPGenotype gp = classifier.getGp();
737	Variable[] vars = classifier.getVariables();
738
739	IGPProgram fitest = gp.getAllTimeBest(); // selects the fitest of all not just the last generation
740
741	double classification;
742	int error_type1 = 0;
743	int error_type2 = 0;
744	int positive = 0;
745	int negative = 0;
746
747	for(Instance instance: evalData) {
748
749	// assign instance attribute values to the variables of the s-expression-tree
750	double[] tmp = WekaUtils.instanceValues(instance);
751	for(int i = 0; i < tmp.length; i++) {
752	vars[i].set(tmp[i]);
753	}
754
755	classification = fitest.execute_double(0, vars);
756
757	// we need to count the absolutes of positives for percentage
758	if(instance.classValue() == 1.0) {
759	positive +=1;
760	}else {
761	negative +=1;
762	}
763
764	// classification < 0.5 we say defective
765	if(classification < 0.5) {
766	if(instance.classValue() != 1.0) {
767	error_type1 += 1;
768	}
769	}else {
770	if(instance.classValue() == 1.0) {
771	error_type2 += 1;
772	}
773	}
774	}
775
776	// return error types percentages for the types
777	double et1_per = error_type1 / negative;
778	double et2_per = error_type2 / positive;
779	return new double[]{et1_per, et2_per};
780	}
781
782	/**
783	* Use only the best classifier from our evaluation phase
784	*/
785	@Override
786	public double classifyInstance(Instance instance) {
787	GPGenotype gp = ((GPRun)best).getGp();
788	Variable[] vars = ((GPRun)best).getVariables();
789
790	IGPProgram fitest = gp.getAllTimeBest(); // all time fitest
791	for(int i = 0; i < instance.numAttributes()-1; i++) {
792	vars[i].set(instance.value(i));
793	}
794
795	double classification = fitest.execute_double(0, vars);
796
797	if(classification < 0.5) {
798	return 1.0;
799	}else {
800	return 0.0;
801	}
802	}
803	}
804	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: