Ignore:
Timestamp:
05/27/16 14:28:08 (9 years ago)
Author:
atrautsch
Message:

comments, little cleanup

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java

    r106 r108  
    6161    private int tournamentSize = 7; 
    6262    private int maxGenerations = 50; 
    63     private double errorType2Weight = 1; 
    64     private int numberRuns = 200;  // 200 in the paper 
     63    private double errorType2Weight = 15; 
     64    private int numberRuns = 1;  // 200 in the paper 
    6565    private int maxDepth = 20;  // max depth within one program 
    6666    private int maxNodes = 100;  // max nodes within one program 
     
    6868    @Override 
    6969    public void setParameter(String parameters) { 
     70         
     71        // todo: split parameters to get classifier and the configuration variables for the gprun 
    7072        if(parameters.equals("GPVVClassifier")) { 
    7173            this.classifier = new GPVVClassifier(); 
     
    125127     
    126128    /** 
    127      * One Run of a GP Classifier 
    128      * we want several runs to mitigate problems with local maxima/minima  
     129     * One Run executed by a GP Classifier 
    129130     */ 
    130131    public class GPRun extends AbstractClassifier { 
     
    311312            } 
    312313 
     314            /** 
     315             * This is the fitness function 
     316             *  
     317             * Our fitness is best if we have the fewest wrong classifications, this includes a weight for type2 errors 
     318             */ 
    313319            @Override 
    314320            protected double evaluate(final IGPProgram program) { 
     
    329335 
    330336                    // value gives us a double, if < 0.5 we set this instance as faulty 
    331                     value = program.execute_double(0, this.x);  // todo: test with this.x 
     337                    value = program.execute_double(0, this.x); 
    332338 
    333339                    if(value < 0.5) { 
     
    344350                // now calc pfitness 
    345351                pfitness = (this.errorType1 + this.errorType2Weight * this.errorType2) / this.instances.length; 
    346  
    347                 //System.out.println("pfitness: " + pfitness); 
    348352 
    349353                // number of nodes in the program, if lower than 10 we assign sFitness of 10 
     
    413417     * GP Multiple Data Sets Validation-Voting Classifier 
    414418     *  
    415      * As the GP Multiple Data Sets Validation Classifier 
     419     * Basically the same as the GP Multiple Data Sets Validation Classifier. 
    416420     * But here we do keep a model candidate for each training set which may later vote 
    417421     * 
     
    428432        } 
    429433         
     434        /** Build the GP Multiple Data Sets Validation-Voting Classifier 
     435         *  
     436         * This is according to Section 6 of the Paper by Liu et al. 
     437         * It is basically the Multiple Data Sets Validation Classifier but here we keep the best models and let them vote. 
     438         *  
     439         * @param traindataSet 
     440         * @throws Exception 
     441         */ 
    430442        public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception { 
    431443 
     
    450462                    for(int j=0; j < traindataSet.size(); j++) { 
    451463                        if(j != i) { 
    452                             // if type1 and type2 errors are < 0.5 we allow the model in the final voting 
     464                            // if type1 and type2 errors are < 0.5 we allow the model in the candidates 
    453465                            errors = this.evaluate((GPRun)classifier, traindataSet.get(j)); 
    454466                            if((errors[0] < 0.5) && (errors[0] < 0.5)) { 
     
    481493         
    482494        /** 
    483          * Use the remaining classifiers for our voting 
     495         * Use the best classifiers for each training data in a majority voting 
    484496         */ 
    485497        @Override 
     
    515527     * GP Multiple Data Sets Validation Classifier 
    516528     *  
    517      * 
    518      * for one test data set: 
    519      *   for one in X possible training data sets: 
    520      *     For Y GP Runs: 
    521      *       train one Classifier with this training data 
    522      *       then evaluate the classifier with the remaining project 
    523      *       if the candidate model performs bad (error type1 or type2 > 50%) discard it 
    524      * for the remaining model candidates the best one is used 
    525      * 
     529     * We train a Classifier with one training project $numberRuns times. 
     530     * Then we evaluate the classifier on the rest of the training projects and keep the best classifier. 
     531     * After that we have for each training project the best classifier as per the evaluation on the rest of the data set. 
     532     * Then we determine the best classifier from these candidates and keep it to be used later. 
    526533     */ 
    527534    public class GPVClassifier extends AbstractClassifier { 
     
    566573        /** Build the GP Multiple Data Sets Validation Classifier 
    567574         *  
    568          * - Traindata one of the Instances of the Set (which one? The firsT? as it is a list?) 
    569          * - Testdata one other Instances of the Set (the next one? chose randomly?) 
    570          * - Evaluation the rest of the instances 
     575         * This is according to Section 6 of the Paper by Liu et al. except for the selection of the best model. 
     576         * Section 4 describes a slightly different approach. 
    571577         *  
    572578         * @param traindataSet 
     
    582588                LinkedList<Classifier> candidates = new LinkedList<>(); 
    583589                 
    584                 // 200 runs 
     590                // numberRuns full GPRuns, we generate numberRuns models for each traindata 
    585591                for(int k=0; k < this.numberRuns; k++) { 
    586592                    Classifier classifier = new GPRun(); 
    587593                    ((GPRun)classifier).configure(this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.maxGenerations, this.errorType2Weight, this.maxDepth, this.maxNodes); 
    588594                     
    589                     // one project is training data 
    590595                    classifier.buildClassifier(traindataSet.get(i)); 
    591596                     
    592597                    double[] errors; 
    593                      
     598 
    594599                    // rest of the set is evaluation data, we evaluate now 
    595600                    for(int j=0; j < traindataSet.size(); j++) { 
    596601                        if(j != i) { 
    597                             // if type1 and type2 errors are < 0.5 we allow the model in the final voting 
     602                            // if type1 and type2 errors are < 0.5 we allow the model in the candidate list 
    598603                            errors = this.evaluate((GPRun)classifier, traindataSet.get(j)); 
    599604                            if((errors[0] < 0.5) && (errors[0] < 0.5)) { 
    600                                 candidates.add(classifier);                             
     605                                candidates.add(classifier); 
    601606                            } 
    602607                        } 
     
    604609                } 
    605610                 
    606                 // now after the evaluation we do a model selection where only one model remains per training data set 
    607                 // from that we chose the best model 
    608                  
    609                 // now after the evaluation we do a model selection where only one model remains for the given training data 
     611                // after the numberRuns we have < numberRuns candidate models for this trainData 
     612                // we now evaluate the candidates 
     613                // finding the best model is not really described in the paper we go with least errors 
    610614                double smallest_error_count = Double.MAX_VALUE; 
    611615                double[] errors; 
     
    625629                // now we have the best classifier for this training data 
    626630                classifiers.add(best); 
    627             } 
    628              
    629             // now determine the best classifier for all training data 
     631            } /* endfor trainData */ 
     632             
     633            // now we have one best classifier for each trainData  
     634            // we evaluate again to find the best classifier of all time 
     635            // this selection is now according to section 4 of the paper and not 6 where an average of the 6 models is built  
    630636            double smallest_error_count = Double.MAX_VALUE; 
    631637            double error_count; 
     
    653659        } 
    654660         
     661        /** 
     662         * Evaluation of the Classifier 
     663         *  
     664         * We evaluate the classifier with the Instances of the evalData. 
     665         * It basically assigns the instance attribute values to the variables of the s-expression-tree and  
     666         * then counts the misclassifications.  
     667         *  
     668         * @param classifier 
     669         * @param evalData 
     670         * @return 
     671         */ 
    655672        public double[] evaluate(GPRun classifier, Instances evalData) { 
    656673            GPGenotype gp = classifier.getGp(); 
     
    667684            for(Instance instance: evalData) { 
    668685                 
    669                 for(int i = 0; i < instance.numAttributes()-1; i++) { 
    670                     vars[i].set(instance.value(i)); 
     686                // assign instance attribute values to the variables of the s-expression-tree 
     687                double[] tmp = WekaUtils.instanceValues(instance); 
     688                for(int i = 0; i < tmp.length; i++) { 
     689                    vars[i].set(tmp[i]); 
    671690                } 
    672691                 
Note: See TracChangeset for help on using the changeset viewer.