Context Navigation

← Previous Changeset
Next Changeset →

Changeset 108

Timestamp:

05/27/16 14:28:08 (9 years ago)

Author:

atrautsch

Message:

comments, little cleanup

File:

: 1 edited

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java (modified) (17 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java

-                      r106
+                      r108
     private int tournamentSize = 7;
     private int maxGenerations = 50;
     private double errorType2Weight = 1;
     private int numberRuns = 200;  // 200 in the paper
+    private double errorType2Weight = 15;
+    private int numberRuns = 1;  // 200 in the paper
     private int maxDepth = 20;  // max depth within one program
     private int maxNodes = 100;  // max nodes within one program
 …
     @Override
     public void setParameter(String parameters) {
+        // todo: split parameters to get classifier and the configuration variables for the gprun
         if(parameters.equals("GPVVClassifier")) {
             this.classifier = new GPVVClassifier();
 …
     /**
+     * One Run of a GP Classifier
+     * we want several runs to mitigate problems with local maxima/minima
+     * One Run executed by a GP Classifier
      */
     public class GPRun extends AbstractClassifier {
 …
+            }
+            /**
+             * This is the fitness function
+             *
+             * Our fitness is best when we have the fewest wrong classifications; this includes a weight for type2 errors
+             */
             @Override
             protected double evaluate(final IGPProgram program) {
 …
                     // value gives us a double, if < 0.5 we set this instance as faulty
                     value = program.execute_double(0, this.x);  // todo: test with this.x
+                    value = program.execute_double(0, this.x);
                     if(value < 0.5) {
 …
                 // now calc pfitness
                 pfitness = (this.errorType1 + this.errorType2Weight * this.errorType2) / this.instances.length;
-                //System.out.println("pfitness: " + pfitness);
                 // number of nodes in the program, if lower than 10 we assign sFitness of 10
 …
      * GP Multiple Data Sets Validation-Voting Classifier
+     *
      * As the GP Multiple Data Sets Validation Classifier
+     * Basically the same as the GP Multiple Data Sets Validation Classifier.
      * But here we do keep a model candidate for each training set which may later vote
+     *
 …
+        }
+        /** Build the GP Multiple Data Sets Validation-Voting Classifier
+         *
+         * This is according to Section 6 of the Paper by Liu et al.
+         * It is basically the Multiple Data Sets Validation Classifier but here we keep the best models and let them vote.
+         *
+         * @param traindataSet
+         * @throws Exception
+         */
         public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
 …
                     for(int j=0; j < traindataSet.size(); j++) {
                         if(j != i) {
                             // if type1 and type2 errors are < 0.5 we allow the model in the final voting
+                            // if type1 and type2 errors are < 0.5 we allow the model in the candidates
                             errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
                             if((errors[0] < 0.5) && (errors[0] < 0.5)) {
 …
         /**
          * Use the remaining classifiers for our voting
+         * Use the best classifiers for each training data in a majority voting
          */
         @Override
 …
      * GP Multiple Data Sets Validation Classifier
+     *
+     *
+     * for one test data set:
+     *   for one in X possible training data sets:
+     *     For Y GP Runs:
+     *       train one Classifier with this training data
+     *       then evaluate the classifier with the remaining project
+     *       if the candidate model performs badly (error type1 or type2 > 50%) discard it
+     * for the remaining model candidates the best one is used
+     *
+     * We train a Classifier with one training project $numberRun times.
+     * Then we evaluate the classifier on the rest of the training projects and keep the best classifier.
+     * After that we have for each training project the best classifier as per the evaluation on the rest of the data set.
+     * Then we determine the best classifier from these candidates and keep it to be used later.
      */
     public class GPVClassifier extends AbstractClassifier {
 …
         /** Build the GP Multiple Data Sets Validation Classifier
+         *
+         * - Traindata one of the Instances of the Set (which one? The first? as it is a list?)
+         * - Testdata one other Instances of the Set (the next one? chosen randomly?)
+         * - Evaluation the rest of the instances
+         * This is according to Section 6 of the Paper by Liu et al. except for the selection of the best model.
+         * Section 4 describes a slightly different approach.
+         *
          * @param traindataSet
 …
                 LinkedList<Classifier> candidates = new LinkedList<>();
                 // 200 runs
+                // numberRuns full GPRuns, we generate numberRuns models for each traindata
                 for(int k=0; k < this.numberRuns; k++) {
                     Classifier classifier = new GPRun();
                     ((GPRun)classifier).configure(this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.maxGenerations, this.errorType2Weight, this.maxDepth, this.maxNodes);
-                    // one project is training data
                     classifier.buildClassifier(traindataSet.get(i));
                     double[] errors;
                     // rest of the set is evaluation data, we evaluate now
                     for(int j=0; j < traindataSet.size(); j++) {
                         if(j != i) {
                             // if type1 and type2 errors are < 0.5 we allow the model in the final voting
+                            // if type1 and type2 errors are < 0.5 we allow the model in the candidate list
                             errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
                             if((errors[0] < 0.5) && (errors[0] < 0.5)) {
                                 candidates.add(classifier);
+                                candidates.add(classifier);
+                            }
+                        }
 …
+                }
+                // now after the evaluation we do a model selection where only one model remains per training data set
+                // from that we choose the best model
+                // now after the evaluation we do a model selection where only one model remains for the given training data
+                // after the numberRuns we have < numberRuns candidate models for this trainData
+                // we now evaluate the candidates
+                // finding the best model is not really described in the paper, so we go with the fewest errors
                 double smallest_error_count = Double.MAX_VALUE;
                 double[] errors;
 …
                 // now we have the best classifier for this training data
                 classifiers.add(best);
+            }
+            // now determine the best classifier for all training data
+            } /* endfor trainData */
+            // now we have one best classifier for each trainData
+            // we evaluate again to find the best classifier of all time
+            // this selection is now according to section 4 of the paper and not 6 where an average of the 6 models is built
             double smallest_error_count = Double.MAX_VALUE;
             double error_count;
 …
+        }
+        /**
+         * Evaluation of the Classifier
+         *
+         * We evaluate the classifier with the Instances of the evalData.
+         * It basically assigns the instance attribute values to the variables of the s-expression-tree and
+         * then counts the misclassifications.
+         *
+         * @param classifier
+         * @param evalData
+         * @return
+         */
         public double[] evaluate(GPRun classifier, Instances evalData) {
             GPGenotype gp = classifier.getGp();
 …
             for(Instance instance: evalData) {
+                for(int i = 0; i < instance.numAttributes()-1; i++) {
+                    vars[i].set(instance.value(i));
+                // assign instance attribute values to the variables of the s-expression-tree
+                double[] tmp = WekaUtils.instanceValues(instance);
+                for(int i = 0; i < tmp.length; i++) {
+                    vars[i].set(tmp[i]);
+                }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 108

Legend:

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java

Download in other formats: