Changeset 135

trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java

-                      r98
+                      r135
      */
     private Boolean saveClassifier = null;
     /**
      * number of repetitions of an experiment (to account for randomness)
 …
         return saveClassifier;
+    }
     /**
      * number of repetitions of an experiment
 …
                 saveClassifier = true;
+            }
             else if( qName.equals("repetitions")) {
+            else if (qName.equals("repetitions")) {
                 repetitions = Integer.parseInt(attributes.getValue("number"));
+            }

trunk/CrossPare/src/de/ugoe/cs/cpdp/Runner.java

r100	r135
53	53	createConfig(threadPool, file.getAbsolutePath());
54	54	}
55		else if (file.isDirectory() && file.listFiles()~~!=null~~ ) {
	55	else if (file.isDirectory() && file.listFiles() != null) {
56	56	for (File subfile : file.listFiles()) {
57	57	if (subfile.isFile()) {

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAMIProcessor.java

-                      r86
+                      r135
     @Override
     public void setParameter(String parameters) {
+        // TODO Auto-generated method stub
+        // dummy, parameters not used
+    }

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAProcessor.java

-                      r86
+                      r135
     @Override
     public void setParameter(String parameters) {
+        // TODO Auto-generated method stub
+        // dummy, parameters not used
+    }

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/LogarithmTransform.java

-                      r86
+                      r135
             Instance instance = traindata.instance(i);
             for (int j = 0; j < testdata.numAttributes(); j++) {
+                if (traindata.attribute(j) != classAttribute && traindata.attribute(j).isNumeric())
+                if (traindata.attribute(j) != classAttribute &&
+                    traindata.attribute(j).isNumeric())
+                {
                     if (instance.value(j) < 0) {

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MORPH.java

-                      r120
+                      r135
 /**
  * Implements the MORPH data privatization.
+ * Implements the MORPH data privatization.
+ *
+ *
 …
      */
     Random rand = new Random();
     /**
      * parameter alpha for MORPH, default is 0.15
      */
     double alpha = 0.15;
     /**
      * parameter beta for MORPH, default is 0.35
      */
     double beta = 0.35;
     /**
      * Sets alpha and beta from a string with two space-separated doubles. A null or empty string keeps the default values.
 …
         if (parameters != null && !parameters.equals("")) {
             String[] values = parameters.split(" ");
             if( values.length!=2 ) {
+            if (values.length != 2) {
                 throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values");
+            }
 …
                 alpha = Double.parseDouble(values[0]);
                 beta = Double.parseDouble(values[1]);
+            } catch(NumberFormatException e) {
+            }
+            catch (NumberFormatException e) {
                 throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values");
+            }
 …
     @Override
     public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
         for( Instances traindata : traindataSet ) {
+        for (Instances traindata : traindataSet) {
             applyMORPH(traindata);
+        }
 …
         applyMORPH(traindata);
+    }
     /**
+     *
 …
      * </p>
+     *
+     * @param data data to which the processor is applied
+     * @param data
+     *            data to which the processor is applied
      */
     public void applyMORPH(Instances data) {
         for (int i=0; i<data.numInstances(); i++ ) {
+        for (int i = 0; i < data.numInstances(); i++) {
             morphInstance(data.get(i), data);
+        }
+    }
     /**
      * <p>
 …
      * </p>
+     *
+     * @param instance instance that is morphed
+     * @param data data based on which the instance is morphed
+     * @param instance
+     *            instance that is morphed
+     * @param data
+     *            data based on which the instance is morphed
      */
     public void morphInstance(Instance instance, Instances data) {
         Instance nearestUnlikeNeighbor = getNearestUnlikeNeighbor(instance, data);
+        if( nearestUnlikeNeighbor==null ) {
+            throw new RuntimeException("could not find nearest unlike neighbor within the data: " + data.relationName());
+        if (nearestUnlikeNeighbor == null) {
+            throw new RuntimeException("could not find nearest unlike neighbor within the data: " +
+                data.relationName());
+        }
+        for( int j=0; j<data.numAttributes() ; j++ ) {
+            if( data.attribute(j)!=data.classAttribute() && data.attribute(j).isNumeric()) {
+                double randVal = rand.nextDouble()*(beta-alpha)+alpha;
+                instance.setValue(j, instance.value(j) + randVal*(instance.value(j)-nearestUnlikeNeighbor.value(j)) );
+        for (int j = 0; j < data.numAttributes(); j++) {
+            if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
+                double randVal = rand.nextDouble() * (beta - alpha) + alpha;
+                instance.setValue(j, instance.value(j) +
+                    randVal * (instance.value(j) - nearestUnlikeNeighbor.value(j)));
+            }
+        }
+    }
     /**
      * <p>
      * Determines the nearest unlike neighbor of an instance.
+     * Determines the nearest unlike neighbor of an instance.
      * </p>
+     *
+     * @param instance instance to which the nearest unlike neighbor is determined
+     * @param data data where the nearest unlike neighbor is determined from
+     * @param instance
+     *            instance to which the nearest unlike neighbor is determined
+     * @param data
+     *            data where the nearest unlike neighbor is determined from
      * @return nearest unlike instance
      */
     public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
         Instance nearestUnlikeNeighbor = null;
         double[] instanceVector = new double[data.numAttributes()-1];
+        double[] instanceVector = new double[data.numAttributes() - 1];
         int tmp = 0;
         for( int j=0; j<data.numAttributes(); j++ ) {
             if( data.attribute(j)!=data.classAttribute() && data.attribute(j).isNumeric()) {
+        for (int j = 0; j < data.numAttributes(); j++) {
+            if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
                 instanceVector[tmp] = instance.value(j);
+            }
+        }
         double minDistance = Double.MAX_VALUE;
         for( int i=0 ; i<data.numInstances() ; i++ ) {
             if( instance.classValue() != data.instance(i).classValue() ) {
+        for (int i = 0; i < data.numInstances(); i++) {
+            if (instance.classValue() != data.instance(i).classValue()) {
                 double[] otherVector = new double[data.numAttributes() - 1];
                 tmp = 0;
                 for (int j = 0; j < data.numAttributes(); j++) {
+                    if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
+                    if (data.attribute(j) != data.classAttribute() &&
+                        data.attribute(j).isNumeric())
+                    {
                         otherVector[tmp++] = data.instance(i).value(j);
+                    }
+                }
                 if( MathArrays.distance(instanceVector, otherVector)<minDistance) {
+                if (MathArrays.distance(instanceVector, otherVector) < minDistance) {
                     minDistance = MathArrays.distance(instanceVector, otherVector);
                     nearestUnlikeNeighbor = data.instance(i);

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java

-                      r86
+                      r135
             Instance instance = traindata.instance(i);
             for (int j = 0; j < traindata.numAttributes(); j++) {
+                if (traindata.attribute(j) != classAttribute && traindata.attribute(j).isNumeric())
+                if (traindata.attribute(j) != classAttribute &&
+                    traindata.attribute(j).isNumeric())
+                {
                     instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j]));

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/NominalAttributeFilter.java

-                      r86
+                      r135
             // delete all instances where nominal attribute has the value of one of the parameter
             if (indexOfnominalAttributeValues.contains(wekaInstance
                 .value(indexOfConfidenceAttribute)))
+            if (indexOfnominalAttributeValues
+                .contains(wekaInstance.value(indexOfConfidenceAttribute)))
+            {
                 traindata.delete(j);

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Oversampling.java

-                      r86
+                      r135
             Resample resample = new Resample();
-            // TODO: resample.setSampleSizePercent((100.0*counts[1])/100+0.01);
-            // Ohne +0.01 wird bei tomcat, xerces-1.2 und jedit-4.0 ein negative
-            // weniger zurückgegeben
             resample.setSampleSizePercent((100.0 * counts[0]) / counts[1]);
             try {

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/SynonymAttributePruning.java

-                      r86
+                      r135
         double distance;
         for (int j = traindata.numAttributes() - 1; j >= 0; j--) {
             if( j!=traindata.classIndex() ) {
+            if (j != traindata.classIndex()) {
                 boolean hasClosest = false;
                 for (int i1 = 0; !hasClosest && i1 < traindata.size(); i1++) {
 …
                             double distanceJ = Double.MAX_VALUE;
                             for (int k = 0; k < traindata.numAttributes(); k++) {
+                                distance = Math.abs(traindata.get(i1).value(k) - traindata.get(i2).value(k));
+                                distance = Math
+                                    .abs(traindata.get(i1).value(k) - traindata.get(i2).value(k));
                                 if (distance < minVal) {
                                     minVal = distance;

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TCAPlusNormalization.java

-                      r86
+                      r135
 import weka.core.Instances;
+// normalization selected according to TCA+ rules (TCA has to be applied separately)
+/**
+ * <p>
+ * Normalization selected according to the TCA+ rules after Nam et al. (Transfer Defect Learning).
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class TCAPlusNormalization implements IProcessesingStrategy {
 …
     @Override
     public void setParameter(String parameters) {
+        // TODO Auto-generated method stub
+        // dummy, parameters not used
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances,
+     * weka.core.Instances)
+     */
     @Override
     public void apply(Instances testdata, Instances traindata) {
         applyTCAPlus(testdata, traindata);
+    }
     private void applyTCAPlus(Instances testdata, Instances traindata) {
         DistChar dcTest = WekaUtils.datasetDistance(testdata);
         DistChar dcTrain = WekaUtils.datasetDistance(traindata);
         // RULE 1:
+        if( 0.9*dcTrain.mean<=dcTest.mean && 1.1*dcTrain.mean>=dcTest.mean &&
+.9*dcTrain.std<=dcTest.std && 1.1*dcTrain.std>=dcTest.std) {
+        if (0.9 * dcTrain.mean <= dcTest.mean && 1.1 * dcTrain.mean >= dcTest.mean &&
+.9 * dcTrain.std <= dcTest.std && 1.1 * dcTrain.std >= dcTest.std)
+        {
             // do nothing
+        }
         // RULE 2:
+        else if((0.4*dcTrain.min>dcTest.min || 1.6*dcTrain.min<dcTest.min) &&
+                (0.4*dcTrain.max>dcTest.max || 1.6*dcTrain.min<dcTest.max) &&
+                (0.4*dcTrain.min>dcTest.num || 1.6*dcTrain.min<dcTest.num)) {
+        else if ((0.4 * dcTrain.min > dcTest.min || 1.6 * dcTrain.min < dcTest.min) &&
+            (0.4 * dcTrain.max > dcTest.max || 1.6 * dcTrain.min < dcTest.max) &&
+            (0.4 * dcTrain.min > dcTest.num || 1.6 * dcTrain.min < dcTest.num))
+        {
             NormalizationUtil.minMax(testdata);
             NormalizationUtil.minMax(traindata);
+        }
         // RULE 3:
+        else if((0.4*dcTrain.std>dcTest.std && dcTrain.num<dcTest.num) ||
+                (1.6*dcTrain.std<dcTest.std)&& dcTrain.num>dcTest.num) {
+        else if ((0.4 * dcTrain.std > dcTest.std && dcTrain.num < dcTest.num) ||
+            (1.6 * dcTrain.std < dcTest.std) && dcTrain.num > dcTest.num)
+        {
             NormalizationUtil.zScoreTraining(testdata, traindata);
+        }
         // RULE 4:
+        else if((0.4*dcTrain.std>dcTest.std && dcTrain.num>dcTest.num) ||
+                (1.6*dcTrain.std<dcTest.std)&& dcTrain.num<dcTest.num) {
+        else if ((0.4 * dcTrain.std > dcTest.std && dcTrain.num > dcTest.num) ||
+            (1.6 * dcTrain.std < dcTest.std) && dcTrain.num < dcTest.num)
+        {
             NormalizationUtil.zScoreTarget(testdata, traindata);
+        }
         //RULE 5:
+        // RULE 5:
         else {
             NormalizationUtil.zScore(testdata);

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TopMetricFilter.java

-                      r129
+                      r135
      */
     double correlationThreshold = 0.5;
     /*
      * (non-Javadoc)
 …
     @Override
     public void setParameter(String parameters) {
         if( parameters!=null && !parameters.equals("")) {
+        if (parameters != null && !parameters.equals("")) {
             correlationThreshold = Double.parseDouble(parameters);
+        }
 …
+    }
+    private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet) throws Exception {
+        Integer[] counts = new Integer[traindataSet.get(0).numAttributes()-1];
+        IntStream.range(0,counts.length).forEach(val -> counts[val] = 0);
+        for( Instances traindata : traindataSet ) {
+    private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet)
+        throws Exception
+    {
+        Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1];
+        IntStream.range(0, counts.length).forEach(val -> counts[val] = 0);
+        for (Instances traindata : traindataSet) {
             J48 decisionTree = new J48();
             decisionTree.buildClassifier(traindata);
             int k=0;
             for( int j=0; j<traindata.numAttributes(); j++) {
                 if(j!=traindata.classIndex()){
                     if( decisionTree.toString().contains(traindata.attribute(j).name()) ) {
                         counts[k] = counts[k]+1;
+            int k = 0;
+            for (int j = 0; j < traindata.numAttributes(); j++) {
+                if (j != traindata.classIndex()) {
+                    if (decisionTree.toString().contains(traindata.attribute(j).name())) {
+                        counts[k] = counts[k] + 1;
+                    }
                     k++;
 …
+        }
         int[] topkIndex = new int[counts.length];
         IntStream.range(0,counts.length).forEach(val -> topkIndex[val] = val);
+        IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val);
         SortUtils.quicksort(counts, topkIndex, true);
         // get CFSs for each training set
         List<Set<Integer>> cfsSets = new LinkedList<>();
         for( Instances traindata : traindataSet ) {
+        for (Instances traindata : traindataSet) {
             boolean selectionSuccessful = false;
             boolean secondAttempt = false;
 …
                         attsel.SelectAttributes(traindataCopy);
                         Set<Integer> cfsSet = new HashSet<>();
                         for( int attr : attsel.selectedAttributes() ) {
+                        for (int attr : attsel.selectedAttributes()) {
                             cfsSet.add(attr);
+                        }
 …
                         attsel.SelectAttributes(traindata);
                         Set<Integer> cfsSet = new HashSet<>();
                         for( int attr : attsel.selectedAttributes() ) {
+                        for (int attr : attsel.selectedAttributes()) {
                             cfsSet.add(attr);
+                        }
 …
             while (!selectionSuccessful); // dummy loop for internal continue
+        }
         double[] coverages = new double[topkIndex.length];
         for( Set<Integer> cfsSet : cfsSets ) {
+        for (Set<Integer> cfsSet : cfsSets) {
             Set<Integer> topkSet = new HashSet<>();
             for( int k=0; k<topkIndex.length ; k++ ) {
+            for (int k = 0; k < topkIndex.length; k++) {
                 topkSet.add(topkIndex[k]);
                 coverages[k] += (coverage(topkSet, cfsSet)/traindataSet.size());
+                coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size());
+            }
+        }
         double bestCoverageValue = Double.MIN_VALUE;
         int bestCoverageIndex = 0;
         for( int i=0; i<coverages.length; i++ ) {
             if( coverages[i]>bestCoverageValue) {
+        for (int i = 0; i < coverages.length; i++) {
+            if (coverages[i] > bestCoverageValue) {
                 bestCoverageValue = coverages[i];
                 bestCoverageIndex = i;
 …
         SpearmansCorrelation corr = new SpearmansCorrelation();
         double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex];
         for( Instances traindata : traindataSet ) {
+        for (Instances traindata : traindataSet) {
             double[][] vectors = new double[bestCoverageIndex][traindata.size()];
             for( int i=0; i<traindata.size(); i++ ) {
                 for( int j=0; j<bestCoverageIndex; j++) {
+            for (int i = 0; i < traindata.size(); i++) {
+                for (int j = 0; j < bestCoverageIndex; j++) {
                     vectors[j][i] = traindata.get(i).value(topkIndex[j]);
+                }
+            }
             for( int j=0; j<bestCoverageIndex; j++ ) {
                 for( int k=j+1; k<bestCoverageIndex; k++ ) {
+            for (int j = 0; j < bestCoverageIndex; j++) {
+                for (int k = j + 1; k < bestCoverageIndex; k++) {
                     correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k]));
+                }
 …
+        }
         Set<Integer> topkSetIndexSet = new TreeSet<>();
+        // j<30 ensures that the computational time does not explode since the powerset is 2^n in complexity
+        for( int j=0; j<bestCoverageIndex && j<30 ; j++ ) {
+        // j<30 ensures that the computational time does not explode since the powerset is 2^n in
+        // complexity
+        for (int j = 0; j < bestCoverageIndex && j < 30; j++) {
             topkSetIndexSet.add(j);
+        }
 …
         double bestOptCoverage = Double.MIN_VALUE;
         Set<Integer> opttopkSetIndexSet = null;
         for( Set<Integer> combination : allCombinations ) {
             if( isUncorrelated(correlationMatrix, combination) ) {
+        for (Set<Integer> combination : allCombinations) {
+            if (isUncorrelated(correlationMatrix, combination)) {
                 double currentCoverage = 0.0;
                 Set<Integer> topkCombination = new TreeSet<>();
                 for( Integer index : combination ) {
+                for (Integer index : combination) {
                     topkCombination.add(topkIndex[index]);
+                }
                 for( Set<Integer> cfsSet : cfsSets ) {
                     currentCoverage += (coverage(topkCombination, cfsSet)/traindataSet.size());
+                }
                 if( currentCoverage > bestOptCoverage ) {
+                for (Set<Integer> cfsSet : cfsSets) {
+                    currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size());
+                }
+                if (currentCoverage > bestOptCoverage) {
                     bestOptCoverage = currentCoverage;
                     opttopkSetIndexSet = combination;
 …
+        }
         Set<Integer> opttopkIndex = new TreeSet<>();
         for( Integer index : opttopkSetIndexSet) {
+        for (Integer index : opttopkSetIndexSet) {
             opttopkIndex.add(topkIndex[index]);
+        }
         Console.traceln(Level.FINE, "selected the following metrics:");
         for( Integer index : opttopkIndex) {
+        for (Integer index : opttopkIndex) {
             Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name());
+        }
         // finally remove attributes
         for( int j=testdata.numAttributes()-1; j>=0; j-- ) {
             if( j!=testdata.classIndex() && !opttopkIndex.contains(j) ) {
+        for (int j = testdata.numAttributes() - 1; j >= 0; j--) {
+            if (j != testdata.classIndex() && !opttopkIndex.contains(j)) {
                 testdata.deleteAttributeAt(j);
                 for( Instances traindata : traindataSet ) {
+                for (Instances traindata : traindataSet) {
                     traindata.deleteAttributeAt(j);
+                }
 …
+        }
+    }
     private boolean isUncorrelated(double[][] correlationMatrix, Set<Integer> combination) {
         Integer[] intCombination = combination.toArray(new Integer[0]);
         boolean areUncorrelated = true;
+        for( int i=0 ; areUncorrelated && i<intCombination.length ; i++ ) {
+            for( int j=i+1; areUncorrelated && j<intCombination.length ; j++ ) {
+                areUncorrelated &= correlationMatrix[intCombination[i]][intCombination[j]]>correlationThreshold;
+        for (int i = 0; areUncorrelated && i < intCombination.length; i++) {
+            for (int j = i + 1; areUncorrelated && j < intCombination.length; j++) {
+                areUncorrelated &=
+                    correlationMatrix[intCombination[i]][intCombination[j]] > correlationThreshold;
+            }
+        }
         return areUncorrelated;
+    }
     private double coverage(Set<Integer> topkSet, Set<Integer> cfsSet) {
         Set<Integer> topkSetCopy1 = new HashSet<>(topkSet);
 …
         Set<Integer> topkSetCopy2 = new HashSet<>(topkSet);
         topkSetCopy2.addAll(cfsSet);
         return ((double) topkSetCopy1.size())/topkSetCopy2.size();
+        return ((double) topkSetCopy1.size()) / topkSetCopy2.size();
+    }
+}

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TransferComponentAnalysis.java

-                      r86
+                      r135
  * </p>
+ *
- * TODO comment class
  * @author Steffen Herbold
  */
 public class TransferComponentAnalysis implements IProcessesingStrategy {
+    /**
+     * Dimension of the reduced data.
+     */
     int reducedDimension = 5;
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
     @Override
     public void setParameter(String parameters) {
+    }
+        // dummy, parameters ignored
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances,
+     * weka.core.Instances)
+     */
     @Override
     public void apply(Instances testdata, Instances traindata) {
 …
+    }
+    /**
+     * <p>
+     * calculates the linear kernel function between two instances
+     * </p>
+     *
+     * @param x1
+     *            first instance
+     * @param x2
+     *            second instance
+     * @return kernel value
+     */
     private double linearKernel(Instance x1, Instance x2) {
         double value = 0.0d;
 …
+    }
+    /**
+     * <p>
+     * Applies TCA to the test and training data.
+     * </p>
+     *
+     * @param testdata
+     *            the test data
+     * @param traindata
+     *            the training data
+     */
     private void applyTCA(Instances testdata, Instances traindata) {
         final int sizeTest = testdata.numInstances();
 …
+    }
+    /**
+     * <p>
+     * Creates the kernel matrix of the test and training data
+     * </p>
+     *
+     * @param testdata
+     *            the test data
+     * @param traindata
+     *            the training data
+     * @return kernel matrix
+     */
     private PrimitiveMatrix buildKernel(Instances testdata, Instances traindata) {
         final int kernelDim = traindata.numInstances() + testdata.numInstances();
 …
+    }
+    /**
+     * <p>
+     * Calculates the kernel norm matrix, i.e., the matrix which is used for matrix multiplication
+     * to calculate the kernel norm.
+     * </p>
+     *
+     * @param dimTest
+     *            dimension of the test data
+     * @param sizeTrain
+     *            number of instances of the training data
+     * @return kernel norm matrix
+     */
     private PrimitiveMatrix buildKernelNormMatrix(final int dimTest, final int sizeTrain) {
         final double trainSquared = 1.0 / (sizeTrain * (double) sizeTrain);
 …
+    }
+    /**
+     * <p>
+     * Creates the center matrix
+     * </p>
+     *
+     * @param sizeTest
+     *            number of instances of the test data
+     * @param sizeTrain
+     *            number of instances of the training data
+     * @return center matrix
+     */
     private PrimitiveMatrix buildCenterMatrix(final int sizeTest, final int sizeTrain) {
         Builder<PrimitiveMatrix> centerMatrix =
 …
+    }
+    /**
+     * <p>
+     * Builds the mu-Matrix for offsetting values.
+     * </p>
+     *
+     * @param sizeTest
+     *            number of instances of the test data
+     * @param sizeTrain
+     *            number of instances of the training data
+     * @param mu
+     *            mu parameter
+     * @return mu-Matrix
+     */
     private PrimitiveMatrix buildMuMatrix(final int sizeTest,
                                           final int sizeTrain,

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Undersampling.java

-                      r86
+                      r135
             Resample resample = new Resample();
-            // TODO: resample.setSampleSizePercent((100.0*counts[1])/100+0.01);
-            // Ohne +0.01 wird bei tomcat, xerces-1.2 und jedit-4.0 ein negative weniger
-            // zurückgegeben
             resample.setSampleSizePercent((100.0 * counts[1]) / counts[0]);
             try {

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreTargetNormalization.java

-                      r86
+                      r135
  * @author Steffen Herbold
  */
+public class ZScoreTargetNormalization implements ISetWiseProcessingStrategy, IProcessesingStrategy
+public class ZScoreTargetNormalization
+    implements ISetWiseProcessingStrategy, IProcessesingStrategy
+{

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/AbstractCharacteristicSelection.java

r86	r135
104	104	}
105	105	else if ("median".equals(characteristics[j])) {
106		instanceValues[i * characteristics.length + j] = Utils.kthSmallestValue(testdata.attributeToDoubleArray(i), testdata.size()/2);
	106	instanceValues[i * characteristics.length + j] =
	107	Utils.kthSmallestValue(testdata.attributeToDoubleArray(i),
	108	testdata.size() / 2);
107	109	}
108	110	else {
…	…
138	140	}
139	141	else if ("median".equals(characteristics[j])) {
140		instanceValues[i * characteristics.length + j] = Utils.kthSmallestValue(traindata.attributeToDoubleArray(i), traindata.size()/2);
	142	instanceValues[i * characteristics.length + j] =
	143	Utils.kthSmallestValue(traindata.attributeToDoubleArray(i),
	144	traindata.size() / 2);
141	145	}
142	146	else {
…	…
173	177	}
174	178	catch (Exception e) {
175		throw new RuntimeException(
176		"Unexpected exception during normalization of distributional characteristics.",
	179	throw new RuntimeException("Unexpected exception during normalization of distributional characteristics.",
177	180	e);
178	181	}

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/CLIFF.java

-                      r120
+                      r135
 public class CLIFF implements IPointWiseDataselectionStrategy, ISetWiseDataselectionStrategy {
+    /**
+     * percentage of data selected
+     */
     private double percentage = 0.10;
+    /**
+     * number of ranges considered
+     */
     private final int numRanges = 10;
 …
     @Override
     public void setParameter(String parameters) {
         if( parameters!=null ) {
+        if (parameters != null) {
             percentage = Double.parseDouble(parameters);
+        }
+    }
     /**
+    /*
      * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances,
      *      org.apache.commons.collections4.list.SetUniqueList)
+     * org.apache.commons.collections4.list.SetUniqueList)
      */
     @Override
     public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
         for( Instances traindata : traindataSet ) {
+        for (Instances traindata : traindataSet) {
             applyCLIFF(traindata);
+        }
+    }
     /**
+    /*
      * @see de.ugoe.cs.cpdp.dataselection.PointWiseDataselectionStrategy#apply(weka.core.Instances,
      *      weka.core.Instances)
+     * weka.core.Instances)
      */
     @Override
 …
+    }
+    /**
+     * <p>
+     * Applies the CLIFF relevancy filter to the data.
+     * </p>
+     *
+     * @param data
+     *            the data
+     * @return CLIFF-filtered data
+     */
     protected Instances applyCLIFF(Instances data) {
         final double[][] powerAttributes = new double[data.size()][data.numAttributes()];
         final double[] powerEntity = new double[data.size()];
         final int[] counts = data.attributeStats(data.classIndex()).nominalCounts;
         final double probDefect = data.numInstances() / (double) counts[1];
         for( int j=0; j<data.numAttributes(); j++ ) {
             if( data.attribute(j)!=data.classAttribute()) {
+        for (int j = 0; j < data.numAttributes(); j++) {
+            if (data.attribute(j) != data.classAttribute()) {
                 final double[] ranges = getRanges(data, j);
                 final double[] probDefectRange = getRangeProbabilities(data, j, ranges);
                 for( int i=0 ; i<data.numInstances() ; i++ ) {
+                for (int i = 0; i < data.numInstances(); i++) {
                     final double value = data.instance(i).value(j);
                     final int range = determineRange(ranges, value);
                     double probClass, probNotClass, probRangeClass, probRangeNotClass;
                     if( data.instance(i).classValue()==1 ) {
+                    if (data.instance(i).classValue() == 1) {
                         probClass = probDefect;
                         probNotClass = 1.0-probDefect;
+                        probNotClass = 1.0 - probDefect;
                         probRangeClass = probDefectRange[range];
+                        probRangeNotClass = 1.0-probDefectRange[range];
+                    } else {
+                        probClass = 1.0-probDefect;
+                        probRangeNotClass = 1.0 - probDefectRange[range];
+                    }
+                    else {
+                        probClass = 1.0 - probDefect;
                         probNotClass = probDefect;
                         probRangeClass = 1.0-probDefectRange[range];
+                        probRangeClass = 1.0 - probDefectRange[range];
                         probRangeNotClass = probDefectRange[range];
+                    }
+                    powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)/(probRangeClass*probClass+probRangeNotClass*probNotClass);
+                    powerAttributes[i][j] = Math.pow(probRangeClass, 2.0) /
+                        (probRangeClass * probClass + probRangeNotClass * probNotClass);
+                }
+            }
+        }
         for( int i=0; i<data.numInstances(); i++ ) {
+        for (int i = 0; i < data.numInstances(); i++) {
             powerEntity[i] = 1.0;
             for (int j=0; j<data.numAttributes() ; j++ ) {
+            for (int j = 0; j < data.numAttributes(); j++) {
                 powerEntity[i] *= powerAttributes[i][j];
+            }
 …
         double[] sortedPower = powerEntity.clone();
         Arrays.sort(sortedPower);
         double cutOff = sortedPower[(int) (data.numInstances()*(1-percentage))];
+        double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))];
         final Instances selected = new Instances(data);
         selected.delete();
         for (int i=0; i<data.numInstances(); i++) {
             if( powerEntity[i]>=cutOff ) {
+        for (int i = 0; i < data.numInstances(); i++) {
+            if (powerEntity[i] >= cutOff) {
                 selected.add(data.instance(i));
+            }
 …
         return selected;
+    }
+    /**
+     * <p>
+     * Gets an array with the ranges from the data for a given attribute
+     * </p>
+     *
+     * @param data
+     *            the data
+     * @param j
+     *            index of the attribute
+     * @return the ranges for the attribute
+     */
     private double[] getRanges(Instances data, int j) {
         double[] values = new double[numRanges+1];
         for( int k=0; k<numRanges; k++ ) {
             values[k] = data.kthSmallestValue(j, (int) (data.size()*(k+1.0)/numRanges));
+        double[] values = new double[numRanges + 1];
+        for (int k = 0; k < numRanges; k++) {
+            values[k] = data.kthSmallestValue(j, (int) (data.size() * (k + 1.0) / numRanges));
+        }
         values[numRanges] = data.attributeStats(j).numericStats.max;
         return values;
+    }
+    /**
+     * <p>
+     * Gets the probabilities of a positive prediction for each range for a given attribute
+     * </p>
+     *
+     * @param data
+     *            the data
+     * @param j
+     *            index of the attribute
+     * @param ranges
+     *            the ranges
+     * @return probabilities for each range
+     */
     private double[] getRangeProbabilities(Instances data, int j, double[] ranges) {
         double[] probDefectRange = new double[numRanges];
         int[] countRange = new int[numRanges];
         int[] countDefect = new int[numRanges];
         for( int i=0; i<data.numInstances() ; i++ ) {
             int range = determineRange(ranges, data.instance(i).value(j));
+        for (int i = 0; i < data.numInstances(); i++) {
+            int range = determineRange(ranges, data.instance(i).value(j));
             countRange[range]++;
             if( data.instance(i).classValue()== 1 ) {
+            if (data.instance(i).classValue() == 1) {
                 countDefect[range]++;
+            }
+        }
         for( int k=0; k<numRanges; k++ ) {
+        for (int k = 0; k < numRanges; k++) {
             probDefectRange[k] = ((double) countDefect[k]) / countRange[k];
+        }
         return probDefectRange;
+    }
+    /**
+     * <p>
+     * Determines the range of a given value
+     * </p>
+     *
+     * @param ranges
+     *            the possible ranges
+     * @param value
+     *            the value
+     * @return index of the range
+     */
     private int determineRange(double[] ranges, double value) {
         for( int k=0; k<numRanges; k++ ) {
             if( value<=ranges[k+1] ) {
+        for (int k = 0; k < numRanges; k++) {
+            if (value <= ranges[k + 1]) {
                 return k;
+            }

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/DBSCANFilter.java

-                      r92
+                      r135
                     .valid(); clusterIter.advance())
+                {
+                    int internalIndex = clusterIter.internalGetIndex() - testdata.size() - firstInternalIndex;
+                    int internalIndex =
+                        clusterIter.internalGetIndex() - testdata.size() - firstInternalIndex;
                     if (internalIndex >= 0) {
                         // index belongs to a training instance

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/DecisionTreeSelection.java

r116	r135
84	84	}
85	85	REPTree repTree = new REPTree();
86		if~~( repTree.getNumFolds()>similarityData.size()~~ ) {
	86	if (repTree.getNumFolds() > similarityData.size()) {
87	87	repTree.setNumFolds(similarityData.size());
88	88	}

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/LACE2.java

-                      r120
+                      r135
 //   See the License for the specific language governing permissions and
 //   limitations under the License.
 package de.ugoe.cs.cpdp.dataselection;
 …
 public class LACE2 implements ISetWiseDataselectionStrategy {
+    /**
+     * percentage of data selected by the internal CLIFF.
+     */
     private double percentage = 0.10;
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
     @Override
     public void setParameter(String parameters) {
         if( parameters!=null && !parameters.isEmpty()) {
+        if (parameters != null && !parameters.isEmpty()) {
             percentage = Double.parseDouble(parameters);
+        }
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances,
+     * org.apache.commons.collections4.list.SetUniqueList)
+     */
     @Override
     public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
         Instances selectedData = new Instances(testdata);
         selectedData.clear();
         LinkedList<Instances> traindataCopy = new LinkedList<>(traindataSet);
         Collections.shuffle(traindataCopy);
         CLIFF cliff = new CLIFF();
         cliff.setParameter(Double.toString(percentage));
 …
         Median median = new Median();
         double minDist = Double.MIN_VALUE;
         for( Instances traindata : traindataCopy ) {
+        for (Instances traindata : traindataCopy) {
             Instances cliffedData = cliff.applyCLIFF(traindata);
             if( minDist==Double.MIN_VALUE ) {
+            if (minDist == Double.MIN_VALUE) {
                 // determine distance for leader-follower algorithm
                 Instances sample;
                 if( traindata.size()>100 ) {
+                if (traindata.size() > 100) {
                     Resample resample = new Resample();
                     resample.setSampleSizePercent(100.0/traindata.size()*100.0);
+                    resample.setSampleSizePercent(100.0 / traindata.size() * 100.0);
                     resample.setBiasToUniformClass(0.0);
                     resample.setNoReplacement(true);
 …
                         throw new RuntimeException(e);
+                    }
+                } else {
+                }
+                else {
                     sample = new Instances(traindata);
+                }
                 double[] distances = new double[sample.size()];
                 for( int i=0; i<sample.size(); i++ ) {
+                for (int i = 0; i < sample.size(); i++) {
                     Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(sample.get(i), sample);
+                    distances[i] = MathArrays.distance(WekaUtils.instanceValues(sample.get(i)), WekaUtils.instanceValues(unlikeNeighbor));
+                    distances[i] = MathArrays.distance(WekaUtils.instanceValues(sample.get(i)),
+                                                       WekaUtils.instanceValues(unlikeNeighbor));
+                }
                 minDist = median.evaluate(distances);
+            }
+            for( int i=0; i<cliffedData.size(); i++ ) {
+                Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(cliffedData.get(i), selectedData);
+                if( unlikeNeighbor==null ) {
+            for (int i = 0; i < cliffedData.size(); i++) {
+                Instance unlikeNeighbor =
+                    morph.getNearestUnlikeNeighbor(cliffedData.get(i), selectedData);
+                if (unlikeNeighbor == null) {
                     selectedData.add(cliffedData.get(i));
+                } else {
+                    double distance = MathArrays.distance(WekaUtils.instanceValues(cliffedData.get(i)), WekaUtils.instanceValues(unlikeNeighbor));
+                    if( distance>minDist ) {
+                }
+                else {
+                    double distance =
+                        MathArrays.distance(WekaUtils.instanceValues(cliffedData.get(i)),
+                                            WekaUtils.instanceValues(unlikeNeighbor));
+                    if (distance > minDist) {
                         morph.morphInstance(cliffedData.get(i), cliffedData);
                         selectedData.add(cliffedData.get(i));
 …
+        }
+    }
+}

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/MahalanobisOutlierRemoval.java

-                      r117
+                      r135
         RealMatrix inverseCovariance;
         try {
+            inverseCovariance =
+            new LUDecomposition(new Covariance(values).getCovarianceMatrix()).getSolver()
+                .getInverse();
+        } catch(SingularMatrixException e) {
+            Console.traceln(Level.WARNING, "could not perform Mahalanobis outlier removal due to singular covariance matrix");
+            inverseCovariance = new LUDecomposition(new Covariance(values).getCovarianceMatrix())
+                .getSolver().getInverse();
+        }
+        catch (SingularMatrixException e) {
+            Console
+                .traceln(Level.WARNING,
+                         "could not perform Mahalanobis outlier removal due to singular covariance matrix");
             return;
+        }

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/NeighborhoodFilter.java

-                      r86
+                      r135
     @Override
     public void setParameter(String parameters) {
+        // TODO Auto-generated method stub
+        // dummy, parameters not used
+    }
 …
      * </p>
+     *
+     * @param testdata test data
+     * @param traindata training data
+     * @param testdata
+     *            test data
+     * @param traindata
+     *            training data
      * @return filtered training data
      */

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PetersFilter.java

-                      r86
+                      r135
 /**
+ * Filter according to F. Peters, T. Menzies, and A. Marcus: Better Cross Company Defect Prediction <br>
+ * Filter according to F. Peters, T. Menzies, and A. Marcus: Better Cross Company Defect Prediction
+ * <br>
  * <br>
  * This filter does not work, the paper has been withdrawn.
 …
 public class PetersFilter implements IPointWiseDataselectionStrategy {
     /**
+    /*
      * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
      */
 …
+    }
     /**
+    /*
      * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances,
      *      weka.core.Instances)
+     * weka.core.Instances)
      */
     @Override

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PointWiseEMClusterSelection.java

-                      r86
+                      r135
  * Use in Config:
+ *
  * Specify number of clusters -N = Num Clusters <pointwiseselector
  * name="PointWiseEMClusterSelection" param="-N 10"/>
+ * Specify number of clusters -N = Num Clusters
+ * <pointwiseselector name="PointWiseEMClusterSelection" param="-N 10"/>
+ *
  * Try to determine the number of clusters: -I 10 = max iterations -X 5 = 5 folds for cross
  * evaluation -max = max number of clusters <pointwiseselector name="PointWiseEMClusterSelection"
  * param="-I 10 -X 5 -max 300"/>
+ * evaluation -max = max number of clusters
+ * <pointwiseselector name="PointWiseEMClusterSelection" param="-I 10 -X 5 -max 300"/>
+ *
  * Don't forget to add: <preprocessor name="Normalization" param=""/>
 …
 public class PointWiseEMClusterSelection implements IPointWiseDataselectionStrategy {
+    /**
+     * parameters passed to the selection
+     */
     private String[] params;
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
     @Override
     public void setParameter(String parameters) {
 …
+            }
+            Console.traceln(Level.INFO,
+                            String.format("our testdata is in: " + selectedCluster.size() +
+                                " different clusters"));
+            Console.traceln(Level.INFO, String
+                .format("our testdata is in: " + selectedCluster.size() + " different clusters"));
             // 5. get cluster membership of our traindata
 …
             for (int j = 0; j < ctrain.numInstances(); j++) {
                 // get the cluster number from the attributes
+                cnumber =
+                    Integer.parseInt(ctrain.get(j).stringValue(ctrain.get(j).numAttributes() - 1)
+                        .replace("cluster", ""));
+                cnumber = Integer.parseInt(ctrain.get(j)
+                    .stringValue(ctrain.get(j).numAttributes() - 1).replace("cluster", ""));
                 // Console.traceln(Level.INFO,
 …
+            }
+            Console.traceln(Level.INFO,
+                            String.format("that leaves us with: " + selected.numInstances() +
+                                " traindata instances from " + traindata.numInstances()));
+            Console.traceln(Level.INFO, String.format("that leaves us with: " +
+                selected.numInstances() + " traindata instances from " + traindata.numInstances()));
+        }
         catch (Exception e) {

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SeparatabilitySelection.java

-                      r86
+                      r135
                     inst.setClassValue(1.0);
                     sample.add(inst);
+                    inst =
+                        new DenseInstance(
+                                          traindata.instance(rand.nextInt(traindata.numInstances())));
+                    inst = new DenseInstance(traindata
+                        .instance(rand.nextInt(traindata.numInstances())));
                     inst.setDataset(sample);
                     inst.setClassValue(0.0);
 …
+                }
                 catch (Exception e) {
+                    throw new RuntimeException(
+                                               "cross-validation during calculation of separatability failed",
+                    throw new RuntimeException("cross-validation during calculation of separatability failed",
                                                e);
+                }

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMClusterSelection.java

r86	r135
74	74	}
75	75	catch (Exception e) {
76		throw new RuntimeException(
77		"error applying setwise EM clustering training data selection",
	76	throw new RuntimeException("error applying setwise EM clustering training data selection",
78	77	e);
79	78	}

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMContextSelection.java

-                      r86
+                      r135
 public class SetWiseEMContextSelection implements ISetWiseDataselectionStrategy {
+    /**
+     * context factors
+     */
     private String[] project_context_factors; // = new String[]{"TND", "TNC", "TNF", "TLOC"};
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
     @Override
     public void setParameter(String parameters) {
 …
+        }
         catch (Exception e) {
+            throw new RuntimeException(
+                                       "error applying setwise EM clustering training data selection",
+            throw new RuntimeException("error applying setwise EM clustering training data selection",
                                        e);
+        }
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances,
+     * org.apache.commons.collections4.list.SetUniqueList)
+     */
     @Override
     public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
 …
      * @return
      */
+    protected Instances getContextFactors(Instances testdata, SetUniqueList<Instances> traindataSet)
+    protected Instances getContextFactors(Instances testdata,
+                                          SetUniqueList<Instances> traindataSet)
+    {
         // setup weka Instances for clustering
 …
                 remove.add(traindata);
                 // Console.traceln(Level.WARNING,
+                // "rmove attribute "+attribute+" test: "+testdata.firstInstance().value(testdata.attribute(attribute))+" train: "+traindata.firstInstance().value(traindata.attribute(attribute)));
+                // "rmove attribute "+attribute+" test:
+                // "+testdata.firstInstance().value(testdata.attribute(attribute))+" train:
+                // "+traindata.firstInstance().value(traindata.attribute(attribute)));
+            }
+        }
 …
+        }
         catch (Exception e) {
+            throw new RuntimeException(
+                                       "Unexpected exception during normalization of distributional characteristics.",
+            throw new RuntimeException("Unexpected exception during normalization of distributional characteristics.",
                                        e);
+        }

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseKNNSelection.java

-                      r86
+                      r135
         int closestIndex = 1;
         for (int i = 1; i < data.numInstances(); i++) {
+            double distance =
+                MathArrays.distance(data.instance(0).toDoubleArray(), data.instance(i)
+                    .toDoubleArray());
+            double distance = MathArrays.distance(data.instance(0).toDoubleArray(),
+                                                  data.instance(i).toDoubleArray());
             if (distance < closestDistance) {
                 closestDistance = distance;

trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SynonymOutlierRemoval.java

-                      r86
+                      r135
 /**
  * <p>
  * Synonym outlier removal after Amasaki et al. (2015).
+ * Synonym outlier removal after Amasaki et al. (2015).
  * </p>
+ *
 …
 public class SynonymOutlierRemoval implements IPointWiseDataselectionStrategy {
+    /* (non-Javadoc)
+    /*
+     * (non-Javadoc)
+     *
      * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
      */
 …
+    }
+    /* (non-Javadoc)
+     * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, weka.core.Instances)
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances,
+     * weka.core.Instances)
      */
     @Override
 …
      * </p>
+     *
+     * @param traindata data from which the outliers are removed.
+     * @param traindata
+     *            data from which the outliers are removed.
      */
     public void applySynonymRemoval(Instances traindata) {
         double minDistance[][] = new double[traindata.size()][traindata.numAttributes()-1];
         double minDistanceAttribute[] = new double[traindata.numAttributes()-1];
+        double minDistance[][] = new double[traindata.size()][traindata.numAttributes() - 1];
+        double minDistanceAttribute[] = new double[traindata.numAttributes() - 1];
         double distance;
         for( int j=0; j<minDistanceAttribute.length; j++ ) {
+        for (int j = 0; j < minDistanceAttribute.length; j++) {
             minDistanceAttribute[j] = Double.MAX_VALUE;
+        }
         for (int i1 = traindata.size()-1; i1 < traindata.size(); i1++) {
             int k=0;
+        for (int i1 = traindata.size() - 1; i1 < traindata.size(); i1++) {
+            int k = 0;
             for (int j = 0; j < traindata.numAttributes(); j++) {
                 if( j!=traindata.classIndex() ) {
+                if (j != traindata.classIndex()) {
                     minDistance[i1][k] = Double.MAX_VALUE;
                     for (int i2 = 0; i2 < traindata.size(); i2++) {
                         if (i1 != i2) {
+                            distance = Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j));
+                            distance =
+                                Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j));
                             if (distance < minDistance[i1][k]) {
                                 minDistance[i1][k] = distance;
+                            }
                             if( distance < minDistanceAttribute[k] ) {
+                            if (distance < minDistanceAttribute[k]) {
                                 minDistanceAttribute[k] = distance;
+                            }
 …
+            }
+        }
         for( int i=traindata.size()-1; i>=0; i-- ) {
+        for (int i = traindata.size() - 1; i >= 0; i--) {
             boolean hasClosest = false;
             for( int j=0; !hasClosest && j<traindata.numAttributes(); j++ ) {
                 hasClosest = minDistance[i][j]<=minDistanceAttribute[j];
+            for (int j = 0; !hasClosest && j < traindata.numAttributes(); j++) {
+                hasClosest = minDistance[i][j] <= minDistanceAttribute[j];
+            }
             if( !hasClosest ) {
+            if (!hasClosest) {
                 traindata.delete(i);
+            }

trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/AbstractWekaEvaluation.java

-                      r132
+                      r135
  * <ul>
  * <li>succHe: Success with recall>=0.7, precision>=0.5</li>
  * <li>succZi: Success with recall>0.7, precision>0.7</li>
+ * <li>succZi: Success with recall>=0.75, precision>=0.75, and error<=0.25</li>
  * <li>succG75: Success with gscore>0.75</li>
  * <li>succG60: Success with gscore>0.6</li>
 …
     private PrintWriter output = new PrintWriter(System.out);
+    /**
+     * flag that defines if the output is the system out
+     */
     private boolean outputIsSystemOut = true;
+    /**
+     * name of the configuration
+     */
     private String configurationName = "default";
 …
                       Instances traindata,
                       List<ITrainer> trainers,
                       List<Double> efforts,
+                      List<Double> efforts,
                       boolean writeHeader,
                       List<IResultStorage> storages)
 …
         final List<ExperimentResult> experimentResults = new LinkedList<>();
         String productName = testdata.relationName();
         for (ITrainer trainer : trainers) {
             if (trainer instanceof IWekaCompatibleTrainer) {
                 classifiers.add(((IWekaCompatibleTrainer) trainer).getClassifier());
+                experimentResults.add(new ExperimentResult(configurationName, productName, ((IWekaCompatibleTrainer) trainer).getName()));
+                experimentResults
+                    .add(new ExperimentResult(configurationName, productName,
+                                              ((IWekaCompatibleTrainer) trainer).getName()));
+            }
             else {
 …
             double aucec = calculateReviewEffort(testdata, classifier, efforts);
             double succHe = eval.recall(1) >= 0.7 && eval.precision(1) >= 0.5 ? 1.0 : 0.0;
             double succZi = eval.recall(1) >= 0.7 && eval.precision(1) >= 0.7 ? 1.0 : 0.0;
+            double succZi = eval.recall(1) >= 0.75 && eval.precision(1) >= 0.75 && eval.errorRate()<=0.25 ? 1.0 : 0.0;
             double succG75 = gmeasure > 0.75 ? 1.0 : 0.0;
             double succG60 = gmeasure > 0.6 ? 1.0 : 0.0;
             output.append("," + succHe);
             output.append("," + succZi);
             output.append("," + succG75);
             output.append("," + succG60);
+            output.append("," + succG60);
             output.append("," + eval.errorRate());
             output.append("," + eval.recall(1));
 …
             output.append("," + eval.numTrueNegatives(1));
             output.append("," + eval.numFalsePositives(1));
             ExperimentResult result = resultIter.next();
             result.setSizeTestData(testdata.numInstances());
             result.setSizeTrainingData(traindata.numInstances());
-            result.setSuccHe(succHe);
-            result.setSuccZi(succZi);
-            result.setSuccG75(succG75);
-            result.setSuccG60(succG60);
             result.setError(eval.errorRate());
             result.setRecall(eval.recall(1));
 …
             result.setTn(eval.numTrueNegatives(1));
             result.setFp(eval.numFalsePositives(1));
             for( IResultStorage storage : storages ) {
+            for (IResultStorage storage : storages) {
                 storage.addResult(result);
+            }
 …
         output.flush();
+    }
+    private double calculateReviewEffort(Instances testdata, Classifier classifier, List<Double> efforts) {
+        if( efforts==null ) {
+    /**
+     * <p>
+     * Calculates the effort. TODO: IMPLEMENTATION BUGGY! MUST BE FIXED!
+     * </p>
+     *
+     * @param testdata
+     *            the test data
+     * @param classifier
+     *            the classifier
+     * @param efforts
+     *            the effort information for each instance in the test data
+     * @return
+     */
+    private double calculateReviewEffort(Instances testdata,
+                                         Classifier classifier,
+                                         List<Double> efforts)
+    {
+        if (efforts == null) {
             return 0;
+        }
         final List<Integer> bugPredicted = new ArrayList<>();
         final List<Integer> nobugPredicted = new ArrayList<>();
 …
+            }
             catch (Exception e) {
+                throw new RuntimeException(
+                                           "unexpected error during the evaluation of the review effort",
+                throw new RuntimeException("unexpected error during the evaluation of the review effort",
                                            e);
+            }
 …
+    }
+    /**
+     * <p>
+     * Calculates effort. Deprecated. Do not use!
+     * </p>
+     *
+     * @param testdata
+     *            the test data
+     * @param classifier
+     *            the classifier
+     * @return
+     */
     @SuppressWarnings("unused")
     @Deprecated
 …
             loc = testdata.attribute("CountLineCodeExe");
+        }
         if( loc == null ) {
+        if (loc == null) {
             return 0.0;
+        }
 …
+            }
             catch (Exception e) {
+                throw new RuntimeException(
+                                           "unexpected error during the evaluation of the review effort",
+                throw new RuntimeException("unexpected error during the evaluation of the review effort",
                                            e);
+            }
 …
                 output = new PrintWriter(new FileOutputStream(parameters));
                 outputIsSystemOut = false;
                 int filenameStart = parameters.lastIndexOf('/')+1;
+                int filenameStart = parameters.lastIndexOf('/') + 1;
                 int filenameEnd = parameters.lastIndexOf('.');
                 configurationName = parameters.substring(filenameStart, filenameEnd);

trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/CVWekaEvaluation.java

r86	r135
31	31	public class CVWekaEvaluation extends AbstractWekaEvaluation {
32	32
33		/**
	33	/*
34	34	* @see de.ugoe.cs.cpdp.eval.AbstractWekaEvaluation#createEvaluator(weka.core.Instances,
35	35	* weka.classifiers.Classifier)

trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/ExperimentResult.java

-                      r68
+                      r135
 package de.ugoe.cs.cpdp.eval;
+/**
+ * <p>
+ * Data class to store experiment results
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class ExperimentResult {
+    /**
+     * configuration name of the experiment
+     */
     private final String configurationName;
+    /**
+     * name of the target product
+     */
     private final String productName;
+    /**
+     * name of the classifier used
+     */
     private final String classifier;
+    /**
+     * number of instances of the target product
+     */
+    int sizeTestData;
+    /**
+     * number of instances of the training data
+     */
+    int sizeTrainingData;
+    /**
+     * error of the prediction
+     */
+    double error = Double.NaN;
+    /**
+     * recall of the prediction
+     */
+    double recall = Double.NaN;
+    /**
+     * precision of the prediction
+     */
+    double precision = Double.NaN;
+    /**
+     * F1 score of the prediction
+     */
+    double fscore = Double.NaN;
+    /**
+     * G score of the prediction
+     */
+    double gscore = Double.NaN;
+    /**
+     * Matthews correlation coefficient of the prediction
+     */
+    double mcc = Double.NaN;
+    /**
+     * Area under the curve of the prediction
+     */
+    double auc = Double.NaN;
+    /**
+     * Effort of the prediction
+     */
+    double aucec = Double.NaN;
+    /**
+     * True positive rate of the prediction
+     */
+    double tpr = Double.NaN;
+    /**
+     * True negative rate of the prediction
+     */
+    double tnr = Double.NaN;
+    /**
+     * false positive rate of the prediction
+     */
+    double fpr = Double.NaN;
+    /**
+     * false negative rate of the prediction
+     */
+    double fnr = Double.NaN;
+    /**
+     * number of true positives
+     */
+    double tp = Double.NaN;
+    /**
+     * number of false negatives
+     */
+    double fn = Double.NaN;
+    /**
+     * number of true negatives
+     */
+    double tn = Double.NaN;
+    /**
+     * number of false positives
+     */
+    double fp = Double.NaN;
+    /**
+     * <p>
+     * Constructor. Creates a new ExperimentResult.
+     * </p>
+     *
+     * @param configurationName
+     *            the configuration name
+     * @param productName
+     *            the product name
+     * @param classifier
+     *            the classifier name
+     */
     public ExperimentResult(String configurationName, String productName, String classifier) {
         this.configurationName = configurationName;
 …
         this.classifier = classifier;
+    }
+    int sizeTestData;
+    int sizeTrainingData;
+    double succHe = Double.NaN;
+    double succZi = Double.NaN;
+    double succG75 = Double.NaN;
+    double succG60 = Double.NaN;
+    double error = Double.NaN;
+    double recall = Double.NaN;
+    double precision = Double.NaN;
+    double fscore = Double.NaN;
+    double gscore = Double.NaN;
+    double mcc = Double.NaN;
+    double auc = Double.NaN;
+    double aucec = Double.NaN;
+    double tpr = Double.NaN;
+    double tnr = Double.NaN;
+    double fpr = Double.NaN;
+    double fnr = Double.NaN;
+    double tp = Double.NaN;
+    double fn = Double.NaN;
+    double tn = Double.NaN;
+    double fp = Double.NaN;
+    /**
+     * <p>
+     * returns the configuration name
+     * </p>
+     *
+     * @return the configuration name
+     */
     public String getConfigurationName() {
         return configurationName;
+    }
+    /**
+     * <p>
+     * returns the product name
+     * </p>
+     *
+     * @return the product name
+     */
     public String getProductName() {
         return productName;
+    }
+    /**
+     * <p>
+     * returns the classifier name
+     * </p>
+     *
+     * @return the classifier name
+     */
     public String getClassifier() {
         return classifier;
+    }
+    /**
+     * <p>
+     * returns the number of instances of the target product
+     * </p>
+     *
+     * @return number of instances
+     */
     public int getSizeTestData() {
         return sizeTestData;
+    }
+    /**
+     * <p>
+     * sets the number of instances of the target product
+     * </p>
+     *
+     * @param sizeTestData
+     *            number of instances
+     */
     public void setSizeTestData(int sizeTestData) {
         this.sizeTestData = sizeTestData;
+    }
+    /**
+     * <p>
+     * returns the number of instances of the training data
+     * </p>
+     *
+     * @return number of instances
+     */
     public int getSizeTrainingData() {
         return sizeTrainingData;
+    }
+    /**
+     * <p>
+     * sets the number of instances of the training data
+     * </p>
+     *
+     * @param sizeTrainingData
+     *            number of instances
+     */
     public void setSizeTrainingData(int sizeTrainingData) {
         this.sizeTrainingData = sizeTrainingData;
+    }
+    public double getSuccHe() {
+        return succHe;
+    }
+    public void setSuccHe(double succHe) {
+        this.succHe = succHe;
+    }
+    public double getSuccZi() {
+        return succZi;
+    }
+    public void setSuccZi(double succZi) {
+        this.succZi = succZi;
+    }
+    public double getSuccG75() {
+        return succG75;
+    }
+    public void setSuccG75(double succG75) {
+        this.succG75 = succG75;
+    }
+    public double getSuccG60() {
+        return succG60;
+    }
+    public void setSuccG60(double succG60) {
+        this.succG60 = succG60;
+    }
+    /**
+     * <p>
+     * returns the error
+     * </p>
+     *
+     * @return the error
+     */
     public double getError() {
         return error;
+    }
+    /**
+     * <p>
+     * sets the error
+     * </p>
+     *
+     * @param error
+     *            the error
+     */
     public void setError(double error) {
         this.error = error;
+    }
+    /**
+     * <p>
+     * returns the recall
+     * </p>
+     *
+     * @return the recall
+     */
     public double getRecall() {
         return recall;
+    }
+    /**
+     * <p>
+     * sets the recall
+     * </p>
+     *
+     * @param recall
+     *            the recall
+     */
     public void setRecall(double recall) {
         this.recall = recall;
+    }
+    /**
+     * <p>
+     * returns the precision
+     * </p>
+     *
+     * @return the precision
+     */
     public double getPrecision() {
         return precision;
+    }
+    /**
+     * <p>
+     * sets the precision
+     * </p>
+     *
+     * @param precision
+     *            the precision
+     */
     public void setPrecision(double precision) {
         this.precision = precision;
+    }
+    /**
+     * <p>
+     * returns the F1 score
+     * </p>
+     *
+     * @return the F1 score
+     */
     public double getFscore() {
         return fscore;
+    }
+    /**
+     * <p>
+     * sets the F1 score
+     * </p>
+     *
+     * @param fscore
+     *            the F1 score
+     */
     public void setFscore(double fscore) {
         this.fscore = fscore;
+    }
+    /**
+     * <p>
+     * returns the G score
+     * </p>
+     *
+     * @return the G score
+     */
     public double getGscore() {
         return gscore;
+    }
+    /**
+     * <p>
+     * sets the G score
+     * </p>
+     *
+     * @param gscore
+     *            the G score
+     */
     public void setGscore(double gscore) {
         this.gscore = gscore;
+    }
+    /**
+     * <p>
+     * returns the MCC
+     * </p>
+     *
+     * @return the MCC
+     */
     public double getMcc() {
         return mcc;
+    }
+    /**
+     * <p>
+     * sets the MCC
+     * </p>
+     *
+     * @param mcc
+     *            the MCC
+     */
     public void setMcc(double mcc) {
         this.mcc = mcc;
+    }
+    /**
+     * <p>
+     * returns the AUC
+     * </p>
+     *
+     * @return the AUC
+     */
     public double getAuc() {
         return auc;
+    }
+    /**
+     * <p>
+     * sets the AUC
+     * </p>
+     *
+     * @param auc
+     *            the AUC
+     */
     public void setAuc(double auc) {
         this.auc = auc;
+    }
+    /**
+     * <p>
+     * returns the effort as AUCEC
+     * </p>
+     *
+     * @return the effort
+     */
     public double getAucec() {
         return aucec;
+    }
+    /**
+     * <p>
+     * sets the effort as AUCEC
+     * </p>
+     *
+     * @param aucec
+     *            the effort
+     */
     public void setAucec(double aucec) {
         this.aucec = aucec;
+    }
+    /**
+     * <p>
+     * returns the TPR
+     * </p>
+     *
+     * @return the TPR
+     */
     public double getTpr() {
         return tpr;
+    }
+    /**
+     * <p>
+     * sets the TPR
+     * </p>
+     *
+     * @param tpr
+     *            the TPR
+     */
     public void setTpr(double tpr) {
         this.tpr = tpr;
+    }
+    /**
+     * <p>
+     * returns the TNR
+     * </p>
+     *
+     * @return the TNR
+     */
     public double getTnr() {
         return tnr;
+    }
+    /**
+     * <p>
+     * sets the TNR
+     * </p>
+     *
+     * @param tnr
+     *            the TNR
+     */
     public void setTnr(double tnr) {
         this.tnr = tnr;
+    }
+    /**
+     * <p>
+     * returns the FPR
+     * </p>
+     *
+     * @return the FPR
+     */
     public double getFpr() {
         return fpr;
+    }
+    /**
+     * <p>
+     * sets the FPR
+     * </p>
+     *
+     * @param fpr
+     *            the FPR
+     */
     public void setFpr(double fpr) {
         this.fpr = fpr;
+    }
+    /**
+     * <p>
+     * returns the FNR
+     * </p>
+     *
+     * @return the FNR
+     */
     public double getFnr() {
         return fnr;
+    }
+    /**
+     * <p>
+     * sets the FNR
+     * </p>
+     *
+     * @param fnr
+     *            the FNR
+     */
     public void setFnr(double fnr) {
         this.fnr = fnr;
+    }
+    /**
+     * <p>
+     * returns the TPs
+     * </p>
+     *
+     * @return the TPs
+     */
     public double getTp() {
         return tp;
+    }
+    /**
+     * <p>
+     * sets the TPs
+     * </p>
+     *
+     * @param tp
+     *            the TPs
+     */
     public void setTp(double tp) {
         this.tp = tp;
+    }
+    /**
+     * <p>
+     * returns the FNs
+     * </p>
+     *
+     * @return the FNs
+     */
     public double getFn() {
         return fn;
+    }
+    /**
+     * <p>
+     * sets the FNs
+     * </p>
+     *
+     * @param fn
+     *            the FNs
+     */
     public void setFn(double fn) {
         this.fn = fn;
+    }
+    /**
+     * <p>
+     * returns the TNs
+     * </p>
+     *
+     * @return the TNs
+     */
     public double getTn() {
         return tn;
+    }
+    /**
+     * <p>
+     * sets the TNs
+     * </p>
+     *
+     * @param tn
+     *            the TNs
+     */
     public void setTn(double tn) {
         this.tn = tn;
+    }
+    /**
+     * <p>
+     * returns the FPs
+     * </p>
+     *
+     * @return the FPs
+     */
     public double getFp() {
         return fp;
+    }
+    /**
+     * <p>
+     * sets the FPs
+     * </p>
+     *
+     * @param fp
+     *            the FPs
+     */
     public void setFp(double fp) {
         this.fp = fp;

trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/MySQLResultStorage.java

-                      r121
+                      r135
 import java.util.Properties;
 import com.mysql.jdbc.jdbc2.optional.MysqlDataSource;
 …
 public class MySQLResultStorage implements IResultStorage {
-    /**
-     * Connection to the database
-     */
-    //private Connection con = null;
     /**
      * Connection pool for the data base.
 …
         sql.append(result.getSizeTestData() + ",");
         sql.append(result.getSizeTrainingData() + ",");
-        sql.append(result.getSuccHe() + ",");
-        sql.append(result.getSuccZi() + ",");
-        sql.append(result.getSuccG75() + ",");
-        sql.append(result.getSuccG60() + ",");
         sql.append(result.getError() + ",");
         sql.append(result.getRecall() + ",");
 …
     public int containsResult(String experimentName, String productName, String classifierName) {
         String sql = "SELECT COUNT(*) as cnt FROM crosspare.results WHERE configurationName=\'" +
+            experimentName + "\' AND productName=\'" + productName + "\' AND classifier=\'" + classifierName + "\';";
+            experimentName + "\' AND productName=\'" + productName + "\' AND classifier=\'" +
+            classifierName + "\';";
         Statement stmt;
         try {

trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/NormalWekaEvaluation.java

-                      r86
+                      r135
 public class NormalWekaEvaluation extends AbstractWekaEvaluation {
     /**
+    /*
      * @see de.ugoe.cs.cpdp.eval.AbstractWekaEvaluation#createEvaluator(weka.core.Instances,
      *      weka.classifiers.Classifier)
+     * weka.classifiers.Classifier)
      */
     @Override

trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/AbstractCrossProjectExperiment.java

-                      r132
+                      r135
+            }
+        }
         // sort versions
         Collections.sort(versions);
 …
+    }
+    /**
+     * <p>
+     * helper function that checks if the results are already in the data store
+     * </p>
+     *
+     * @param version
+     *            version for which the results are checked
+     * @return number of results already available for the version; 0 if no result storage is
+     *         configured
+     */
     private int resultsAvailable(SoftwareVersion version) {
         if (config.getResultStorages().isEmpty()) {
             return 0;
+        }
         List<ITrainer> allTrainers = new LinkedList<>();
         for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
 …
             allTrainers.add(trainer);
+        }
         int available = Integer.MAX_VALUE;
         for (IResultStorage storage : config.getResultStorages()) {
             String classifierName = ((IWekaCompatibleTrainer) allTrainers.get(0)).getName();
+            int curAvailable = storage.containsResult(config.getExperimentName(), version.getVersion(), classifierName);
+            if( curAvailable<available ) {
+            int curAvailable = storage.containsResult(config.getExperimentName(),
+                                                      version.getVersion(), classifierName);
+            if (curAvailable < available) {
                 available = curAvailable;
+            }

trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/ClassifierCreationExperiment.java

-                      r132
+                      r135
             for (IProcessesingStrategy processor : config.getPreProcessors()) {
+                Console.traceln(Level.FINE, String
+                    .format("[%s] [%02d/%02d] %s: applying preprocessor %s",
+                            config.getExperimentName(), versionCount, versions.size(),
+                            testVersion.getProject(), processor.getClass().getName()));
+                Console.traceln(Level.FINE,
+                                String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
+                                              config.getExperimentName(), versionCount,
+                                              versions.size(), testVersion.getProject(),
+                                              processor.getClass().getName()));
                 processor.apply(testdata, traindata);
+            }
             for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
+                Console.traceln(Level.FINE, String
+                    .format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
+                            config.getExperimentName(), versionCount, versions.size(),
+                            testVersion.getProject(), dataselector.getClass().getName()));
+                Console
+                    .traceln(Level.FINE,
+                             String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
+                                           config.getExperimentName(), versionCount,
+                                           versions.size(), testVersion.getProject(),
+                                           dataselector.getClass().getName()));
                 traindata = dataselector.apply(testdata, traindata);
+            }
             for (IProcessesingStrategy processor : config.getPostProcessors()) {
+                Console.traceln(Level.FINE, String
+                    .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
+                            config.getExperimentName(), versionCount, versions.size(),
+                            testVersion.getProject(), processor.getClass().getName()));
+                Console
+                    .traceln(Level.FINE,
+                             String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
+                                           config.getExperimentName(), versionCount,
+                                           versions.size(), testVersion.getProject(),
+                                           processor.getClass().getName()));
                 processor.apply(testdata, traindata);
+            }
 …
                     try {
                         weka.core.SerializationHelper.write(resultsDir.getAbsolutePath() + "/" +
+                                                                trainer.getName() + "-" +
+                                                                testVersion.getProject(),
+                            trainer.getName() + "-" + testVersion.getProject(),
                                                             trainerToSave.getClassifier());
+                    }
 …
             for (IEvaluationStrategy evaluator : config.getEvaluators()) {
+                Console.traceln(Level.FINE, String
+                    .format("[%s] [%02d/%02d] %s: applying evaluator %s",
+                            config.getExperimentName(), versionCount, versions.size(),
+                            testVersion.getProject(), evaluator.getClass().getName()));
+                Console.traceln(Level.FINE,
+                                String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
+                                              config.getExperimentName(), versionCount,
+                                              versions.size(), testVersion.getProject(),
+                                              evaluator.getClass().getName()));
                 if (writeHeader) {
 …
                         config.getExperimentName() + ".csv");
+                }
+                evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader, config.getResultStorages());
+                evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader,
+                                config.getResultStorages());
                 writeHeader = false;
+            }
 …
             versionCount++;
+            Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished",
+                                                      config.getExperimentName(), versionCount,
+                                                      versions.size(), testVersion.getProject()));
+            Console.traceln(Level.INFO,
+                            String.format("[%s] [%02d/%02d] %s: finished",
+                                          config.getExperimentName(), versionCount, versions.size(),
+                                          testVersion.getProject()));
+        }

trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/CrossValidationExperiment.java

-                      r132
+                      r135
+            }
+        }
         numTrainers += config.getSetWiseTrainers().size();
         numTrainers += config.getSetWiseTestdataAwareTrainers().size();
 …
                                               testVersionCount, testVersion.getVersion()));
                 int numResultsAvailable = resultsAvailable(testVersion);
                 if (numResultsAvailable >= numTrainers*config.getRepetitions()) {
+                if (numResultsAvailable >= numTrainers * config.getRepetitions()) {
                     Console.traceln(Level.INFO,
                                     String.format(
 …
                 Instances testdata = testVersion.getInstances();
                 List<Double> efforts = testVersion.getEfforts();
                 for (ITrainingStrategy trainer : config.getTrainers()) {
                     Console.traceln(Level.FINE,
 …
                     trainer.apply(testdata);
+                }
                 File resultsDir = new File(config.getResultsPath());
                 if (!resultsDir.exists()) {
 …
+    }
+    /**
+     * <p>
+     * helper function that checks if the results are already in the data store
+     * </p>
+     *
+     * @param version
+     *            version for which the results are checked
+     * @return number of results already available for the version; 0 if no result storage is
+     *         configured
+     */
     private int resultsAvailable(SoftwareVersion version) {
         if (config.getResultStorages().isEmpty()) {
             return 0;
+        }
         List<ITrainer> allTrainers = new LinkedList<>();
         for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
 …
             allTrainers.add(trainer);
+        }
         int available = Integer.MAX_VALUE;
         for (IResultStorage storage : config.getResultStorages()) {
             String classifierName = ((IWekaCompatibleTrainer) allTrainers.get(0)).getName();
+            int curAvailable = storage.containsResult(config.getExperimentName(), version.getVersion(), classifierName);
+            if( curAvailable<available ) {
+            int curAvailable = storage.containsResult(config.getExperimentName(),
+                                                      version.getVersion(), classifierName);
+            if (curAvailable < available) {
                 available = curAvailable;
+            }

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeFolderLoader.java

-                      r86
+                      r135
 package de.ugoe.cs.cpdp.loader;
+/**
+ * <p>
+ * Loads data from the automotive defect data set from Audi Electronic Ventures donated by Altinger
+ * et al. at the MSR 2015. This loader contains the changes per commit, i.e., it is for JIT defect
+ * prediction.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class AUDIChangeFolderLoader extends AbstractFolderLoader {

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeLoader.java

-                      r86
+                      r135
 /**
+ * TODO
+ * <p>
+ * Loads data from the automotive defect data set from Audi Electronic Ventures donated by Altinger
+ * et al. at the MSR 2015. This loader contains the changes per commit, i.e., it is for JIT defect
+ * prediction.
+ * </p>
+ *
+ * @author sherbold
+ *
+ * @author Steffen Herbold
  */
 class AUDIChangeLoader implements SingleVersionLoader {
+    /**
+     * <p>
+     * Internal helper class.
+     * </p>
+     *
+     * @author Steffen Herbold
+     */
     private class EntityRevisionPair implements Comparable<EntityRevisionPair> {
+        /**
+         * string that defines an entity
+         */
         private final String entity;
+        /**
+         * revision number of the entity
+         */
         private final int revision;
+        /**
+         * <p>
+         * Constructor. Creates a new EntityRevisionPair.
+         * </p>
+         *
+         * @param entity
+         *            the entity
+         * @param revision
+         *            the revision
+         */
         public EntityRevisionPair(String entity, int revision) {
             this.entity = entity;
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
         @Override
         public boolean equals(Object other) {
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#hashCode()
+         */
         @Override
         public int hashCode() {
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Comparable#compareTo(java.lang.Object)
+         */
         @Override
         public int compareTo(EntityRevisionPair other) {
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#toString()
+         */
         @Override
         public String toString() {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File)
+     */
     @Override
     public Instances load(File file) {
 …
         for (int i = 1; i < linesBug.length; i++) {
             lineSplitBug = linesBug[i].split(";");
+            entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer
+                                        .parseInt(lineSplitBug[revisionIndex])), i);
+            entityRevisionPairs.put(
+                                    new EntityRevisionPair(lineSplitBug[0],
+                                                           Integer
+                                                               .parseInt(lineSplitBug[revisionIndex])),
+                                    i);
+        }

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIDataLoader.java

-                      r86
+                      r135
 /**
+ * TODO
+ * Loads data from the automotive defect data set from Audi Electronic Ventures donated by Altinger
+ * et al. at the MSR 2015. This loader creates overall defect labels, for the final revision.
+ *
  * @author sherbold
+ * @author Steffen Herbold
+ *
  */

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIFolderLoader.java

-                      r86
+                      r135
 package de.ugoe.cs.cpdp.loader;
+/**
+ *
+ * <p>
+ * Loads data from the automotive defect data set from Audi Electronic Ventures donated by Altinger
+ * et al. at the MSR 2015. This loader creates overall defect labels, for the final revision.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class AUDIFolderLoader extends AbstractFolderLoader {

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AbstractFolderLoader.java

-                      r132
+                      r135
+    }
     /**
+    /*
      * @see de.ugoe.cs.cpdp.loader.IVersionLoader#load()
      */
 …
                             Instances data = instancesLoader.load(versionFile);
                             String versionName = data.relationName();
+                            List<Double> efforts = getEfforts(data);
+                            versions.add(new SoftwareVersion(projectName, versionName, data, efforts));
+                            List<Double> efforts = getEfforts(data);
+                            versions
+                                .add(new SoftwareVersion(projectName, versionName, data, efforts));
+                        }
+                    }
 …
         return versions;
+    }
+    /**
+     * <p>
+     * Retrieves the efforts of the instances
+     * </p>
+     *
+     * @param data
+     *            the data
+     * @return the efforts of the instances; null if no effort attribute is found
+     */
     private List<Double> getEfforts(Instances data) {
         // attribute in the JURECZKO data and default
 …
             effortAtt = data.attribute("CountLineCodeExe");
+        }
         if( effortAtt == null ) {
+        if (effortAtt == null) {
             return null;
+        }
         List<Double> efforts = new ArrayList<>(data.size());
         for( int i=0; i<data.size(); i++ ) {
+        for (int i = 0; i < data.size(); i++) {
             efforts.add(data.get(i).value(effortAtt));
+        }
 …
      * Returns the concrete {@link SingleVersionLoader} to be used with this folder loader.
+     *
      * @return
+     * @return the version loader
      */
     abstract protected SingleVersionLoader getSingleLoader();

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVMockusDataLoader.java

-                      r86
+                      r135
 import de.ugoe.cs.util.FileTools;
+/**
+ * <p>
+ * Reads data from the data set provided by Mockus (and Zhang) for universal defect prediction.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 class CSVMockusDataLoader implements SingleVersionLoader {

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVMockusFolderLoader.java

-                      r86
+                      r135
 package de.ugoe.cs.cpdp.loader;
+/**
+ * <p>
+ * Reads data from the data set provided by Mockus (and Zhang) for universal defect prediction.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class CSVMockusFolderLoader extends AbstractFolderLoader {

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/DecentDataLoader.java

-                      r86
+                      r135
         for (String key : EPackage.Registry.INSTANCE.keySet()) {
             metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key));
         };
+        } ;
         for (String key : metaModelCache.keySet()) {
             EPackage.Registry.INSTANCE.remove(key);
         };
+        } ;
         // Workaround to generate a usable URI. Absolute path is not
 …
+        }
         else {
+            Console.printerrln("Could not determine model type, file should end with either .etl or .eol");
+            Console
+                .printerrln("Could not determine model type, file should end with either .etl or .eol");
             return null;
+        }

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/IDecentVersionLoader.java

-                      r86
+                      r135
 import de.ugoe.cs.cpdp.versions.SoftwareVersion;
+/**
+ * <p>
+ * Extends the version loader for the loading of DECENT models
+ * </p>
+ *
+ * @author Fabian Trautsch
+ */
 public interface IDecentVersionLoader extends IVersionLoader {
+    /**
+     * <p>
+     * loads the versions and defines the DECENT attributes to be used
+     * </p>
+     *
+     * @param decentAttributes the attributes
+     * @return the versions
+     */
     public List<SoftwareVersion> load(List<String> decentAttributes);

trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/RelinkLoader.java

-                      r119
+                      r135
 package de.ugoe.cs.cpdp.loader;
 …
 import weka.core.Instances;
+/**
+ * <p>
+ * Loads data from the RELINK data set.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class RelinkLoader implements SingleVersionLoader {
 …
         attrNames.add("SumEssential");
         attrNames.add("isDefective");
         for( int j=tmpData.numAttributes()-1; j>=0 ; j-- ) {
             if( !attrNames.contains(tmpData.attribute(j).name()) ) {
+        for (int j = tmpData.numAttributes() - 1; j >= 0; j--) {
+            if (!attrNames.contains(tmpData.attribute(j).name())) {
                 tmpData.deleteAttributeAt(j);
+            }
+        }
         // setting class attribute
         tmpData.setClassIndex(tmpData.numAttributes() - 1);

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/FixClass.java

-                      r86
+                      r135
 public class FixClass extends AbstractClassifier {
+    /**
+     * default serial ID
+     */
     private static final long serialVersionUID = 1L;
+    /**
+     * default prediction: non-defective
+     */
     private double fixedClassValue = 0.0d;
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.AbstractClassifier#setOptions(java.lang.String[])
+     */
     @Override
     public void setOptions(String[] options) throws Exception {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
+     */
     @Override
     public double classifyInstance(Instance instance) {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+     */
     @Override
     public void buildClassifier(Instances traindata) throws Exception {

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/GPTraining.java

-                      r125
+                      r135
 package de.ugoe.cs.cpdp.training;
 …
  * Genetic Programming Trainer
+ *
+ * Implementation (mostly) according to Liu et al. Evolutionary Optimization of Software Quality Modeling with Multiple Repositories.
+ * Implementation (mostly) according to Liu et al. Evolutionary Optimization of Software Quality
+ * Modeling with Multiple Repositories.
+ *
+ * - GPRun is a Run of a complete Genetic Program Evolution, we want several complete runs.
+ * - GPVClassifier is the Validation Classifier
+ * - GPVVClassifier is the Validation-Voting Classifier
+ * - GPRun is a Run of a complete Genetic Program Evolution, we want several complete runs. -
+ * GPVClassifier is the Validation Classifier - GPVVClassifier is the Validation-Voting Classifier
+ *
+ *  config: <setwisetrainer name="GPTraining" param="populationSize:1000,numberRuns:10" />
+ * config: <setwisetrainer name="GPTraining" param="populationSize:1000,numberRuns:10" />
+ *
+ * @author Alexander Trautsch
  */
+public class GPTraining implements ISetWiseTrainingStrategy, IWekaCompatibleTrainer  {
+public class GPTraining implements ISetWiseTrainingStrategy, IWekaCompatibleTrainer {
+    /**
+     * the internal validation-and-voting classifier
+     */
     private GPVVClassifier classifier = null;
+    // default values from the paper
+    /**
+     * size of the population of the genetic program; default from the paper is 1000
+     */
     private int populationSize = 1000;
+    /**
+     * minimal depth of the S-expression tree at the start of the training; default from the paper
+     * is 2
+     */
     private int initMinDepth = 2;
+    /**
+     * maximal depth of the S-expression tree at the start of the training; default from the paper
+     * is 6
+     */
     private int initMaxDepth = 6;
+    /**
+     * size of the tournaments used for selection; default from the paper is 7
+     */
     private int tournamentSize = 7;
+    /**
+     * number of genetic generations considered (i.e., number of iterations); default from the paper
+     * is 50
+     */
     private int maxGenerations = 50;
+    /**
+     * weight factor for the prediction errors for cost estimation; default from the paper is 15
+     */
     private double errorType2Weight = 15;
+    private int numberRuns = 20;  // im paper 20 per errorType2Weight then additional 20
+    private int maxDepth = 20;  // max depth within one program
+    private int maxNodes = 100;  // max nodes within one program
+    /**
+     * number of internal replications from which the best result is picked; default from the paper
+     * is 20
+     */
+    private int numberRuns = 20;
+    /**
+     * maximal depth of the S-expression tree; default from the paper is 20
+     */
+    private int maxDepth = 20;
+    /**
+     * maximal number of nodes of the S-expression tree; default from the paper is 100
+     */
+    private int maxNodes = 100;
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
     @Override
     public void setParameter(String parameters) {
         String[] params = parameters.split(",");
         String[] keyvalue = new String[2];
         for(int i=0; i < params.length; i++) {
+        for (int i = 0; i < params.length; i++) {
             keyvalue = params[i].split(":");
+            switch(keyvalue[0]) {
+            switch (keyvalue[0])
+            {
                 case "populationSize":
                     this.populationSize = Integer.parseInt(keyvalue[1]);
                 break;
+                    break;
                 case "initMinDepth":
                     this.initMinDepth = Integer.parseInt(keyvalue[1]);
                 break;
+                    break;
                 case "tournamentSize":
                     this.tournamentSize = Integer.parseInt(keyvalue[1]);
                 break;
+                    break;
                 case "maxGenerations":
                     this.maxGenerations = Integer.parseInt(keyvalue[1]);
                 break;
+                    break;
                 case "errorType2Weight":
                     this.errorType2Weight = Double.parseDouble(keyvalue[1]);
                 break;
+                    break;
                 case "numberRuns":
                     this.numberRuns = Integer.parseInt(keyvalue[1]);
                 break;
+                    break;
                 case "maxDepth":
                     this.maxDepth = Integer.parseInt(keyvalue[1]);
                 break;
+                    break;
                 case "maxNodes":
                     this.maxNodes = Integer.parseInt(keyvalue[1]);
                 break;
+            }
+        }
+                    break;
+            }
+        }
         this.classifier = new GPVVClassifier();
+        ((GPVClassifier)this.classifier).configure(populationSize, initMinDepth, initMaxDepth, tournamentSize, maxGenerations, errorType2Weight, numberRuns, maxDepth, maxNodes);
+        ((GPVClassifier) this.classifier)
+            .configure(populationSize, initMinDepth, initMaxDepth, tournamentSize, maxGenerations,
+                       errorType2Weight, numberRuns, maxDepth, maxNodes);
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see
+     * de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy#apply(org.apache.commons.collections4.list.
+     * SetUniqueList)
+     */
     @Override
     public void apply(SetUniqueList<Instances> traindataSet) {
         try {
             classifier.buildClassifier(traindataSet);
+        }catch(Exception e) {
+        }
+        catch (Exception e) {
             throw new RuntimeException(e);
+        }
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy#getName()
+     */
     @Override
     public String getName() {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.IWekaCompatibleTrainer#getClassifier()
+     */
     @Override
     public Classifier getClassifier() {
         return this.classifier;
+    }
+    /**
+     * <p>
+     * Internal helper class that stores the data in a format that can be used by the genetic
+     * program.
+     * </p>
+     *
+     * @author Alexander Trautsch
+     */
     public class InstanceData {
+        /**
+         * instances values
+         */
         private double[][] instances_x;
+        /**
+         * class labels
+         */
         private boolean[] instances_y;
+        /**
+         * <p>
+         * Constructor. Creates the internal data representation.
+         * </p>
+         *
+         * @param instances
+         *            the data from which the internal representation is created
+         */
         public InstanceData(Instances instances) {
             this.instances_x = new double[instances.numInstances()][instances.numAttributes()-1];
+            this.instances_x = new double[instances.numInstances()][instances.numAttributes() - 1];
             this.instances_y = new boolean[instances.numInstances()];
             Instance current;
             for(int i=0; i < this.instances_x.length; i++) {
+            for (int i = 0; i < this.instances_x.length; i++) {
                 current = instances.get(i);
                 this.instances_x[i] = WekaUtils.instanceValues(current);
 …
+            }
+        }
+        /**
+         * <p>
+         * returns the instance values
+         * </p>
+         *
+         * @return the instance values
+         */
         public double[][] getX() {
             return instances_x;
+        }
+        /**
+         * <p>
+         * returns the instance labels
+         * </p>
+         *
+         * @return the instance labels
+         */
         public boolean[] getY() {
             return instances_y;
+        }
+    }
     /**
      * One Run executed by a GP Classifier
      */
     public class GPRun extends AbstractClassifier {
+        /**
+         * generated serialization ID
+         */
         private static final long serialVersionUID = -4250422550107888789L;
+        /**
+         * size of the population of the genetic program
+         */
         private int populationSize;
+        /**
+         * minimal depth of the S-expression tree at the start of the training
+         */
         private int initMinDepth;
+        /**
+         * maximal depth of the S-expression tree at the start of the training
+         */
         private int initMaxDepth;
+        /**
+         * size of the tournaments used for selection
+         */
         private int tournamentSize;
+        /**
+         * number of genetic generations considered (i.e., number of iterations)
+         */
         private int maxGenerations;
+        /**
+         * weight factor for the prediction errors for cost estimation
+         */
         private double errorType2Weight;
+        /**
+         * maximal depth of the S-expression tree
+         */
         private int maxDepth;
+        /**
+         * maximal number of nodes of the S-expression tree
+         */
         private int maxNodes;
+        /**
+         * genetic program
+         */
         private GPGenotype gp;
+        /**
+         * description of the problem to be solved by the genetic program
+         */
         private GPProblem problem;
+        public void configure(int populationSize, int initMinDepth, int initMaxDepth, int tournamentSize, int maxGenerations, double errorType2Weight, int maxDepth, int maxNodes) {
+        /**
+         * <p>
+         * Configures the runner
+         * </p>
+         *
+         * @param populationSize
+         *            the population size
+         * @param initMinDepth
+         *            the initial minimal depth of the S-expression tree
+         * @param initMaxDepth
+         *            the initial maximal depth of the S-expression tree
+         * @param tournamentSize
+         *            the tournament size for selection
+         * @param maxGenerations
+         *            the number of generations created
+         * @param errorType2Weight
+         *            weight factor for the prediction errors
+         * @param maxDepth
+         *            maximal depth of the S-expression tree
+         * @param maxNodes
+         *            maximal number of nodes of the S-expression tree
+         */
+        public void configure(int populationSize,
+                              int initMinDepth,
+                              int initMaxDepth,
+                              int tournamentSize,
+                              int maxGenerations,
+                              double errorType2Weight,
+                              int maxDepth,
+                              int maxNodes)
+        {
             this.populationSize = populationSize;
             this.initMinDepth = initMinDepth;
 …
             this.maxNodes = maxNodes;
+        }
+        /**
+         * <p>
+         * returns the genetic program
+         * </p>
+         *
+         * @return the genetic program
+         */
         public GPGenotype getGp() {
             return this.gp;
+        }
+        /**
+         * <p>
+         * returns the variables of the genetic program
+         * </p>
+         *
+         * @return the variables
+         */
         public Variable[] getVariables() {
+            return ((CrossPareGP)this.problem).getVariables();
+        }
+            return ((CrossPareGP) this.problem).getVariables();
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+         */
         @Override
         public void buildClassifier(Instances traindata) throws Exception {
+            InstanceData train = new InstanceData(traindata);
+            this.problem = new CrossPareGP(train.getX(), train.getY(), this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.errorType2Weight, this.maxDepth, this.maxNodes);
+            InstanceData train = new InstanceData(traindata);
+            this.problem =
+                new CrossPareGP(train.getX(), train.getY(), this.populationSize, this.initMinDepth,
+                                this.initMaxDepth, this.tournamentSize, this.errorType2Weight,
+                                this.maxDepth, this.maxNodes);
             this.gp = problem.create();
             this.gp.evolve(this.maxGenerations);
+        }
         /**
          * GPProblem implementation
+         *
+         * @author Alexander Trautsch
          */
         class CrossPareGP extends GPProblem {
+            /**
+             * Instance values of the training data
+             */
             private double[][] instances;
+            /**
+             * Classifications of the training data
+             */
             private boolean[] output;
+            /**
+             * maximal depth of the S-expression tree
+             */
             private int maxDepth;
+            /**
+             * maximal number of nodes of the S-expression tree
+             */
             private int maxNodes;
+            /**
+             * variables of the genetic program
+             */
             private Variable[] x;
+            public CrossPareGP(double[][] instances, boolean[] output, int populationSize, int minInitDept, int maxInitDepth, int tournamentSize, double errorType2Weight, int maxDepth, int maxNodes) throws InvalidConfigurationException {
+            /**
+             *
+             * <p>
+             * Constructor. Creates a new genetic program.
+             * </p>
+             *
+             * @param instances
+             *            instance values of the training data
+             * @param output
+             *            classifications of the training data
+             * @param populationSize
+             *            the population size
+             * @param minInitDept
+             *            the initial minimal depth of the S-expression tree
+             * @param maxInitDepth
+             *            the initial maximal depth of the S-expression tree
+             * @param tournamentSize
+             *            the tournament size for selection
+             * @param maxGenerations
+             *            the number of generations created
+             * @param errorType2Weight
+             *            weight factor for the prediction errors
+             * @param maxDepth
+             *            maximal depth of the S-expression tree
+             * @param maxNodes
+             *            maximal number of nodes of the S-expression tree
+             * @throws InvalidConfigurationException
+             *             thrown in case the problem cannot be created
+             */
+            public CrossPareGP(double[][] instances,
+                               boolean[] output,
+                               int populationSize,
+                               int minInitDept,
+                               int maxInitDepth,
+                               int tournamentSize,
+                               double errorType2Weight,
+                               int maxDepth,
+                               int maxNodes)
+                throws InvalidConfigurationException
+            {
                 super(new GPConfiguration());
                 this.instances = instances;
                 this.output = output;
 …
                 Configuration.reset();
                 GPConfiguration config = this.getGPConfiguration();
                 this.x = new Variable[this.instances[0].length];
+                for(int j=0; j < this.x.length; j++) {
+                    this.x[j] = Variable.create(config, "X"+j, CommandGene.DoubleClass);
+                }
+                config.setGPFitnessEvaluator(new DeltaGPFitnessEvaluator()); // smaller fitness is better
+                //config.setGPFitnessEvaluator(new DefaultGPFitnessEvaluator()); // bigger fitness is better
+                for (int j = 0; j < this.x.length; j++) {
+                    this.x[j] = Variable.create(config, "X" + j, CommandGene.DoubleClass);
+                }
+                config.setGPFitnessEvaluator(new DeltaGPFitnessEvaluator()); // smaller fitness is
+                                                                             // better
+                // config.setGPFitnessEvaluator(new DefaultGPFitnessEvaluator()); // bigger fitness
+                // is better
                 config.setMinInitDepth(minInitDept);
                 config.setMaxInitDepth(maxInitDepth);
                 config.setCrossoverProb((float)0.60);
                 config.setReproductionProb((float)0.10);
                 config.setMutationProb((float)0.30);
+                config.setCrossoverProb((float) 0.60);
+                config.setReproductionProb((float) 0.10);
+                config.setMutationProb((float) 0.30);
                 config.setSelectionMethod(new TournamentSelector(tournamentSize));
 …
                 config.setMaxCrossoverDepth(4);
+                config.setFitnessFunction(new CrossPareFitness(this.x, this.instances, this.output, errorType2Weight));
+                config.setFitnessFunction(new CrossPareFitness(this.x, this.instances, this.output,
+                                                               errorType2Weight));
                 config.setStrictProgramCreation(true);
+            }
+            // used for running the fitness function again for testing
+            /**
+             * <p>
+             * Returns the variables of the problem. Used for running the fitness function again for
+             * testing.
+             * </p>
+             *
+             * @return the variables
+             */
             public Variable[] getVariables() {
                 return this.x;
+            }
+            /**
+             * creates the genetic program
+             */
+            @SuppressWarnings("rawtypes")
             public GPGenotype create() throws InvalidConfigurationException {
                 GPConfiguration config = this.getGPConfiguration();
                 // return type
+                Class[] types = {CommandGene.DoubleClass};
+                Class[] types =
+                    { CommandGene.DoubleClass };
                 // Arguments of result-producing chromosome: none
+                Class[][] argTypes = { {} };
+                Class[][] argTypes =
+                    { { } };
                 // variables + functions, we set the variables with the values of the instances here
                 CommandGene[] vars = new CommandGene[this.instances[0].length];
                 for(int j=0; j < this.instances[0].length; j++) {
+                for (int j = 0; j < this.instances[0].length; j++) {
                     vars[j] = this.x[j];
+                }
+                CommandGene[] funcs = {
+                    new Add(config, CommandGene.DoubleClass),
+                    new Subtract(config, CommandGene.DoubleClass),
+                    new Multiply(config, CommandGene.DoubleClass),
+                    new Divide(config, CommandGene.DoubleClass),
+                    new Sine(config, CommandGene.DoubleClass),
+                    new Cosine(config, CommandGene.DoubleClass),
+                    new Exp(config, CommandGene.DoubleClass),
+                    new Log(config, CommandGene.DoubleClass),
+                    new GT(config, CommandGene.DoubleClass),
+                    new Max(config, CommandGene.DoubleClass),
+                    new Terminal(config, CommandGene.DoubleClass, -100.0, 100.0, true), // min, max, whole numbers
+                };
+                CommandGene[] comb = (CommandGene[])ArrayUtils.addAll(vars, funcs);
+                CommandGene[][] nodeSets = {
+                    comb,
+                };
+                CommandGene[] funcs =
+                    { new Add(config, CommandGene.DoubleClass),
+                        new Subtract(config, CommandGene.DoubleClass),
+                        new Multiply(config, CommandGene.DoubleClass),
+                        new Divide(config, CommandGene.DoubleClass),
+                        new Sine(config, CommandGene.DoubleClass),
+                        new Cosine(config, CommandGene.DoubleClass),
+                        new Exp(config, CommandGene.DoubleClass),
+                        new Log(config, CommandGene.DoubleClass),
+                        new GT(config, CommandGene.DoubleClass),
+                        new Max(config, CommandGene.DoubleClass),
+                        new Terminal(config, CommandGene.DoubleClass, -100.0, 100.0, true), // min,
+                                                                                            // max,
+                                                                                            // whole
+                                                                                            // numbers
+                    };
+                CommandGene[] comb = (CommandGene[]) ArrayUtils.addAll(vars, funcs);
+                CommandGene[][] nodeSets =
+                    { comb, };
                 // we only have one chromosome so this suffices
+                int minDepths[] = {config.getMinInitDepth()};
+                int maxDepths[] = {this.maxDepth};
+                GPGenotype result = GPGenotype.randomInitialGenotype(config, types, argTypes, nodeSets, minDepths, maxDepths, this.maxNodes, false); // 40 = maxNodes, true = verbose output
+                int minDepths[] =
+                    { config.getMinInitDepth() };
+                int maxDepths[] =
+                    { this.maxDepth };
+                GPGenotype result =
+                    GPGenotype.randomInitialGenotype(config, types, argTypes, nodeSets, minDepths,
+                                                     maxDepths, this.maxNodes, false); // 40 =
+                                                                                       // maxNodes,
+                                                                                       // true =
+                                                                                       // verbose
+                                                                                       // output
                 return result;
 …
+        }
+        /**
+         * Fitness function
+        /**
+         * Internal helper class for the fitness function.
+         *
+         * @author Alexander Trautsch
          */
         class CrossPareFitness extends GPFitnessFunction {
+            /**
+             * generated serialization ID
+             */
             private static final long serialVersionUID = 75234832484387L;
+            /**
+             * variables of the genetic program
+             */
             private Variable[] x;
+            /**
+             * instance values of the training data
+             */
             private double[][] instances;
+            /**
+             * classifications of the training data
+             */
             private boolean[] output;
+            /**
+             * weight of the error costs
+             */
             private double errorType2Weight = 1.0;
             // needed in evaluate
+            //private Object[] NO_ARGS = new Object[0];
+            // private Object[] NO_ARGS = new Object[0];
+            /**
+             * fitness value
+             */
             private double sfitness = 0.0f;
+            /**
+             * type I error
+             */
             private int errorType1 = 0;
+            /**
+             * type II error
+             */
             private int errorType2 = 0;
+            public CrossPareFitness(Variable[] x, double[][] instances, boolean[] output, double errorType2Weight) {
+            /**
+             * <p>
+             * Constructor. Creates a new fitness function.
+             * </p>
+             *
+             * @param x
+             *            variables of the genetic program
+             * @param instances
+             *            instance values of the training data
+             * @param output
+             *            classification of the training data
+             * @param errorType2Weight
+             *            weight of the error costs
+             */
+            public CrossPareFitness(Variable[] x,
+                                    double[][] instances,
+                                    boolean[] output,
+                                    double errorType2Weight)
+            {
                 this.x = x;
                 this.instances = instances;
 …
+            }
+            /**
+             * <p>
+             * returns the type I error
+             * </p>
+             *
+             * @return type I error
+             */
             public int getErrorType1() {
                 return this.errorType1;
+            }
+            /**
+             * <p>
+             * returns the type II error
+             * </p>
+             *
+             * @return type II error
+             */
             public int getErrorType2() {
                 return this.errorType2;
+            }
+            /**
+             * <p>
+             * returns the value of the secondary fitness function
+             * </p>
+             *
+             * @return secondary fitness
+             */
             public double getSecondFitness() {
                 return this.sfitness;
+            }
+            /**
+             * <p>
+             * returns the number of training instances
+             * </p>
+             *
+             * @return number of instances
+             */
             public int getNumInstances() {
                 return this.instances.length;
 …
             /**
+             * This is the fitness function
+             * <p>
+             * The fitness function. The fitness is best when the number of wrong classifications
+             * is lowest; type II errors are weighted.
+             * </p>
+             *
+             * Our fitness is best if we have the less wrong classifications, this includes a weight for type2 errors
+             * @param program
+             *            the genetic program whose fitness is evaluated.
+             *
+             * @see org.jgap.gp.GPFitnessFunction#evaluate(org.jgap.gp.IGPProgram)
              */
             @Override
 …
                 this.errorType2 = 0;
+                for(int i=0; i < this.instances.length; i++) {
+                    // requires that we have a variable for each column of our dataset (attribute of instance)
+                    for(int j=0; j < this.x.length; j++) {
+                for (int i = 0; i < this.instances.length; i++) {
+                    // requires that we have a variable for each column of our dataset (attribute of
+                    // instance)
+                    for (int j = 0; j < this.x.length; j++) {
                         this.x[j].set(this.instances[i][j]);
+                    }
 …
                     value = program.execute_double(0, this.x);
                     if(value < 0.5) {
                         if(this.output[i] != true) {
+                    if (value < 0.5) {
+                        if (this.output[i] != true) {
                             this.errorType1 += 1;
+                        }
+                    }else {
+                        if(this.output[i] == true) {
+                    }
+                    else {
+                        if (this.output[i] == true) {
                             this.errorType2 += 1;
+                        }
 …
                 // now calc pfitness
+                pfitness = (this.errorType1 + this.errorType2Weight * this.errorType2) / this.instances.length;
+                pfitness = (this.errorType1 + this.errorType2Weight * this.errorType2) /
+                    this.instances.length;
                 // number of nodes in the program, if lower than 10 we assign sFitness of 10
                 // we can set metadata with setProgramData to save this
                 if(program.getChromosome(0).getSize(0) < 10) {
+                if (program.getChromosome(0).getSize(0) < 10) {
                     program.setApplicationData(10.0f);
+                }
 …
+            }
+        }
         /**
          * Custom GT implementation used in the GP Algorithm.
+         */
+         public class GT extends MathCommand implements ICloneable {
+             private static final long serialVersionUID = 113454184817L;
+             public GT(final GPConfiguration a_conf, java.lang.Class a_returnType) throws InvalidConfigurationException {
+                 super(a_conf, 2, a_returnType);
+             }
+             public String toString() {
+                 return "GT(&1, &2)";
+             }
+             public String getName() {
+                 return "GT";
+             }
+             public float execute_float(ProgramChromosome c, int n, Object[] args) {
+                 float f1 = c.execute_float(n, 0, args);
+                 float f2 = c.execute_float(n, 1, args);
+                 float ret = 1.0f;
+                 if(f1 > f2) {
+                     ret = 0.0f;
+                 }
+                 return ret;
+             }
+             public double execute_double(ProgramChromosome c, int n, Object[] args) {
+                 double f1 = c.execute_double(n, 0, args);
+                 double f2 = c.execute_double(n, 1, args);
+                 double ret = 1;
+                 if(f1 > f2)  {
+                     ret = 0;
+                 }
+                 return ret;
+             }
+             public Object clone() {
+                 try {
+                     GT result = new GT(getGPConfiguration(), getReturnType());
+                     return result;
+                 }catch(Exception ex) {
+                     throw new CloneException(ex);
+                 }
+             }
+         }
+         *
+         * @author Alexander Trautsch
+         */
+        public class GT extends MathCommand implements ICloneable {
+            /**
+             * generated serialization ID.
+             */
+            private static final long serialVersionUID = 113454184817L;
+            /**
+             * <p>
+             * Constructor. Creates a new GT.
+             * </p>
+             *
+             * @param a_conf
+             *            the configuration of the genetic program
+             * @param a_returnType
+             *            the return type
+             * @throws InvalidConfigurationException
+             *             thrown if there is a problem during the initialization of the super class
+             *
+             * @see MathCommand
+             */
+            public GT(final GPConfiguration a_conf, @SuppressWarnings("rawtypes") java.lang.Class a_returnType)
+                throws InvalidConfigurationException
+            {
+                super(a_conf, 2, a_returnType);
+            }
+            /*
+             * (non-Javadoc)
+             *
+             * @see org.jgap.gp.CommandGene#toString()
+             */
+            @Override
+            public String toString() {
+                return "GT(&1, &2)";
+            }
+            /*
+             * (non-Javadoc)
+             *
+             * @see org.jgap.gp.CommandGene#getName()
+             */
+            @Override
+            public String getName() {
+                return "GT";
+            }
+            /*
+             * (non-Javadoc)
+             *
+             * @see org.jgap.gp.CommandGene#execute_float(org.jgap.gp.impl.ProgramChromosome, int,
+             * java.lang.Object[])
+             */
+            @Override
+            public float execute_float(ProgramChromosome c, int n, Object[] args) {
+                float f1 = c.execute_float(n, 0, args);
+                float f2 = c.execute_float(n, 1, args);
+                float ret = 1.0f;
+                if (f1 > f2) {
+                    ret = 0.0f;
+                }
+                return ret;
+            }
+            /*
+             * (non-Javadoc)
+             *
+             * @see org.jgap.gp.CommandGene#execute_double(org.jgap.gp.impl.ProgramChromosome, int,
+             * java.lang.Object[])
+             */
+            @Override
+            public double execute_double(ProgramChromosome c, int n, Object[] args) {
+                double f1 = c.execute_double(n, 0, args);
+                double f2 = c.execute_double(n, 1, args);
+                double ret = 1;
+                if (f1 > f2) {
+                    ret = 0;
+                }
+                return ret;
+            }
+            /*
+             * (non-Javadoc)
+             *
+             * @see java.lang.Object#clone()
+             */
+            @Override
+            public Object clone() {
+                try {
+                    GT result = new GT(getGPConfiguration(), getReturnType());
+                    return result;
+                }
+                catch (Exception ex) {
+                    throw new CloneException(ex);
+                }
+            }
+        }
+    }
     /**
      * GP Multiple Data Sets Validation-Voting Classifier
+     *
      * Basically the same as the GP Multiple Data Sets Validation Classifier.
      * But here we do keep a model candidate for each training set which may later vote
+     * Basically the same as the GP Multiple Data Sets Validation Classifier. But here we do keep a
+     * model candidate for each training set which may later vote
+     *
      */
     public class GPVVClassifier extends GPVClassifier {
+        /**
+         * generated serialization ID
+         */
         private static final long serialVersionUID = -654710583852839901L;
+        /**
+         * classifiers for each validation set
+         */
         private List<Classifier> classifiers = null;
+        /*
+         * (non-Javadoc)
+         *
+         * @see
+         * de.ugoe.cs.cpdp.training.GPTraining.GPVClassifier#buildClassifier(weka.core.Instances)
+         */
         @Override
         public void buildClassifier(Instances arg0) throws Exception {
             // TODO Auto-generated method stub
+        }
         /** Build the GP Multiple Data Sets Validation-Voting Classifier
+         *
          * This is according to Section 6 of the Paper by Liu et al.
          * It is basically the Multiple Data Sets Validation Classifier but here we keep the best models an let them vote.
+        }
+        /**
+         * Build the GP Multiple Data Sets Validation-Voting Classifier
+         *
+         * This is according to Section 6 of the Paper by Liu et al. It is basically the Multiple
+         * Data Sets Validation Classifier but here we keep the best models and let them vote.
+         *
          * @param traindataSet
+         *            the training data
          * @throws Exception
+         *             thrown in case of a problem with the training
          */
         public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
 …
             // then is evaluated on the rest
             classifiers = new LinkedList<>();
             for(int i=0; i < traindataSet.size(); i++) {
+            for (int i = 0; i < traindataSet.size(); i++) {
                 // candidates we get out of evaluation
                 LinkedList<Classifier> candidates = new LinkedList<>();
                 // number of runs, yields the best of these
                 double smallest_error_count_train = Double.MAX_VALUE;
+                double smallest_error_count_train = Double.MAX_VALUE;
                 Classifier bestTrain = null;
+                for(int k=0; k < this.numberRuns; k++) {
+                    double[] errors_eval = {0.0, 0.0};
+                for (int k = 0; k < this.numberRuns; k++) {
+                    double[] errors_eval =
+                        { 0.0, 0.0 };
                     Classifier classifier = new GPRun();
+                    ((GPRun)classifier).configure(this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.maxGenerations, this.errorType2Weight, this.maxDepth, this.maxNodes);
+                    ((GPRun) classifier).configure(this.populationSize, this.initMinDepth,
+                                                   this.initMaxDepth, this.tournamentSize,
+                                                   this.maxGenerations, this.errorType2Weight,
+                                                   this.maxDepth, this.maxNodes);
                     // one project is training data
                     classifier.buildClassifier(traindataSet.get(i));
                     double[] errors;
                     // rest of the set is evaluation data, we evaluate now
+                    for(int j=0; j < traindataSet.size(); j++) {
+                        if(j != i) {
+                            // if type1 and type2 errors are < 0.5 we allow the model in the candidates
+                            errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
+                    for (int j = 0; j < traindataSet.size(); j++) {
+                        if (j != i) {
+                            // if type1 and type2 errors are < 0.5 we allow the model in the
+                            // candidates
+                            errors = this.evaluate((GPRun) classifier, traindataSet.get(j));
                             errors_eval[0] += errors[0];
                             errors_eval[1] += errors[1];
                             if((errors[0] < 0.5) && (errors[1] < 0.5)) {
+                            if ((errors[0] < 0.5) && (errors[1] < 0.5)) {
                                 candidates.add(classifier);
+                            }
+                        }
+                    }
                     // if the candidate made fewer errors it is now the best
                     if(errors_eval[0] + errors_eval[1] < smallest_error_count_train) {
+                    if (errors_eval[0] + errors_eval[1] < smallest_error_count_train) {
                         bestTrain = classifier;
                         smallest_error_count_train = errors_eval[0] + errors_eval[1];
 …
+                }
+                // now after the evaluation we do a model selection where only one model remains for the given training data
+                // now after the evaluation we do a model selection where only one model remains for
+                // the given training data
                 // we select the model which is best on all evaluation data
                 double smallest_error_count = Double.MAX_VALUE;
                 double[] errors;
                 Classifier best = null;
+                for(int ii=0; ii < candidates.size(); ii++) {
+                    double[] errors_eval = {0.0, 0.0};
+                for (int ii = 0; ii < candidates.size(); ii++) {
+                    double[] errors_eval =
+                        { 0.0, 0.0 };
                     // we add the errors the candidate makes over the evaldata
                     for(int j=0; j < traindataSet.size(); j++) {
                         if(j != i) {
                             errors = this.evaluate((GPRun)candidates.get(ii), traindataSet.get(j));
+                    for (int j = 0; j < traindataSet.size(); j++) {
+                        if (j != i) {
+                            errors = this.evaluate((GPRun) candidates.get(ii), traindataSet.get(j));
                             errors_eval[0] += errors[0];
                             errors_eval[1] += errors[1];
+                        }
+                    }
                     // if the candidate made fewer errors it is now the best
                     if(errors_eval[0] + errors_eval[1] < smallest_error_count) {
+                    if (errors_eval[0] + errors_eval[1] < smallest_error_count) {
                         best = candidates.get(ii);
                         smallest_error_count = errors_eval[0] + errors_eval[1];
+                    }
+                }
                 if( best==null ) {
+                if (best == null) {
                     best = bestTrain;
+                }
 …
+            }
+        }
         /**
          * Use the best classifiers for each training data in a majority voting
+         *
+         * @param instance
+         *            instance that is classified
+         *
+         * @see de.ugoe.cs.cpdp.training.GPTraining.GPVClassifier#classifyInstance(weka.core.Instance)
          */
         @Override
         public double classifyInstance(Instance instance) {
             int vote_positive = 0;
             for (int i = 0; i < classifiers.size(); i++) {
                 Classifier classifier = classifiers.get(i);
                 GPGenotype gp = ((GPRun)classifier).getGp();
                 Variable[] vars = ((GPRun)classifier).getVariables();
                 IGPProgram fitest = gp.getAllTimeBest();  // all time fitest
                 for(int j = 0; j < instance.numAttributes()-1; j++) {
                    vars[j].set(instance.value(j));
+                }
                 if(fitest.execute_double(0, vars) < 0.5) {
+                GPGenotype gp = ((GPRun) classifier).getGp();
+                Variable[] vars = ((GPRun) classifier).getVariables();
+                IGPProgram fitest = gp.getAllTimeBest(); // all time fitest
+                for (int j = 0; j < instance.numAttributes() - 1; j++) {
+                    vars[j].set(instance.value(j));
+                }
+                if (fitest.execute_double(0, vars) < 0.5) {
                     vote_positive += 1;
+                }
+            }
             if(vote_positive >= (classifiers.size()/2)) {
+            if (vote_positive >= (classifiers.size() / 2)) {
                 return 1.0;
+            }else {
+            }
+            else {
                 return 0.0;
+            }
+        }
+    }
     /**
      * GP Multiple Data Sets Validation Classifier
+     *
+     * We train a Classifier with one training project $numberRun times.
+     * Then we evaluate the classifier on the rest of the training projects and keep the best classifier.
+     * After that we have for each training project the best classifier as per the evaluation on the rest of the data set.
+     * Then we determine the best classifier from these candidates and keep it to be used later.
+     * We train a Classifier with one training project $numberRun times. Then we evaluate the
+     * classifier on the rest of the training projects and keep the best classifier. After that we
+     * have for each training project the best classifier as per the evaluation on the rest of the
+     * data set. Then we determine the best classifier from these candidates and keep it to be used
+     * later.
+     *
+     * @author sherbold Alexander Trautsch
      */
     public class GPVClassifier extends AbstractClassifier {
         private List<Classifier> classifiers = null;
         private Classifier best = null;
 …
         private static final long serialVersionUID = 3708714057579101522L;
+        /**
+         * size of the population of the genetic program
+         */
         protected int populationSize;
+        /**
+         * minimal depth of the S-expression tree at the start of the training
+         */
         protected int initMinDepth;
+        /**
+         * maximal depth of the S-expression tree at the start of the training
+         */
         protected int initMaxDepth;
+        /**
+         * size of the tournaments used for selection
+         */
         protected int tournamentSize;
+        /**
+         * number of genetic generations considered (i.e., number of iterations)
+         */
         protected int maxGenerations;
+        /**
+         * weight factor for the prediction errors for cost estimation
+         */
         protected double errorType2Weight;
+        protected int numberRuns;
+        /**
+         * number of internal replications from which the best result is picked
+         */
+        protected int numberRuns = 20;
+        /**
+         * maximal depth of the S-expression tree
+         */
         protected int maxDepth;
+        /**
+         * maximal number of nodes of the S-expression tree
+         */
         protected int maxNodes;
         /**
+         *
+         * <p>
          * Configure the GP Params and number of Runs
+         * </p>
+         *
          * @param populationSize
+         *            the population size
          * @param initMinDepth
+         *            the initial minimal depth of the S-expression tree
          * @param initMaxDepth
+         *            the initial maximal depth of the S-expression tree
          * @param tournamentSize
+         *            the tournament size for selection
          * @param maxGenerations
+         *            the number of generations created
          * @param errorType2Weight
+         */
+        public void configure(int populationSize, int initMinDepth, int initMaxDepth, int tournamentSize, int maxGenerations, double errorType2Weight, int numberRuns, int maxDepth, int maxNodes) {
+         *            weight factor for the prediction errors
+         * @param numberRuns
+         *            number of internal replications from which the best result is picked
+         * @param maxDepth
+         *            maximal depth of the S-expression tree
+         * @param maxNodes
+         *            maximal number of nodes of the S-expression tree
+         */
+        public void configure(int populationSize,
+                              int initMinDepth,
+                              int initMaxDepth,
+                              int tournamentSize,
+                              int maxGenerations,
+                              double errorType2Weight,
+                              int numberRuns,
+                              int maxDepth,
+                              int maxNodes)
+        {
             this.populationSize = populationSize;
             this.initMinDepth = initMinDepth;
 …
             this.maxNodes = maxNodes;
+        }
+        /** Build the GP Multiple Data Sets Validation Classifier
+         *
+         * This is according to Section 6 of the Paper by Liu et al. except for the selection of the best model.
+         * Section 4 describes a slightly different approach.
+        /**
+         * Build the GP Multiple Data Sets Validation Classifier
+         *
+         * This is according to Section 6 of the Paper by Liu et al. except for the selection of the
+         * best model. Section 4 describes a slightly different approach.
+         *
          * @param traindataSet
+         *            the training data
          * @throws Exception
+         *             thrown in case of a problem with the training
          */
         public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
 …
             // each classifier is trained with one project from the set
             // then is evaluated on the rest
             for(int i=0; i < traindataSet.size(); i++) {
+            for (int i = 0; i < traindataSet.size(); i++) {
                 // candidates we get out of evaluation
                 LinkedList<Classifier> candidates = new LinkedList<>();
                 // numberRuns full GPRuns, we generate numberRuns models for each traindata
                 for(int k=0; k < this.numberRuns; k++) {
+                for (int k = 0; k < this.numberRuns; k++) {
                     Classifier classifier = new GPRun();
+                    ((GPRun)classifier).configure(this.populationSize, this.initMinDepth, this.initMaxDepth, this.tournamentSize, this.maxGenerations, this.errorType2Weight, this.maxDepth, this.maxNodes);
+                    ((GPRun) classifier).configure(this.populationSize, this.initMinDepth,
+                                                   this.initMaxDepth, this.tournamentSize,
+                                                   this.maxGenerations, this.errorType2Weight,
+                                                   this.maxDepth, this.maxNodes);
                     classifier.buildClassifier(traindataSet.get(i));
                     double[] errors;
                     // rest of the set is evaluation data, we evaluate now
+                    for(int j=0; j < traindataSet.size(); j++) {
+                        if(j != i) {
+                            // if type1 and type2 errors are < 0.5 we allow the model in the candidate list
+                            errors = this.evaluate((GPRun)classifier, traindataSet.get(j));
+                            if((errors[0] < 0.5) && (errors[1] < 0.5)) {
+                    for (int j = 0; j < traindataSet.size(); j++) {
+                        if (j != i) {
+                            // if type1 and type2 errors are < 0.5 we allow the model in the
+                            // candidate list
+                            errors = this.evaluate((GPRun) classifier, traindataSet.get(j));
+                            if ((errors[0] < 0.5) && (errors[1] < 0.5)) {
                                 candidates.add(classifier);
+                            }
 …
+                    }
+                }
+                // now after the evaluation we do a model selection where only one model remains for the given training data
+                // now after the evaluation we do a model selection where only one model remains for
+                // the given training data
                 // we select the model which is best on all evaluation data
                 double smallest_error_count = Double.MAX_VALUE;
                 double[] errors;
                 Classifier best = null;
+                for(int ii=0; ii < candidates.size(); ii++) {
+                    double[] errors_eval = {0.0, 0.0};
+                for (int ii = 0; ii < candidates.size(); ii++) {
+                    double[] errors_eval =
+                        { 0.0, 0.0 };
                     // we add the errors the candidate makes over the evaldata
                     for(int j=0; j < traindataSet.size(); j++) {
                         if(j != i) {
                             errors = this.evaluate((GPRun)candidates.get(ii), traindataSet.get(j));
+                    for (int j = 0; j < traindataSet.size(); j++) {
+                        if (j != i) {
+                            errors = this.evaluate((GPRun) candidates.get(ii), traindataSet.get(j));
                             errors_eval[0] += errors[0];
                             errors_eval[1] += errors[1];
+                        }
+                    }
                     // if the candidate made fewer errors it is now the best
                     if(errors_eval[0] + errors_eval[1] < smallest_error_count) {
+                    if (errors_eval[0] + errors_eval[1] < smallest_error_count) {
                         best = candidates.get(ii);
                         smallest_error_count = errors_eval[0] + errors_eval[1];
+                    }
+                }
                 // now we have the best classifier for this training data
                 classifiers.add(best);
             } /* endfor trainData */
             // now we have one best classifier for each trainData
+            // now we have one best classifier for each trainData
             // we evaluate again to find the best classifier of all time
+            // this selection is now according to section 4 of the paper and not 6 where an average of the 6 models is build
+            // this selection is now according to section 4 of the paper and not 6 where an average
+            // of the 6 models is built
             double smallest_error_count = Double.MAX_VALUE;
             double error_count;
             double errors[];
             for(int j=0; j < classifiers.size(); j++) {
+            for (int j = 0; j < classifiers.size(); j++) {
                 error_count = 0;
                 Classifier current = classifiers.get(j);
                 for(int i=0; i < traindataSet.size(); i++) {
                     errors = this.evaluate((GPRun)current, traindataSet.get(i));
+                for (int i = 0; i < traindataSet.size(); i++) {
+                    errors = this.evaluate((GPRun) current, traindataSet.get(i));
                     error_count = errors[0] + errors[1];
+                }
                 if(error_count < smallest_error_count) {
+                if (error_count < smallest_error_count) {
                     best = current;
+                }
+            }
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+         */
         @Override
         public void buildClassifier(Instances traindata) throws Exception {
             final Classifier classifier = new GPRun();
+            ((GPRun)classifier).configure(populationSize, initMinDepth, initMaxDepth, tournamentSize, maxGenerations, errorType2Weight, this.maxDepth, this.maxNodes);
+            ((GPRun) classifier).configure(populationSize, initMinDepth, initMaxDepth,
+                                           tournamentSize, maxGenerations, errorType2Weight,
+                                           this.maxDepth, this.maxNodes);
             classifier.buildClassifier(traindata);
             classifiers.add(classifier);
+        }
+        /**
+         * Evaluation of the Classifier
+         *
+         * We evaluate the classifier with the Instances of the evalData.
+         * It basically assigns the instance attribute values to the variables of the s-expression-tree and
+         * then counts the missclassifications.
+        /**
+         * <p>
+         * Evaluation of the Classifier.
+         * </p>
+         * <p>
+         * We evaluate the classifier with the Instances of the evalData. It basically assigns the
+         * instance attribute values to the variables of the s-expression-tree and then counts the
+         * misclassifications.
+         * </p>
+         *
          * @param classifier
+         *            the classifier that is evaluated
          * @param evalData
+         *            the validation data
          * @return
          */
 …
             GPGenotype gp = classifier.getGp();
             Variable[] vars = classifier.getVariables();
+            IGPProgram fitest = gp.getAllTimeBest();  // selects the fitest of all not just the last generation
+            IGPProgram fitest = gp.getAllTimeBest(); // selects the fitest of all not just the last
+                                                     // generation
             double classification;
             int error_type1 = 0;
 …
             int positive = 0;
             int negative = 0;
             for(Instance instance: evalData) {
+            for (Instance instance : evalData) {
                 // assign instance attribute values to the variables of the s-expression-tree
                 double[] tmp = WekaUtils.instanceValues(instance);
                 for(int i = 0; i < tmp.length; i++) {
+                for (int i = 0; i < tmp.length; i++) {
                     vars[i].set(tmp[i]);
+                }
                 classification = fitest.execute_double(0, vars);
                 // we need to count the absolutes of positives for percentage
+                if(instance.classValue() == 1.0) {
+                    positive +=1;
+                }else {
+                    negative +=1;
+                }
+                if (instance.classValue() == 1.0) {
+                    positive += 1;
+                }
+                else {
+                    negative += 1;
+                }
                 // classification < 0.5 we say defective
                 if(classification < 0.5) {
                     if(instance.classValue() != 1.0) {
+                if (classification < 0.5) {
+                    if (instance.classValue() != 1.0) {
                         error_type1 += 1;
+                    }
+                }else {
+                    if(instance.classValue() == 1.0) {
+                }
+                else {
+                    if (instance.classValue() == 1.0) {
                         error_type2 += 1;
+                    }
+                }
+            }
             // return error types percentages for the types
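+            // return the error rates for type 1 and type 2 errors
+            // NOTE(review): error_type1 / negative divides two ints, so the quotient is truncated
+            // before it is stored in the double; confirm whether a cast to double is intended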
             double et1_per = error_type1 / negative;
+            double et2_per = error_type2 / positive;
+            return new double[]{et1_per, et2_per};
+        }
+            double et2_per = error_type2 / positive;
+            return new double[]
+                { et1_per, et2_per };
+        }
         /**
          * Use only the best classifier from our evaluation phase
+         *
+         * @param instance
+         *            instance that is classified
+         *
+         * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
          */
         @Override
         public double classifyInstance(Instance instance) {
             GPGenotype gp = ((GPRun)best).getGp();
             Variable[] vars = ((GPRun)best).getVariables();
             IGPProgram fitest = gp.getAllTimeBest();  // all time fitest
             for(int i = 0; i < instance.numAttributes()-1; i++) {
                vars[i].set(instance.value(i));
+            }
+            GPGenotype gp = ((GPRun) best).getGp();
+            Variable[] vars = ((GPRun) best).getVariables();
+            IGPProgram fitest = gp.getAllTimeBest(); // all time fitest
+            for (int i = 0; i < instance.numAttributes() - 1; i++) {
+                vars[i].set(instance.value(i));
+            }
             double classification = fitest.execute_double(0, vars);
             if(classification < 0.5) {
+            if (classification < 0.5) {
                 return 1.0;
+            }else {
+            }
+            else {
                 return 0.0;
+            }

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ISetWiseTestdataAwareTrainingStrategy.java

-                      r45
+                      r135
 package de.ugoe.cs.cpdp.training;
 …
 import weka.core.Instances;
+/**
+ * <p>
+ * Training strategy for training with access to the target data and one data set per input product.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public interface ISetWiseTestdataAwareTrainingStrategy extends ITrainer {
+    /**
+     * <p>
+     * Applies the training strategy.
+     * </p>
+     *
+     * @param traindataSet
+     *            the training data per product
+     * @param testdata
+     *            the test data from the target product
+     */
     void apply(SetUniqueList<Instances> traindataSet, Instances testdata);
+    /**
+     * <p>
+     * returns the name of the training strategy
+     * </p>
+     *
+     * @return the name
+     */
     String getName();
+    // TODO: these two methods look like they should be removed and instead be handled using the parameters
     void setMethod(String method);
     void setThreshold(String threshold);
+}

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ISetWiseTrainingStrategy.java

-                      r86
+                      r135
 import weka.core.Instances;
+// Bagging Strategy: separate models for each training data set
+/**
+ * <p>
+ * Training strategy for training with one data set per input product.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public interface ISetWiseTrainingStrategy extends ITrainer {
+    /**
+     * <p>
+     * Applies the training strategy.
+     * </p>
+     *
+     * @param traindataSet
+     *            the training data per product
+     */
     void apply(SetUniqueList<Instances> traindataSet);
+    /**
+     * <p>
+     * returns the name of the training strategy
+     * </p>
+     *
+     * @return the name
+     */
     String getName();
+}

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITestAwareTrainingStrategy.java

-                      r65
+                      r135
 import weka.core.Instances;
+/**
+ * <p>
+ * Training strategy for training with access to the target data and the training data as a single data set.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public interface ITestAwareTrainingStrategy extends ITrainer {
+    /**
+     * <p>
+     * Applies the training strategy.
+     * </p>
+     *
+     * @param traindata
+     *            the training data for all products
+     * @param testdata
+     *            the test data from the target product
+     */
     void apply(Instances testdata, Instances traindata);
+    /**
+     * <p>
+     * returns the name of the training strategy
+     * </p>
+     *
+     * @return the name
+     */
     String getName();
+}

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainer.java

-                      r86
+                      r135
 import de.ugoe.cs.cpdp.IParameterizable;
+/**
+ * <p>
+ * Marker interface for all CrossPare trainers.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public interface ITrainer extends IParameterizable {

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainingStrategy.java

-                      r86
+                      r135
 import weka.core.Instances;
+/**
+ * <p>
+ * Training strategy for training with the training data as a single data set.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public interface ITrainingStrategy extends ITrainer {
+    /**
+     * <p>
+     * Applies the training strategy.
+     * </p>
+     *
+     * @param traindata
+     *            the training data for all target products
+     */
     void apply(Instances traindata);
+    /**
+     * <p>
+     * returns the name of the training strategy
+     * </p>
+     *
+     * @return the name
+     */
     String getName();
+}

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/IWekaCompatibleTrainer.java

-                      r86
+                      r135
 import weka.classifiers.Classifier;
+/**
+ * <p>
+ * Common interface for all training strategies that internally use the {@link Classifier} from WEKA.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public interface IWekaCompatibleTrainer extends ITrainer {
+    /**
+     * <p>
+     * returns the WEKA classifier
+     * </p>
+     *
+     * @return the classifier
+     */
     Classifier getClassifier();
+    /**
+     * <p>
+     * returns the name of the training strategy
+     * </p>
+     *
+     * @return the name
+     */
     String getName();
+}

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/QuadTree.java

-                      r86
+                      r135
 /**
+ * QuadTree implementation
+ * <p>
+ * QuadTree implementation.
+ * </p>
+ * <p>
+ * QuadTree gets a list of instances and then recursively splits them into 4 children. For this
+ * it uses the median of the 2 values x,y.
+ * </p>
+ *
+ * QuadTree gets a list of instances and then recursively split them into 4 childs For this it uses
+ * the median of the 2 values x,y
+ * @author Alexander Trautsch
  */
 public class QuadTree {
+    /* 1 parent or null */
+    /**
+     * 1 parent or null
+     */
     private QuadTree parent = null;
+    /* 4 childs, 1 per quadrant */
+    /**
+     * north-west quadrant
+     */
     private QuadTree child_nw;
+    /**
+     * north-east quadrant
+     */
     private QuadTree child_ne;
+    /**
+     * south-east quadrant
+     */
     private QuadTree child_se;
+    /**
+     * south-west quadrant
+     */
     private QuadTree child_sw;
+    /* list (only helps with generation of list of childs!) */
+    /**
+     * helper list for child quadrant generation
+     */
     private ArrayList<QuadTree> l = new ArrayList<QuadTree>();
+    /* level only used for debugging */
+    /**
+     * debugging attribute
+     */
     public int level = 0;
+    /* size of the quadrant */
+    /**
+     * size of the quadrant in x-dimension
+     */
     private double[] x;
+    /**
+     * size of the quadrant in y-dimension
+     */
     private double[] y;
+    /**
+     * debugging parameter
+     */
     public static boolean verbose = false;
+    /**
+     * global size of the QuadTree.
+     */
     public static int size = 0;
+    /**
+     * recursion parameter alpha
+     */
     public static double alpha = 0;
+    /* cluster payloads */
+    /**
+     * data for each cluster
+     */
     public static ArrayList<ArrayList<QuadTreePayload<Instance>>> ccluster =
         new ArrayList<ArrayList<QuadTreePayload<Instance>>>();
+    /* cluster sizes (index is cluster number, arraylist is list of boxes (x0,y0,x1,y1) */
+    /**
+     * cluster sizes (index is cluster number, {@link ArrayList} is list of boxes (x0,y0,x1,y1))
+     */
     public static HashMap<Integer, ArrayList<Double[][]>> csize =
         new HashMap<Integer, ArrayList<Double[][]>>();
+    /* payload of this instance */
+    /**
+     * data within this quadrant
+     */
     private ArrayList<QuadTreePayload<Instance>> payload;
+    /**
+     * <p>
+     * Constructor. Creates a new QuadTree.
+     * </p>
+     *
+     * @param parent
+     *            parent of this tree
+     * @param payload
+     *            data within the quadrant
+     */
     public QuadTree(QuadTree parent, ArrayList<QuadTreePayload<Instance>> payload) {
         this.parent = parent;
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see java.lang.Object#toString()
+     */
+    @Override
     public String toString() {
         String n = "";
 …
     /**
+     * <p>
      * Returns the payload, used for clustering in the clustering list we only have children with
+     * paylod
+     *
+     * @return payload
+     * payload
+     * </p>
+     *
+     * @return payload the payload
      */
     public ArrayList<QuadTreePayload<Instance>> getPayload() {
 …
     /**
+     * Calculate the density of this quadrant
+     *
+     * density = number of instances / global size (all instances)
+     *
+     * @return density
+     * <p>
+     * Calculate the density of this quadrant as
+     * <ul>
+     * <li>density = number of instances / global size (all instances)</li>
+     * </ul>
+     *
+     * @return density the density
      */
     public double getDensity() {
 …
+    }
+    /**
+     * <p>
+     * sets the size coordinates of the quadrant
+     * </p>
+     *
+     * @param x
+     *            x-dimension
+     * @param y
+     *            y-dimension
+     */
     public void setSize(double[] x, double[] y) {
         this.x = x;
 …
+    }
+    /**
+     * <p>
+     * returns the size of the quadrant
+     * </p>
+     *
+     * @return size of the current quadrant
+     */
     public double[][] getSize() {
         return new double[][]
 …
+    }
+    /**
+     * <p>
+     * returns the size of the quadrant
+     * </p>
+     *
+     * @return size of the current quadrant
+     */
     public Double[][] getSizeDouble() {
         Double[] tmpX = new Double[2];
 …
     /**
+     * TODO: DRY, median ist immer dasselbe
+     * <p>
+     * calculates the median for the x axis
+     * </p>
+     *
      * @return median for x
 …
+    }
+    /**
+     * <p>
+     * calculates the median for the y axis
+     * </p>
+     *
+     * @return median for y
+     */
     private double getMedianForY() {
         double med_y = 0;
 …
     /**
+     * Reurns the number of instances in the payload
+     *
+     * @return int number of instances
+     * <p>
+     * Returns the number of instances in the payload
+     * </p>
+     *
+     * @return number of instances
      */
     public int getNumbers() {
 …
     /**
+     * <p>
      * Calculate median values of payload for x, y and split into 4 sectors
+     * </p>
+     *
      * @return Array of QuadTree nodes (4 children)
 …
     /**
+     * TODO: static method
+     *
+     * <p>
+     * creates the children of a QuadTree and recursively splits them as well
+     * </p>
+     *
      * @param q
+     */
+    public void recursiveSplit(QuadTree q) {
+     *            tree that is split
+     */
+    public static void recursiveSplit(QuadTree q) {
         if (QuadTree.verbose) {
             System.out.println("splitting: " + q);
 …
             try {
                 QuadTree[] childs = q.split();
                 this.recursiveSplit(childs[0]);
                 this.recursiveSplit(childs[1]);
                 this.recursiveSplit(childs[2]);
                 this.recursiveSplit(childs[3]);
+                recursiveSplit(childs[0]);
+                recursiveSplit(childs[1]);
+                recursiveSplit(childs[2]);
+                recursiveSplit(childs[3]);
+            }
             catch (Exception e) {
 …
     /**
+     * returns an list of childs sorted by density
+     * <p>
+     * returns a list of children sorted by density
+     * </p>
+     *
      * @param q
      *            QuadTree
-     * @return list of QuadTrees
      */
     private void generateList(QuadTree q) {
 …
     /**
+     * <p>
      * Checks if passed QuadTree is neighboring to us
+     * </p>
+     *
      * @param q
 …
     /**
+     * <p>
      * Perform pruning and clustering of the quadtree
+     *
+     * </p>
+     * <p>
      * Pruning according to: Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman,
      * Forrest Shull, Burak Turhan, Thomas Zimmermann,
      * "Local versus Global Lessons for Defect Prediction and Effort Estimation," IEEE Transactions
      * on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013
+     *
+     * 1) get list of leaf quadrants 2) sort by their density 3) set stop_rule to 0.5 * highest
+     * Density in the list 4) merge all nodes with a density > stop_rule to the new cluster and
+     * remove all from list 5) repeat
+     * </p>
+     * <ol>
+     * <li>get list of leaf quadrants</li>
+     * <li>sort by their density</li>
+     * <li>set stop_rule to 0.5*highest Density in the list</li>
+     * <li>merge all nodes with a density > stop_rule to the new cluster and remove all from list
+     * </li>
+     * <li>repeat</li>
+     * </ol>
+     *
      * @param q
 …
+    }
+    /**
+     * <p>
+     * debugging function that prints information about the QuadTree
+     * </p>
+     *
+     */
     public void printInfo() {
         System.out.println("we have " + ccluster.size() + " clusters");
 …
     /**
+     * <p>
      * Helper Method to get a sorted list (by density) for all children
+     * </p>
+     *
      * @param q

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaggingTraining.java

-                      r99
+                      r135
 /**
+ * Programmatic WekaBaggingTraining
+ *
+ * first parameter is Trainer Name. second parameter is class name
+ *
+ * all subsequent parameters are configuration params (for example for trees) Cross Validation
+ * params always come last and are prepended with -CVPARAM
+ *
+ * <p>
+ * The first parameter is the trainer name, second parameter is class name. All subsequent
+ * parameters are configuration parameters of the algorithms. Cross validation parameters always
+ * come last and are prepended with -CVPARAM
+ * </p>
+ * <p>
  * XML Configurations for Weka Classifiers:
+ *
 …
  * }
  * </pre>
+ * </p>
+ *
+ * @author Alexander Trautsch
  */
 public class WekaBaggingTraining extends WekaBaseTraining implements ISetWiseTrainingStrategy {
+    /**
+     * the classifier
+     */
     private final TraindatasetBagging classifier = new TraindatasetBagging();
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.WekaBaseTraining#getClassifier()
+     */
     @Override
     public Classifier getClassifier() {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see
+     * de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy#apply(org.apache.commons.collections4.list.
+     * SetUniqueList)
+     */
     @Override
     public void apply(SetUniqueList<Instances> traindataSet) {
 …
+    }
+    /**
+     * <p>
+     * Helper class for bagging classifiers.
+     * </p>
+     *
+     * @author Steffen Herbold
+     */
     public class TraindatasetBagging extends AbstractClassifier {
+        /**
+         * default serialization ID.
+         */
         private static final long serialVersionUID = 1L;
+        /**
+         * internal storage of the training data
+         */
         private List<Instances> trainingData = null;
+        /**
+         * bagging classifier for each training data set
+         */
         private List<Classifier> classifiers = null;
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
+         */
         @Override
         public double classifyInstance(Instance instance) {
 …
+        }
+        /**
+         * <p>
+         * trains a new dataset wise bagging classifier
+         * </p>
+         *
+         * @param traindataSet
+         *            the training data per product
+         * @throws Exception
+         *             thrown if an error occurs during the training of the classifiers for any
+         *             product
+         */
         public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
             classifiers = new LinkedList<>();
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+         */
         @Override
         public void buildClassifier(Instances traindata) throws Exception {

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining.java

-                      r131
+                      r135
 /**
+ * WekaBaseTraining2
+ * <p>
+ * Allows specification of the Weka classifier and its params in the XML experiment configuration.
+ * </p>
+ * <p>
+ * Important conventions of the XML format: Cross Validation params always come last and are
+ * prepended with -CVPARAM.<br>
+ * Example:
+ *
+ * Allows specification of the Weka classifier and its params in the XML experiment configuration.
+ * <pre>
+ * {@code
+ * <trainer name="WekaTraining" param="RandomForestLocal weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5"/>
+ * }
+ * </pre>
+ *
+ * Important conventions of the XML format: Cross Validation params always come last and are
+ * prepended with -CVPARAM Example: <trainer name="WekaTraining"
+ * param="RandomForestLocal weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5"/>
+ * @author Alexander Trautsch
  */
 public abstract class WekaBaseTraining implements IWekaCompatibleTrainer {
+    /**
+     * reference to the Weka classifier
+     */
     protected Classifier classifier = null;
+    /**
+     * qualified class name of the weka classifier
+     */
     protected String classifierClassName;
+    /**
+     * name of the classifier
+     */
     protected String classifierName;
+    /**
+     * parameters of the training
+     */
     protected String[] classifierParams;
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
     @Override
     public void setParameter(String parameters) {
 …
         classifierParams = Arrays.copyOfRange(params, 2, params.length);
         //classifier = setupClassifier();
+        // classifier = setupClassifier();
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.IWekaCompatibleTrainer#getClassifier()
+     */
     @Override
     public Classifier getClassifier() {
 …
+    }
+    /**
+     * <p>
+     * helper function that sets up the Weka classifier including its parameters
+     * </p>
+     *
+     * @return the Weka classifier that was set up
+     */
     protected Classifier setupClassifier() {
         Classifier cl = null;
 …
             cl = obj;
             if( cl instanceof Vote ) {
+            if (cl instanceof Vote) {
                 Vote votingClassifier = (Vote) cl;
                 for( Classifier classifier : votingClassifier.getClassifiers() ) {
                     if( classifier instanceof BayesNet ) {
+                for (Classifier classifier : votingClassifier.getClassifiers()) {
+                    if (classifier instanceof BayesNet) {
                         ((BayesNet) classifier).setUseADTree(false);
+                    }
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.IWekaCompatibleTrainer#getName()
+     */
     @Override
     public String getName() {

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLASERTraining.java

-                      r91
+                      r135
 import weka.core.Instances;
 /**
  * <p>
  * TODO comment
+ * Implements training following the LASER classification scheme.
  * </p>
+ *
 …
 public class WekaLASERTraining extends WekaBaseTraining implements ITrainingStrategy {
+    /**
+     * Internal classifier used for LASER.
+     */
     private final LASERClassifier internalClassifier = new LASERClassifier();
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.WekaBaseTraining#getClassifier()
+     */
     @Override
     public Classifier getClassifier() {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.ITrainingStrategy#apply(weka.core.Instances)
+     */
     @Override
     public void apply(Instances traindata) {
 …
+    }
+    /**
+     * <p>
+     * Internal helper class that defines the laser classifier.
+     * </p>
+     *
+     * @author Steffen Herbold
+     */
     public class LASERClassifier extends AbstractClassifier {
+        /**
+         * Default serial ID.
+         */
         private static final long serialVersionUID = 1L;
+        /**
+         * Internal reference to the classifier.
+         */
         private Classifier laserClassifier = null;
+        /**
+         * Internal storage of the training data required for NN analysis.
+         */
         private Instances traindata = null;
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
+         */
         @Override
         public double classifyInstance(Instance instance) throws Exception {
             List<Integer> closestInstances = new LinkedList<>();
             double minDistance = Double.MAX_VALUE;
             for( int i=0; i<traindata.size(); i++ ) {
+            for (int i = 0; i < traindata.size(); i++) {
                 double distance = WekaUtils.hammingDistance(instance, traindata.get(i));
                 if( distance<minDistance) {
+                if (distance < minDistance) {
                     minDistance = distance;
+                }
+            }
             for( int i=0; i<traindata.size(); i++ ) {
+            for (int i = 0; i < traindata.size(); i++) {
                 double distance = WekaUtils.hammingDistance(instance, traindata.get(i));
                 if( distance<=minDistance ) {
+                if (distance <= minDistance) {
                     closestInstances.add(i);
+                }
+            }
             if( closestInstances.size()==1 ) {
+            if (closestInstances.size() == 1) {
                 int closestIndex = closestInstances.get(0);
                 Instance closestTrainingInstance = traindata.get(closestIndex);
                 List<Integer> closestToTrainingInstance = new LinkedList<>();
                 double minTrainingDistance = Double.MAX_VALUE;
+                for( int i=0; i<traindata.size(); i++ ) {
+                    if( closestIndex!=i ) {
+                        double distance = WekaUtils.hammingDistance(closestTrainingInstance, traindata.get(i));
+                        if( distance<minTrainingDistance ) {
+                for (int i = 0; i < traindata.size(); i++) {
+                    if (closestIndex != i) {
+                        double distance =
+                            WekaUtils.hammingDistance(closestTrainingInstance, traindata.get(i));
+                        if (distance < minTrainingDistance) {
                             minTrainingDistance = distance;
+                        }
+                    }
+                }
+                for( int i=0; i<traindata.size(); i++ ) {
+                    if( closestIndex!=i ) {
+                        double distance = WekaUtils.hammingDistance(closestTrainingInstance, traindata.get(i));
+                        if( distance<=minTrainingDistance ) {
+                for (int i = 0; i < traindata.size(); i++) {
+                    if (closestIndex != i) {
+                        double distance =
+                            WekaUtils.hammingDistance(closestTrainingInstance, traindata.get(i));
+                        if (distance <= minTrainingDistance) {
                             closestToTrainingInstance.add(i);
+                        }
+                    }
+                }
                 if( closestToTrainingInstance.size()==1 ) {
+                if (closestToTrainingInstance.size() == 1) {
                     return laserClassifier.classifyInstance(instance);
+                }
 …
                     double label = Double.NaN;
                     boolean allEqual = true;
                     for( Integer index : closestToTrainingInstance ) {
                         if( Double.isNaN(label) ) {
+                    for (Integer index : closestToTrainingInstance) {
+                        if (Double.isNaN(label)) {
                             label = traindata.get(index).classValue();
+                        }
                         else if( label!=traindata.get(index).classValue() ) {
+                        else if (label != traindata.get(index).classValue()) {
                             allEqual = false;
                             break;
+                        }
+                    }
                     if( allEqual ) {
+                    if (allEqual) {
                         return label;
+                    }
 …
+                    }
+                }
+            } else {
+            }
+            else {
                 double label = Double.NaN;
                 boolean allEqual = true;
                 for( Integer index : closestInstances ) {
                     if( Double.isNaN(label) ) {
+                for (Integer index : closestInstances) {
+                    if (Double.isNaN(label)) {
                         label = traindata.get(index).classValue();
+                    }
                     else if( label!=traindata.get(index).classValue() ) {
+                    else if (label != traindata.get(index).classValue()) {
                         allEqual = false;
                         break;
+                    }
+                }
                 if( allEqual ) {
+                if (allEqual) {
                     return label;
+                }
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+         */
         @Override
         public void buildClassifier(Instances traindata) throws Exception {

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLocalEMTraining.java

-                      r99
+                      r135
 /**
+ * WekaLocalEMTraining
+ * <p>
+ * Local Trainer with EM Clustering for data partitioning. Currently supports only EM Clustering.
+ * </p>
+ * <ol>
+ * <li>Cluster training data</li>
+ * <li>for each cluster train a classifier with training data from cluster</li>
+ * <li>match test data instance to a cluster, then classify with classifier from the cluster</li>
+ * </ol>
+ *
  * Local Trainer with EM Clustering for data partitioning. Currently supports only EM Clustering.
+ * XML configuration:
+ *
+ * 1. Cluster training data 2. for each cluster train a classifier with training data from cluster
+ * 3. match test data instance to a cluster, then classify with classifier from the cluster
+ *
+ * XML configuration: <!-- because of clustering --> <preprocessor name="Normalization" param=""/>
+ *
+ * <!-- cluster trainer --> <trainer name="WekaLocalEMTraining"
+ * param="NaiveBayes weka.classifiers.bayes.NaiveBayes" />
+ * <pre>
+ * {@code
+ * <trainer name="WekaLocalEMTraining" param="NaiveBayes weka.classifiers.bayes.NaiveBayes" />
+ * }
+ * </pre>
  */
 public class WekaLocalEMTraining extends WekaBaseTraining implements ITrainingStrategy {
+    /**
+     * the classifier
+     */
     private final TraindatasetCluster classifier = new TraindatasetCluster();
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.WekaBaseTraining#getClassifier()
+     */
     @Override
     public Classifier getClassifier() {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.ITrainingStrategy#apply(weka.core.Instances)
+     */
     @Override
     public void apply(Instances traindata) {
 …
+    }
+    /**
+     * <p>
+     * Weka classifier for the local model with EM clustering.
+     * </p>
+     *
+     * @author Alexander Trautsch
+     */
     public class TraindatasetCluster extends AbstractClassifier {
+        /**
+         * default serialization ID
+         */
         private static final long serialVersionUID = 1L;
+        /**
+         * EM clusterer used
+         */
         private EM clusterer = null;
+        /**
+         * classifiers for each cluster
+         */
         private HashMap<Integer, Classifier> cclassifier;
+        /**
+         * training data for each cluster
+         */
         private HashMap<Integer, Instances> ctraindata;
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
+         */
         @Override
         public double classifyInstance(Instance instance) {
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+         */
         @Override
         public void buildClassifier(Instances traindata) throws Exception {

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLocalFQTraining.java

-                      r99
+                      r135
 /**
+ * <p>
  * Trainer with reimplementation of WHERE clustering algorithm from: Tim Menzies, Andrew Butcher,
  * David Cok, Andrian Marcus, Lucas Layman, Forrest Shull, Burak Turhan, Thomas Zimmermann,
  * "Local versus Global Lessons for Defect Prediction and Effort Estimation," IEEE Transactions on
  * Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013
+ *
+ * With WekaLocalFQTraining we do the following: 1) Run the Fastmap algorithm on all training data,
+ * let it calculate the 2 most significant dimensions and projections of each instance to these
+ * dimensions 2) With these 2 dimensions we span a QuadTree which gets recursively split on
+ * median(x) and median(y) values. 3) We cluster the QuadTree nodes together if they have similar
+ * density (50%) 4) We save the clusters and their training data 5) We only use clusters with >
+ * ALPHA instances (currently Math.sqrt(SIZE)), rest is discarded with the training data of this
+ * cluster 6) We train a Weka classifier for each cluster with the clusters training data 7) We
+ * recalculate Fastmap distances for a single instance with the old pivots and then try to find a
+ * cluster containing the coords of the instance. 7.1.) If we can not find a cluster (due to coords
+ * outside of all clusters) we find the nearest cluster. 8) We classify the Instance with the
+ * classifier and traindata from the Cluster we found in 7.
+ * </p>
+ * <p>
+ * With WekaLocalFQTraining we do the following:
+ * <ol>
+ * <li>Run the Fastmap algorithm on all training data, let it calculate the 2 most significant
+ * dimensions and projections of each instance to these dimensions</li>
+ * <li>With these 2 dimensions we span a QuadTree which gets recursively split on median(x) and
+ * median(y) values.</li>
+ * <li>We cluster the QuadTree nodes together if they have similar density (50%)</li>
+ * <li>We save the clusters and their training data</li>
+ * <li>We only use clusters with > ALPHA instances (currently Math.sqrt(SIZE)), the rest is
+ * discarded with the training data of this cluster</li>
+ * <li>We train a Weka classifier for each cluster with the clusters training data</li>
+ * <li>We recalculate Fastmap distances for a single instance with the old pivots and then try to
+ * find a cluster containing the coords of the instance. If we can not find a cluster (due to coords
+ * outside of all clusters) we find the nearest cluster.</li>
+ * <li>We classify the Instance with the classifier and traindata from the Cluster we found in 7.
+ * </li>
+ * </ol>
+ * </p>
  */
 public class WekaLocalFQTraining extends WekaBaseTraining implements ITrainingStrategy {
+    /**
+     * the classifier
+     */
     private final TraindatasetCluster classifier = new TraindatasetCluster();
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.WekaBaseTraining#getClassifier()
+     */
     @Override
     public Classifier getClassifier() {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.ITrainingStrategy#apply(weka.core.Instances)
+     */
     @Override
     public void apply(Instances traindata) {
 …
+    }
+    /**
+     * <p>
+     * Weka classifier for the local model with WHERE clustering
+     * </p>
+     *
+     * @author Alexander Trautsch
+     */
     public class TraindatasetCluster extends AbstractClassifier {
+        /**
+         * default serialization ID
+         */
         private static final long serialVersionUID = 1L;
+        /* classifier per cluster */
+        /**
+         * classifiers for each cluster
+         */
         private HashMap<Integer, Classifier> cclassifier;
+        /* instances per cluster */
+        /**
+         * training data for each cluster
+         */
         private HashMap<Integer, Instances> ctraindata;
         /*
+        /**
          * holds the instances and indices of the pivot objects of the Fastmap calculation in
          * buildClassifier
 …
         private HashMap<Integer, Instance> cpivots;
+        /* holds the indices of the pivot objects for x,y and the dimension [x,y][dimension] */
+        /**
+         * holds the indices of the pivot objects for x,y and the dimension [x,y][dimension]
+         */
         private int[][] cpivotindices;
+        /* holds the sizes of the cluster multiple "boxes" per cluster */
+        /**
+         * holds the sizes of the cluster multiple "boxes" per cluster
+         */
         private HashMap<Integer, ArrayList<Double[][]>> csize;
+        /* debug vars */
+        /**
+         * debug variable
+         */
         @SuppressWarnings("unused")
         private boolean show_biggest = true;
+        /**
+         * debug variable
+         */
         @SuppressWarnings("unused")
         private int CFOUND = 0;
+        /**
+         * debug variable
+         */
         @SuppressWarnings("unused")
         private int CNOTFOUND = 0;
+        /**
+         * <p>
+         * copies an instance such that it is compatible with the local model
+         * </p>
+         *
+         * @param instances
+         *            instance format
+         * @param instance
+         *            instance that is copied
+         * @return the copied instance
+         */
         private Instance createInstance(Instances instances, Instance instance) {
             // attributes for feeding instance to classifier
 …
         /**
+         * <p>
          * Because Fastmap saves only the image not the values of the attributes it used we can not
          * use the old data directly to classify single instances to clusters.
+         * </p>
+         * <p>
+         * To classify a single instance we do a new Fastmap computation with only the instance and
+         * the old pivot elements.
+         * </p>
+         * <p>
+         * After that we find the cluster with our Fastmap result for x and y.
+         * </p>
+         *
+         * To classify a single instance we do a new fastmap computation with only the instance and
+         * the old pivot elements.
+         *
+         * After that we find the cluster with our fastmap result for x and y.
+         * @param instance
+         *            instance that is classified
+         * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
          */
         @Override
 …
                 double[][] distmat = new double[2 * FMAP.target_dims + 1][2 * FMAP.target_dims + 1];
                 distmat[0][0] = 0;
+                distmat[0][1] =
+                    dist.distance(clusterInstance,
+                                  this.cpivots.get((Integer) this.cpivotindices[0][0]));
+                distmat[0][2] =
+                    dist.distance(clusterInstance,
+                                  this.cpivots.get((Integer) this.cpivotindices[1][0]));
+                distmat[0][3] =
+                    dist.distance(clusterInstance,
+                                  this.cpivots.get((Integer) this.cpivotindices[0][1]));
+                distmat[0][4] =
+                    dist.distance(clusterInstance,
+                                  this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[1][0] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                  clusterInstance);
+                distmat[0][1] = dist.distance(clusterInstance,
+                                              this.cpivots.get((Integer) this.cpivotindices[0][0]));
+                distmat[0][2] = dist.distance(clusterInstance,
+                                              this.cpivots.get((Integer) this.cpivotindices[1][0]));
+                distmat[0][3] = dist.distance(clusterInstance,
+                                              this.cpivots.get((Integer) this.cpivotindices[0][1]));
+                distmat[0][4] = dist.distance(clusterInstance,
+                                              this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[1][0] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                              clusterInstance);
                 distmat[1][1] = 0;
+                distmat[1][2] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                  this.cpivots.get((Integer) this.cpivotindices[1][0]));
+                distmat[1][3] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                  this.cpivots.get((Integer) this.cpivotindices[0][1]));
+                distmat[1][4] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                  this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[2][0] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                  clusterInstance);
+                distmat[2][1] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                  this.cpivots.get((Integer) this.cpivotindices[0][0]));
+                distmat[1][2] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                              this.cpivots.get((Integer) this.cpivotindices[1][0]));
+                distmat[1][3] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                              this.cpivots.get((Integer) this.cpivotindices[0][1]));
+                distmat[1][4] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]),
+                                              this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[2][0] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                              clusterInstance);
+                distmat[2][1] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                              this.cpivots.get((Integer) this.cpivotindices[0][0]));
                 distmat[2][2] = 0;
+                distmat[2][3] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                  this.cpivots.get((Integer) this.cpivotindices[0][1]));
+                distmat[2][4] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                  this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[3][0] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                  clusterInstance);
+                distmat[3][1] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                  this.cpivots.get((Integer) this.cpivotindices[0][0]));
+                distmat[3][2] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                  this.cpivots.get((Integer) this.cpivotindices[1][0]));
+                distmat[2][3] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                              this.cpivots.get((Integer) this.cpivotindices[0][1]));
+                distmat[2][4] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]),
+                                              this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[3][0] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                              clusterInstance);
+                distmat[3][1] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                              this.cpivots.get((Integer) this.cpivotindices[0][0]));
+                distmat[3][2] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                              this.cpivots.get((Integer) this.cpivotindices[1][0]));
                 distmat[3][3] = 0;
+                distmat[3][4] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                  this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[4][0] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                  clusterInstance);
+                distmat[4][1] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                  this.cpivots.get((Integer) this.cpivotindices[0][0]));
+                distmat[4][2] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                  this.cpivots.get((Integer) this.cpivotindices[1][0]));
+                distmat[4][3] =
+                    dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                  this.cpivots.get((Integer) this.cpivotindices[0][1]));
+                distmat[3][4] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]),
+                                              this.cpivots.get((Integer) this.cpivotindices[1][1]));
+                distmat[4][0] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                              clusterInstance);
+                distmat[4][1] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                              this.cpivots.get((Integer) this.cpivotindices[0][0]));
+                distmat[4][2] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                              this.cpivots.get((Integer) this.cpivotindices[1][0]));
+                distmat[4][3] = dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]),
+                                              this.cpivots.get((Integer) this.cpivotindices[0][1]));
                 distmat[4][4] = 0;
 …
                  * distmat[0].length; j++) { if(biggest < distmat[i][j]) { biggest = distmat[i][j];
                  * } } } if(this.show_biggest) { Console.traceln(Level.INFO,
                  * String.format(""+clusterInstance)); Console.traceln(Level.INFO,
                  * String.format("biggest distances: "+ biggest)); this.show_biggest = false; }
+                 * String.format(""+clusterInstance)); Console.traceln(Level.INFO, String.format(
+                 * "biggest distances: "+ biggest)); this.show_biggest = false; }
                  */
 …
                         cnumber = clusternumber.next();
                         for (int i = 0; i < ctraindata.get(cnumber).size(); i++) {
+                            if (dist.distance(instance, ctraindata.get(cnumber).get(i)) <= min_distance)
+                            if (dist.distance(instance,
+                                              ctraindata.get(cnumber).get(i)) <= min_distance)
+                            {
                                 found_cnumber = cnumber;
 …
+        }
+        /*
+         * (non-Javadoc)
+         *
+         * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+         */
         @Override
         public void buildClassifier(Instances traindata) throws Exception {
 …
             // Console.traceln(Level.INFO,
+            // String.format("size for cluster ("+small[0]+","+small[1]+") - ("+big[0]+","+big[1]+")"));
+            // String.format("size for cluster ("+small[0]+","+small[1]+") -
+            // ("+big[0]+","+big[1]+")"));
             // 5. generate quadtree
 …
             // recursive split und grid clustering eher static
             TREE.recursiveSplit(TREE);
+            QuadTree.recursiveSplit(TREE);
             // generate list of nodes sorted by density (childs only)
 …
+                }
                 else {
+                    Console.traceln(Level.INFO,
+                                    String.format("drop cluster, only: " + current.size() +
+                                        " instances"));
+                    Console.traceln(Level.INFO, String
+                        .format("drop cluster, only: " + current.size() + " instances"));
+                }
+            }
 …
                 // traindata_count += ctraindata.get(cnumber).size();
                 // Console.traceln(Level.INFO,
                 // String.format("building classifier in cluster "+cnumber +"  with "+
+                // String.format("building classifier in cluster "+cnumber +" with "+
                 // ctraindata.get(cnumber).size() +" traindata instances"));
+            }
 …
     /**
+     * Payload for the QuadTree. x and y are the calculated Fastmap values. T is a weka instance.
+     * <p>
+     * Payload for the QuadTree. x and y are the calculated Fastmap values. T is a Weka instance.
+     * </p>
+     *
+     * @author Alexander Trautsch
      */
     public class QuadTreePayload<T> {
+        public double x;
+        public double y;
+        /**
+         * x-value
+         */
+        public final double x;
+        /**
+         * y-value
+         */
+        public final double y;
+        /**
+         * associated instance
+         */
         private T inst;
+        /**
+         * <p>
+         * Constructor. Creates the payload.
+         * </p>
+         *
+         * @param x
+         *            x-value
+         * @param y
+         *            y-value
+         * @param value
+         *            associated instance
+         */
         public QuadTreePayload(double x, double y, T value) {
             this.x = x;
 …
+        }
+        /**
+         * <p>
+         * returns the instance
+         * </p>
+         *
+         * @return the associated instance
+         */
         public T getInst() {
             return this.inst;
 …
     /**
      * Fastmap implementation
+     *
      * Faloutsos, C., & Lin, K. I. (1995). FastMap: A fast algorithm for indexing, data-mining and
+     * <p>
+     * Fastmap implementation after:<br>
+     * * Faloutsos, C., & Lin, K. I. (1995). FastMap: A fast algorithm for indexing, data-mining and
      * visualization of traditional and multimedia datasets (Vol. 24, No. 2, pp. 163-174). ACM.
+     * </p>
      */
     public class Fastmap {
+        /* N x k Array, at the end, the i-th row will be the image of the i-th object */
+        /**
+         * N x k Array, at the end, the i-th row will be the image of the i-th object
+         */
         private double[][] X;
+        /* 2 x k pivot Array one pair per recursive call */
+        /**
+         * 2 x k pivot Array one pair per recursive call
+         */
         private int[][] PA;
+        /* Objects we got (distance matrix) */
+        /**
+         * Objects we got (distance matrix)
+         */
         private double[][] O;
+        /* column of X currently updated (also the dimension) */
+        /**
+         * column of X currently updated (also the dimension)
+         */
         private int col = 0;
+        /* number of dimensions we want */
+        /**
+         * number of dimensions we want
+         */
         private int target_dims = 0;
+        // if we already have the pivot elements
+        /**
+         * if we already have the pivot elements
+         */
         private boolean pivot_set = false;
+        /**
+         * <p>
+         * Constructor. Creates a new Fastmap object.
+         * </p>
+         *
+         * @param k
+         *            number of target dimensions
+         */
         public Fastmap(int k) {
             this.target_dims = k;
 …
         /**
+         * Sets the distance matrix and params that depend on this
+         * <p>
+         * Sets the distance matrix and params that depend on this.
+         * </p>
+         *
          * @param O
+         *            distance matrix
          */
         public void setDistmat(double[][] O) {
 …
         /**
+         * <p>
          * Set pivot elements, we need that to classify instances after the calculation is complete
          * (because we then want to reuse only the pivot elements).
+         * </p>
+         *
          * @param pi
+         *            the pivots
          */
         public void setPivots(int[][] pi) {
 …
         /**
+         * <p>
          * Return the pivot elements that were chosen during the calculation
+         * </p>
+         *
          * @return
+         * @return the pivots
          */
         public int[][] getPivots() {
 …
         /**
+         * The distance function for euclidean distance
+         *
+         * Acts according to equation 4 of the fastmap paper
+         * <p>
+         * The distance function for euclidean distance. Acts according to equation 4 of the Fastmap
+         * paper.
+         * </p>
+         *
          * @param x
 …
          * @param y
          *            y index of y image (if k==0 y object)
+         * @param kdimensionality
+         * @return distance
+         * @param k
+         *            dimensionality
+         * @return the distance
          */
         private double dist(int x, int y, int k) {
 …
         /**
+         * Find the object farthest from the given index This method is a helper Method for
+         * findDistandObjects
+         * <p>
+         * Find the object farthest from the given index. This method is a helper method for
+         * findDistantObjects.
+         * </p>
+         *
          * @param index
 …
         /**
+         * Finds the pivot objects
+         * <p>
+         * Finds the pivot objects. This method is basically algorithm 1 of the Fastmap paper.
+         * </p>
+         *
+         * This method is basically algorithm 1 of the fastmap paper.
+         *
+         * @return 2 indexes of the choosen pivot objects
+         * @return 2 indexes of the chosen pivot objects
          */
         private int[] findDistantObjects() {
 …
         /**
+         * Calculates the new k-vector values (projections)
+         *
+         * This is basically algorithm 2 of the fastmap paper. We just added the possibility to
+         * pre-set the pivot elements because we need to classify single instances after the
+         * computation is already done.
+         *
+         * @param dims
+         *            dimensionality
+         * <p>
+         * Calculates the new k-vector values (projections). This is basically algorithm 2 of the
+         * Fastmap paper. We just added the possibility to pre-set the pivot elements because we
+         * need to classify single instances after the computation is already done.
+         * </p>
          */
         public void calculate() {
 …
         /**
+         * <p>
          * returns the result matrix of the projections
+         * </p>
+         *
          * @return calculated result

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTestAwareTraining.java

-                      r99
+                      r135
 import weka.core.Instances;
+// TODO comment
+/**
+ * <p>
+ * Trainer that allows classifiers access to the training data. Classifiers need to make sure that
+ * they do not use the classification.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class WekaTestAwareTraining extends WekaBaseTraining implements ITestAwareTrainingStrategy {
+    /*
+     * (non-Javadoc)
+     *
+     * @see de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy#apply(weka.core.Instances,
+     * weka.core.Instances)
+     */
     @Override
     public void apply(Instances testdata, Instances traindata) {
         classifier = setupClassifier();
         if( !(classifier instanceof ITestAwareClassifier) ) {
+        if (!(classifier instanceof ITestAwareClassifier)) {
             throw new RuntimeException("classifier must implement the ITestAwareClassifier interface in order to be used as TestAwareTrainingStrategy");
+        }

trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java

-                      r99
+                      r135
 /**
+ * Programmatic WekaTraining
+ *
+ * first parameter is Trainer Name. second parameter is class name
+ *
+ * all subsequent parameters are configuration params (for example for trees) Cross Validation
+ * params always come last and are prepended with -CVPARAM
+ *
+ * XML Configurations for Weka Classifiers:
+ *
+ * <p>
+ * The first parameter is the trainer name, the second parameter is the class name. All
+ * subsequent parameters are configuration parameters of the algorithms. Cross validation
+ * parameters always come last and are prepended with -CVPARAM.
+ * </p>
+ * XML Configurations for Weka Classifiers:
  * <pre>
  * {@code

trunk/CrossPare/src/de/ugoe/cs/cpdp/util/SortUtils.java

-                      r61
+                      r135
 package de.ugoe.cs.cpdp.util;
+/**
+ * <p>
+ * Utility functions for sorting.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class SortUtils {
+    /**
+     * <p>
+     * Implements a quick sort that sorts an index set together with the array.
+     * </p>
+     *
+     * @param main
+     *            the array that is sorted
+     * @param index
+     *            the index set for the array
+     */
     public static <T extends Comparable<T>> void quicksort(T[] main, int[] index) {
         quicksort(main, index, 0, index.length - 1, false);
+    }
+    public static <T extends Comparable<T>> void quicksort(T[] main, int[] index, boolean descending) {
+    /**
+     * <p>
+     * Implements a quick sort that sorts an index set together with the array.
+     * </p>
+     *
+     * @param main
+     *            the array that is sorted
+     * @param index
+     *            the index set for the array
+     * @param descending
+     *            defines the sorting order
+     */
+    public static <T extends Comparable<T>> void quicksort(T[] main,
+                                                           int[] index,
+                                                           boolean descending)
+    {
         quicksort(main, index, 0, index.length - 1, descending);
+    }
+    // quicksort a[left] to a[right]
+    private static <T extends Comparable<T>> void quicksort(T[] a, int[] index, int left, int right, boolean descending) {
+    /**
+     * <p>
+     * internal quicksort implementation
+     * </p>
+     *
+     * @param main
+     *            the array that is sorted
+     * @param index
+     *            the index set for the array
+     * @param left
+     *            defines the current partition
+     * @param right
+     *            defines the current partition
+     * @param descending
+     *            defines the sorting order
+     */
+    private static <T extends Comparable<T>> void quicksort(T[] main,
+                                                            int[] index,
+                                                            int left,
+                                                            int right,
+                                                            boolean descending)
+    {
         if (right <= left)
             return;
         int i = partition(a, index, left, right, descending);
         quicksort(a, index, left, i - 1, descending);
         quicksort(a, index, i + 1, right, descending);
+        int i = partition(main, index, left, right, descending);
+        quicksort(main, index, left, i - 1, descending);
+        quicksort(main, index, i + 1, right, descending);
+    }
+    // partition a[left] to a[right], assumes left < right
+    private static <T extends Comparable<T>> int partition(T[] a, int[] index, int left, int right, boolean descending) {
+    /**
+     * <p>
+     * internal partitioning of the quicksort implementation
+     * </p>
+     *
+     * @param main
+     *            the array that is sorted
+     * @param index
+     *            the index set for the array
+     * @param left
+     *            defines the current partition
+     * @param right
+     *            defines the current partition
+     * @param descending
+     *            defines the sorting order
+     */
+    private static <T extends Comparable<T>> int partition(T[] main,
+                                                           int[] index,
+                                                           int left,
+                                                           int right,
+                                                           boolean descending)
+    {
         int i = left - 1;
         int j = right;
         while (true) {
             while (compare(a[++i], a[right], descending)) // find item on left to swap
+            while (compare(main[++i], main[right], descending)) // find item on left to swap
             ; // a[right] acts as sentinel
             while (compare(a[right], a[--j], descending)) // find item on right to swap
+            while (compare(main[right], main[--j], descending)) // find item on right to swap
                 if (j == left)
                     break; // don't go out-of-bounds
             if (i >= j)
                 break; // check if pointers cross
             exch(a, index, i, j); // swap two elements into place
+            swap(main, index, i, j); // swap two elements into place
+        }
         exch(a, index, i, right); // swap with partition element
+        swap(main, index, i, right); // swap with partition element
         return i;
+    }
+    // is x < y ?
+    /**
+     * <p>
+     * helper function for comparator evaluation
+     * </p>
+     *
+     * @param x
+     *            first element that is compared
+     * @param y
+     *            second element that is compared
+     * @param descending
+     *            defines the sorting order
+     * @return true if x is larger than y and descending is true or y is larger than x and
+     *         descending is false
+     */
     private static <T extends Comparable<T>> boolean compare(T x, T y, boolean descending) {
+        if( descending ) {
+            return x.compareTo(y)>0;
+        } else {
+            return x.compareTo(y)<0;
+        if (descending) {
+            return x.compareTo(y) > 0;
+        }
+        else {
+            return x.compareTo(y) < 0;
+        }
+    }
+    // exchange a[i] and a[j]
+    private static <T extends Comparable<T>> void exch(T[] a, int[] index, int i, int j) {
+        T swap = a[i];
+        a[i] = a[j];
+        a[j] = swap;
+    /**
+     * <p>
+     * swaps two elements
+     * </p>
+     *
+     * @param main
+     *            the array that is sorted
+     * @param index
+     *            the index set for the array
+     * @param i
+     *            index of the first element
+     * @param j
+     *            index of the second element
+     */
+    private static <T extends Comparable<T>> void swap(T[] main, int[] index, int i, int j) {
+        T tmp = main[i];
+        main[i] = main[j];
+        main[j] = tmp;
         int b = index[i];
         index[i] = index[j];

trunk/CrossPare/src/de/ugoe/cs/cpdp/util/WekaUtils.java

-                      r129
+                      r135
 package de.ugoe.cs.cpdp.util;
-// TODO comment
 import org.apache.commons.math3.ml.distance.EuclideanDistance;
 …
 import weka.core.Instances;
+/**
+ * <p>
+ * Collection of helper functions to work with Weka.
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public class WekaUtils {
+    /**
+     * <p>
+     * Data class for distance between instances within a data set based on their distributional
+     * characteristics.
+     * </p>
+     *
+     * @author Steffen Herbold
+     */
     public static class DistChar {
         public final double mean;
 …
         public final double max;
         public final int num;
         private DistChar(double mean, double std, double min, double max, int num) {
             this.mean = mean;
 …
+        }
+    }
     /**
      * Scaling value that moves the decimal point by 5 digits.
      */
     public final static double SCALER = 10000.0d;
     /**
      * <p>
 …
         return distance;
+    }
+    /**
+     * <p>
+     * Returns a double array of the values without the classification.
+     * </p>
+     *
+     * @param instance
+     *            the instance
+     * @return double array
+     */
     public static double[] instanceValues(Instance instance) {
         double[] values = new double[instance.numAttributes()-1];
         int k=0;
         for( int j=0; j<instance.numAttributes() ; j++ ) {
             if( j!= instance.classIndex() ) {
+        double[] values = new double[instance.numAttributes() - 1];
+        int k = 0;
+        for (int j = 0; j < instance.numAttributes(); j++) {
+            if (j != instance.classIndex()) {
                 values[k] = instance.value(j);
                 k++;
 …
         return values;
+    }
+    /**
+     * <p>
+     * Calculates the distributional characteristics of the distances the instances within a data
+     * set have to each other.
+     * </p>
+     *
+     * @param data
+     *            data for which the instances are characterized
+     * @return characteristics
+     */
     public static DistChar datasetDistance(Instances data) {
         double distance;
 …
         int numCmp = 0;
         int l = 0;
         double[] inst1 = new double[data.numAttributes()-1];
         double[] inst2 = new double[data.numAttributes()-1];
+        double[] inst1 = new double[data.numAttributes() - 1];
+        double[] inst2 = new double[data.numAttributes() - 1];
         EuclideanDistance euclideanDistance = new EuclideanDistance();
         for( int i=0; i<data.numInstances(); i++ ) {
             l=0;
             for( int k=0; k<data.numAttributes(); k++ ) {
                 if( k!=data.classIndex() ) {
+        for (int i = 0; i < data.numInstances(); i++) {
+            l = 0;
+            for (int k = 0; k < data.numAttributes(); k++) {
+                if (k != data.classIndex()) {
                     inst1[l] = data.instance(i).value(k);
+                }
+            }
             for( int j=0; j<data.numInstances(); j++ ) {
                 if( j!=i ) {
                     l=0;
                     for( int k=0; k<data.numAttributes(); k++ ) {
                         if( k!=data.classIndex() ) {
+            for (int j = 0; j < data.numInstances(); j++) {
+                if (j != i) {
+                    l = 0;
+                    for (int k = 0; k < data.numAttributes(); k++) {
+                        if (k != data.classIndex()) {
                             inst2[l] = data.instance(j).value(k);
+                        }
 …
                     distance = euclideanDistance.compute(inst1, inst2);
                     sumAll += distance;
                     sumAllQ += distance*distance;
+                    sumAllQ += distance * distance;
                     numCmp++;
                     if( distance < min ) {
+                    if (distance < min) {
                         min = distance;
+                    }
                     if( distance > max ) {
+                    if (distance > max) {
                         max = distance;
+                    }
 …
+        }
         double mean = sumAll / numCmp;
+        double std = Math.sqrt((sumAllQ-(sumAll*sumAll)/numCmp) *
+                                  (1.0d / (numCmp - 1)));
+        double std = Math.sqrt((sumAllQ - (sumAll * sumAll) / numCmp) * (1.0d / (numCmp - 1)));
         return new DistChar(mean, std, min, max, data.numInstances());
+    }
+    // like above, but for single attribute
+    /**
+     * <p>
+     * Calculates the distributional characteristics of the distances of a single attribute the
+     * instances within a data set have to each other.
+     * </p>
+     *
+     * @param data
+     *            data for which the instances are characterized
+     * @param index
+     *            attribute for which the distances are characterized
+     * @return characteristics
+     */
     public static DistChar attributeDistance(Instances data, int index) {
         double distance;
 …
         int numCmp = 0;
         double value1, value2;
         for( int i=0; i<data.numInstances(); i++ ) {
+        for (int i = 0; i < data.numInstances(); i++) {
             value1 = data.instance(i).value(index);
             for( int j=0; j<data.numInstances(); j++ ) {
                 if( j!=i ) {
+            for (int j = 0; j < data.numInstances(); j++) {
+                if (j != i) {
                     value2 = data.instance(j).value(index);
                     distance = Math.abs(value1-value2);
+                    distance = Math.abs(value1 - value2);
                     sumAll += distance;
                     sumAllQ += distance*distance;
+                    sumAllQ += distance * distance;
                     numCmp++;
                     if( distance < min ) {
+                    if (distance < min) {
                         min = distance;
+                    }
                     if( distance > max ) {
+                    if (distance > max) {
                         max = distance;
+                    }
 …
+        }
         double mean = sumAll / numCmp;
+        double std = Math.sqrt((sumAllQ-(sumAll*sumAll)/numCmp) *
+                                  (1.0d / (numCmp - 1)));
+        double std = Math.sqrt((sumAllQ - (sumAll * sumAll) / numCmp) * (1.0d / (numCmp - 1)));
         return new DistChar(mean, std, min, max, data.numInstances());
+    }
     /**
      * <p>

trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/AbstractVersionFilter.java

r86	r135
25	25	public abstract class AbstractVersionFilter implements IVersionFilter {
26	26
27		/**
	27	/*
28	28	* @see de.ugoe.cs.cpdp.versions.IVersionFilter#apply(java.util.List)
29	29	*/

trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MinClassNumberFilter.java

r86	r135
30	30	private int minInstances = 0;
31	31
32		/**
	32	/*
33	33	* @see de.ugoe.cs.cpdp.versions.IVersionFilter#apply(de.ugoe.cs.cpdp.versions.SoftwareVersion)
34	34	*/

trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MinInstanceNumberFilter.java

r86	r135
28	28	private int minInstances = 0;
29	29
30		/**
	30	/*
31	31	* @see de.ugoe.cs.cpdp.versions.IVersionFilter#apply(de.ugoe.cs.cpdp.versions.SoftwareVersion)
32	32	*/

trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/SoftwareVersion.java

-                      r132
+                      r135
      */
     private final Instances instances;
     /**
      * Review effort per instance.
+     * Review effort per instance.
      */
     private final List<Double> efforts;
 …
      *            data of the version
      */
+    public SoftwareVersion(String project, String version, Instances instances, List<Double> efforts) {
+    public SoftwareVersion(String project,
+                           String version,
+                           Instances instances,
+                           List<Double> efforts)
+    {
         this.project = project;
         this.version = version;
 …
         this.efforts = efforts;
+    }
     /**
      * returns the project name
 …
         return new Instances(instances);
+    }
     /**
      * <p>

trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/BayesNetWrapper.java

r130	r135
40	40	* generated ID
41	41	*/
42		~~/** */~~
43	42	private static final long serialVersionUID = -4835134612921456157L;
44	43

trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/FixClass.java

-                      r86
+                      r135
 /**
  * Simple classifier that always predicts the same class
+ * Simple classifier that always predicts the same class.
+ *
  * @author Steffen Herbold
 …
 public class FixClass extends AbstractClassifier {
+    /**
+     * default serialization ID
+     */
     private static final long serialVersionUID = 1L;
+    /**
+     * default prediction: non-defective
+     */
     private double fixedClassValue = 0.0d;
-    public FixClass() {
-        // TODO Auto-generated constructor stub
+    }
     /**
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.AbstractClassifier#setOptions(java.lang.String[])
+     */
     @Override
     public void setOptions(String[] options) throws Exception {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
+     */
     @Override
     public double classifyInstance(Instance instance) {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+     */
     @Override
     public void buildClassifier(Instances traindata) throws Exception {

trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/ITestAwareClassifier.java

-                      r66
+                      r135
 package de.ugoe.cs.cpdp.wekaclassifier;
 import weka.core.Instances;
+/**
+ * <p>
+ * Interface for test data aware classifier implementations
+ * </p>
+ *
+ * @author Steffen Herbold
+ */
 public interface ITestAwareClassifier {
+    /**
+     * <p>
+     * passes the test data to the classifier
+     * </p>
+     *
+     * @param testdata
+     *            the test data
+     */
     public void setTestdata(Instances testdata);

trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/RandomClass.java

-                      r86
+                      r135
 /**
+ * <p>
  * Assigns a random class label to the instance it is evaluated on.
+ * </p>
+ * <p>
  * The range of class labels are hardcoded in fixedClassValues. This can later be extended to take
  * values from the XML configuration.
+ * </p>
+ *
+ * @author Alexander Trautsch
  */
 public class RandomClass extends AbstractClassifier {
+    /**
+     * default serialization id
+     */
     private static final long serialVersionUID = 1L;
+    /**
+     * class values
+     */
     private double[] fixedClassValues =
         { 0.0d, 1.0d };
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
+     */
     @Override
     public void buildClassifier(Instances arg0) throws Exception {
 …
+    }
+    /*
+     * (non-Javadoc)
+     *
+     * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
+     */
     @Override
     public double classifyInstance(Instance instance) {

trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/VCBSVM.java

r105	r135
334	334	*/
335	335	private Instances weightedResample(final Instances data, final int size) {
336		if~~( data.isEmpty()~~ ) {
	336	if (data.isEmpty()) {
337	337	return data;
338	338	}

trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/WHICH.java

-                      r127
+                      r135
                 score = 0;
+            }
             if( score==0 ) {
+            if (score == 0) {
                 score = 0.000000001; // to disallow 0 total score
+            }
 …
                 else {
                     String range = ranges.get(k);
                     if( "'All'".equals(range) ) {
+                    if ("'All'".equals(range)) {
                         result = true;
+                    } else {
+                    }
+                    else {
                         double instanceValue = instance.value(attrIndex);
                         double lowerBound;
 …
                             else {
                                 // first value is positive
+                                if( splitResult[0].substring(2, splitResult[0].length()).equals("ll'")) {
+                                if (splitResult[0].substring(2, splitResult[0].length())
+                                    .equals("ll'"))
+                                {
                                     System.out.println("foo");
+                                }
                                 lowerBound = Double
                                     .parseDouble(splitResult[0].substring(2, splitResult[0].length()));
+                                lowerBound = Double.parseDouble(splitResult[0]
+                                    .substring(2, splitResult[0].length()));
                                 if (splitResult[1].startsWith("inf")) {
                                     upperBound = Double.POSITIVE_INFINITY;
 …
                         boolean upperBoundMatch = (range.charAt(range.length() - 2) == ')' &&
                             instanceValue < upperBound) ||
+                            (range.charAt(range.length() - 2) == ']' && instanceValue <= upperBound);
+                            (range.charAt(range.length() - 2) == ']' &&
+                                instanceValue <= upperBound);
                         result = lowerBoundMatch && upperBoundMatch;
+                    }

trunk/CrossPare/test/de/ugoe/cs/cpdp/eval/MySQLResultStorageTest.java

-                      r71
+                      r135
         result.setSizeTestData(100);
         result.setSizeTrainingData(200);
-        result.setSuccHe(0.1);
-        result.setSuccZi(0.05);
-        result.setSuccG75(0.2);
-        result.setSuccG60(0.4);
         result.setError(0.2);
         result.setRecall(0.8);

Context Navigation

Legend:

Download in other formats: