Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAMIProcessor.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAMIProcessor.java	(revision 42)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAMIProcessor.java	(revision 42)
@@ -0,0 +1,216 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.logging.Level;
+
+import org.apache.commons.math3.stat.descriptive.rank.Median;
+
+import de.ugoe.cs.util.console.Console;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * <p>
+ * This processor implements the CLAMI strategy from the CLAMI paper at ASE 2014 by Nam et al. With
+ * CLAMI, the original classification of the data is removed and instead a new classification is
+ * created based on metric values that are higher than the median of the metric. Afterwards, a
+ * subset of the metrics is selected, where the violations of this median threshold are minimal.
+ * Finally, all instances where the metric violations are not correct are dropped, leaving
+ * noise-free data regarding the median threshold classification.
+ * </p>
+ * <p>
+ * This can also be done for the test data (i.e., TestAsTraining data selection), as the original
+ * classification is completely ignored. Hence, CLAMI is an approach for unsupervised learning.
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class CLAMIProcessor implements IProcessesingStrategy {
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        // CLAMI takes no parameters; the method is required by the interface
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances,
+     * weka.core.Instances)
+     */
+    @Override
+    public void apply(Instances testdata, Instances traindata) {
+        applyCLAMI(testdata, traindata);
+    }
+
+    /**
+     * <p>
+     * Applies the CLAMI processor to the data. The test data is also required, in order to
+     * guarantee a consistent metric set: every metric dropped from the training data is also
+     * dropped from the test data.
+     * </p>
+     *
+     * @param testdata
+     *            test data; the data is not modified, only metrics are dropped
+     * @param data
+     *            data to which the CLAMI processor is applied
+     * @throws RuntimeException
+     *             if no violation cutoff yields clean instances of both classes
+     */
+    public void applyCLAMI(Instances testdata, Instances data) {
+        // medians of all metrics; the class attribute is skipped and stays 0.0
+        double[] medians = attributeMedians(data);
+
+        // cluster number of an instance = number of metrics strictly above their median
+        double[] clusterNumber = clusterNumbers(data, medians);
+
+        // instances with a cluster number above this median are treated as "buggy"
+        double medianClusterNumber = new Median().evaluate(clusterNumber);
+
+        // per metric: violations of the median threshold w.r.t. the cluster labeling
+        int[] numMetricViolations = metricViolations(data, medians, clusterNumber,
+                                                     medianClusterNumber);
+
+        SortedSet<Integer> distinctViolationCounts = new TreeSet<>();
+        for (int currentViolations : numMetricViolations) {
+            distinctViolationCounts.add(currentViolations);
+        }
+        Iterator<Integer> violationCountIterator = distinctViolationCounts.iterator();
+
+        // Select the metrics with the fewest violations such that at least one clean "buggy"
+        // and one clean "bug-free" instance remain; if a cutoff leaves no clean instance of
+        // one class, relax the cutoff to the next-larger violation count.
+        // Note: the previous implementation consumed the smallest violation count before the
+        // loop and immediately overwrote it inside, so the minimal cutoff was never tried and
+        // an exhausted iterator raised a bare NoSuchElementException; both are fixed here.
+        int violationCutoff;
+        boolean[] cleanInstances = new boolean[data.numInstances()];
+        int numCleanBuggyInstances;
+        int numCleanBugfreeInstances;
+        do {
+            if (!violationCountIterator.hasNext()) {
+                // even with all metrics selected no valid cutoff was found
+                throw new RuntimeException(
+                    "CLAMI failed: no violation cutoff yields clean instances of both classes");
+            }
+            violationCutoff = violationCountIterator.next();
+            numCleanBuggyInstances = 0;
+            numCleanBugfreeInstances = 0;
+            for (int i = 0; i < data.numInstances(); i++) {
+                int currentViolations = 0;
+                Instance currentInstance = data.get(i);
+                for (int j = 0; j < data.numAttributes(); j++) {
+                    if (j != data.classIndex() && numMetricViolations[j] <= violationCutoff) {
+                        if (clusterNumber[i] > medianClusterNumber) {
+                            // "buggy": violation if the metric is not above its median
+                            if (currentInstance.value(j) <= medians[j]) {
+                                currentViolations++;
+                            }
+                        }
+                        else {
+                            // "not buggy": violation if the metric is above its median
+                            if (currentInstance.value(j) > medians[j]) {
+                                currentViolations++;
+                            }
+                        }
+                    }
+                }
+                if (currentViolations == 0) {
+                    cleanInstances[i] = true;
+                    if (clusterNumber[i] > medianClusterNumber) {
+                        numCleanBuggyInstances++;
+                    }
+                    else {
+                        numCleanBugfreeInstances++;
+                    }
+                }
+                else {
+                    cleanInstances[i] = false;
+                }
+            }
+        }
+        while (numCleanBuggyInstances == 0 || numCleanBugfreeInstances == 0);
+
+        // output some interesting information to provide insights into the CLAMI model
+        Console.traceln(Level.FINE, "Selected Metrics and Median-threshold: ");
+        for (int j = 0; j < data.numAttributes(); j++) {
+            if (j != data.classIndex() && numMetricViolations[j] <= violationCutoff) {
+                Console.traceln(Level.FINE, "\t" + data.attribute(j).name() + ": " + medians[j]);
+            }
+        }
+
+        // drop the non-selected metrics (also from the testdata, for a consistent metric set)
+        for (int j = data.numAttributes() - 1; j >= 0; j--) {
+            if (j != data.classIndex() && numMetricViolations[j] > violationCutoff) {
+                data.deleteAttributeAt(j);
+                testdata.deleteAttributeAt(j);
+            }
+        }
+        // drop the unclean instances and set the new classification of the clean ones
+        for (int i = data.numInstances() - 1; i >= 0; i--) {
+            if (!cleanInstances[i]) {
+                data.delete(i);
+            }
+            else {
+                // 1.0 = "buggy", 0.0 = "not buggy" — assumes a binary class attribute where
+                // index 1 denotes the defective class; TODO confirm against data loaders
+                if (clusterNumber[i] > medianClusterNumber) {
+                    data.get(i).setClassValue(1.0d);
+                }
+                else {
+                    data.get(i).setClassValue(0.0d);
+                }
+            }
+        }
+    }
+
+    /**
+     * Determines the median of every non-class attribute.
+     *
+     * @param data
+     *            data the medians are computed for
+     * @return medians indexed by attribute; the class index stays 0.0
+     */
+    private double[] attributeMedians(Instances data) {
+        double[] medians = new double[data.numAttributes()];
+        for (int j = 0; j < data.numAttributes(); j++) {
+            if (j != data.classIndex()) {
+                // lower median: k-th smallest value with k = ceil(n/2)
+                medians[j] = data.kthSmallestValue(j, (data.numInstances() + 1) >> 1);
+            }
+        }
+        return medians;
+    }
+
+    /**
+     * Counts, per instance, how many metrics lie strictly above their median (the CLAMI
+     * "cluster number").
+     *
+     * @param data
+     *            instances to evaluate
+     * @param medians
+     *            per-attribute medians as computed by {@link #attributeMedians(Instances)}
+     * @return cluster number per instance
+     */
+    private double[] clusterNumbers(Instances data, double[] medians) {
+        double[] clusterNumber = new double[data.numInstances()];
+        for (int i = 0; i < data.numInstances(); i++) {
+            int countHighValues = 0;
+            Instance currentInstance = data.get(i);
+            for (int j = 0; j < data.numAttributes(); j++) {
+                if (j != data.classIndex() && currentInstance.value(j) > medians[j]) {
+                    countHighValues++;
+                }
+            }
+            clusterNumber[i] = countHighValues;
+        }
+        return clusterNumber;
+    }
+
+    /**
+     * Counts, per metric, the violations of the median threshold with respect to the
+     * cluster-based labeling: a "buggy" instance violates a metric whose value is at most the
+     * median; a "not buggy" instance violates a metric whose value is above the median.
+     *
+     * @param data
+     *            instances to evaluate
+     * @param medians
+     *            per-attribute medians
+     * @param clusterNumber
+     *            per-instance cluster numbers
+     * @param medianClusterNumber
+     *            threshold separating "buggy" from "not buggy"
+     * @return violation count per attribute; the class index stays 0
+     */
+    private int[] metricViolations(Instances data, double[] medians, double[] clusterNumber,
+                                   double medianClusterNumber)
+    {
+        int[] numMetricViolations = new int[data.numAttributes()];
+        for (int j = 0; j < data.numAttributes(); j++) {
+            if (j == data.classIndex()) {
+                continue;
+            }
+            int currentViolations = 0;
+            for (int i = 0; i < data.numInstances(); i++) {
+                Instance currentInstance = data.get(i);
+                if (clusterNumber[i] > medianClusterNumber) {
+                    // "buggy"
+                    if (currentInstance.value(j) <= medians[j]) {
+                        currentViolations++;
+                    }
+                }
+                else {
+                    // "not buggy"
+                    if (currentInstance.value(j) > medians[j]) {
+                        currentViolations++;
+                    }
+                }
+            }
+            numMetricViolations[j] = currentViolations;
+        }
+        return numMetricViolations;
+    }
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAProcessor.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAProcessor.java	(revision 42)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAProcessor.java	(revision 42)
@@ -0,0 +1,106 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.math3.stat.descriptive.rank.Median;
+
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * <p>
+ * This processor implements the CLA strategy from the CLAMI paper at ASE 2014 by Nam et al. With
+ * CLA, the original classification of the data is removed and instead a new classification is
+ * created based on metric values that are higher than the median of the metric.
+ * </p>
+ * <p>
+ * This can also be done for the test data (i.e., TestAsTraining data selection), as the original
+ * classification is completely ignored. Hence, CLA is an approach for unsupervised learning.
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class CLAProcessor implements IProcessesingStrategy {
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        // CLA takes no parameters; the method is required by the interface
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core. Instances,
+     * weka.core.Instances)
+     */
+    @Override
+    public void apply(Instances testdata, Instances traindata) {
+        applyCLA(traindata);
+    }
+
+    /**
+     * Applies the CLA processor to the data: the original classification is replaced by a
+     * clustering-based one, where instances whose count of above-median metrics exceeds the
+     * median of these counts are labeled 1.0, all others 0.0.
+     *
+     * @param data
+     *            data to which the processor is applied
+     */
+    public void applyCLA(Instances data) {
+        final int numAttributes = data.numAttributes();
+        final int numInstances = data.numInstances();
+        final int classIndex = data.classIndex();
+
+        // lower median of every metric; the class attribute is skipped and stays 0.0
+        double[] attributeMedians = new double[numAttributes];
+        for (int attr = 0; attr < numAttributes; attr++) {
+            if (attr != classIndex) {
+                attributeMedians[attr] = data.kthSmallestValue(attr, (numInstances + 1) >> 1);
+            }
+        }
+
+        // per instance: how many metrics lie strictly above their median
+        double[] aboveMedianCounts = new double[numInstances];
+        for (int inst = 0; inst < numInstances; inst++) {
+            Instance current = data.get(inst);
+            int highCount = 0;
+            for (int attr = 0; attr < numAttributes; attr++) {
+                if (attr != classIndex && current.value(attr) > attributeMedians[attr]) {
+                    highCount++;
+                }
+            }
+            aboveMedianCounts[inst] = highCount;
+        }
+
+        // threshold for the new labeling: median of the above-median counts
+        double countThreshold = new Median().evaluate(aboveMedianCounts);
+
+        // relabel every instance: above the threshold -> 1.0, otherwise -> 0.0
+        for (int inst = 0; inst < numInstances; inst++) {
+            data.get(inst).setClassValue(aboveMedianCounts[inst] > countThreshold ? 1.0d : 0.0d);
+        }
+    }
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining.java	(revision 41)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining.java	(revision 42)
@@ -56,5 +56,5 @@
         classifierParams = Arrays.copyOfRange(params, 2, params.length);
 
-        classifier = setupClassifier();
+        //classifier = setupClassifier();
     }
 
@@ -64,5 +64,5 @@
     }
 
-    public Classifier setupClassifier() {
+    protected Classifier setupClassifier() {
         Classifier cl = null;
         try {
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java	(revision 41)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java	(revision 42)
@@ -21,4 +21,5 @@
 
 import de.ugoe.cs.util.console.Console;
+import weka.classifiers.rules.ZeroR;
 import weka.core.Instances;
 
@@ -46,4 +47,5 @@
     @Override
     public void apply(Instances traindata) {
+        classifier = setupClassifier();
         PrintStream errStr = System.err;
         System.setErr(new PrintStream(new NullOutputStream()));
@@ -55,5 +57,19 @@
         }
         catch (Exception e) {
-            throw new RuntimeException(e);
+            if (e.getMessage().contains("Not enough training instances with class labels")) {
+                Console.traceln(Level.SEVERE,
+                                "failure due to lack of instances: " + e.getMessage());
+                Console.traceln(Level.SEVERE, "training ZeroR classifier instead");
+                classifier = new ZeroR();
+                try {
+                    classifier.buildClassifier(traindata);
+                }
+                catch (Exception e2) {
+                    throw new RuntimeException(e2);
+                }
+            }
+            else {
+                throw new RuntimeException(e);
+            }
         }
         finally {
