Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/SynonymAttributePruning.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/SynonymAttributePruning.java	(revision 64)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/SynonymAttributePruning.java	(revision 64)
@@ -0,0 +1,88 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import weka.core.Instances;
+
+/**
+ * <p>
+ * Synonym pruning after Amasaki et al. (2015). The selection of the attributes for pruning happens
+ * only on the training data. The attributes are deleted from both the training and test data.
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class SynonymAttributePruning implements IProcessesingStrategy {
+
+    /**
+     * Has no parameters; the string is ignored.
+     * 
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        // nothing to configure
+    }
+
+    /**
+     * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances,
+     *      weka.core.Instances)
+     */
+    @Override
+    public void apply(Instances testdata, Instances traindata) {
+        applySynonymPruning(testdata, traindata);
+    }
+
+    /**
+     * Deletes every non-class attribute from both data sets unless some pair of distinct training
+     * instances differs in that attribute by no more than in any attribute (the class attribute
+     * included). Only the training data is consulted for the decision.
+     *
+     * @param testdata the test data; pruned in place
+     * @param traindata the training data that drives the selection; pruned in place
+     */
+    private void applySynonymPruning(Instances testdata, Instances traindata) {
+        for (int attr = traindata.numAttributes() - 1; attr >= 0; attr--) {
+            if (attr == traindata.classIndex()) {
+                continue;
+            }
+            boolean hasClosest = false;
+            for (int row = 0; !hasClosest && row < traindata.size(); row++) {
+                for (int peer = 0; !hasClosest && peer < traindata.size(); peer++) {
+                    if (row == peer) {
+                        continue;
+                    }
+                    double smallest = Double.MAX_VALUE;
+                    double ownDistance = Double.MAX_VALUE;
+                    for (int col = 0; col < traindata.numAttributes(); col++) {
+                        double distance =
+                            Math.abs(traindata.get(row).value(col) - traindata.get(peer).value(col));
+                        if (distance < smallest) {
+                            smallest = distance;
+                        }
+                        if (col == attr) {
+                            ownDistance = distance;
+                        }
+                    }
+                    hasClosest = ownDistance <= smallest;
+                }
+            }
+            if (!hasClosest) {
+                testdata.deleteAttributeAt(attr);
+                traindata.deleteAttributeAt(attr);
+            }
+        }
+    }
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TCAPlusNormalization.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TCAPlusNormalization.java	(revision 63)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TCAPlusNormalization.java	(revision 64)
@@ -15,6 +15,6 @@
 package de.ugoe.cs.cpdp.dataprocessing;
 
-import org.apache.commons.math3.ml.distance.EuclideanDistance;
-
+import de.ugoe.cs.cpdp.util.WekaUtils;
+import de.ugoe.cs.cpdp.util.WekaUtils.DistChar;
 import weka.core.Instances;
 
@@ -22,19 +22,4 @@
 public class TCAPlusNormalization implements IProcessesingStrategy {
 
-    private class DistChar {
-        private final double mean;
-        private final double std;
-        private final double min;
-        private final double max;
-        private int num;
-        private DistChar(double mean, double std, double min, double max, int num) {
-            this.mean = mean;
-            this.std = std;
-            this.min = min;
-            this.max = max;
-            this.num = num;
-        }
-    }
-    
     /**
      * Does not have parameters. String is ignored.
@@ -55,6 +40,6 @@
     
     private void applyTCAPlus(Instances testdata, Instances traindata) {
-        DistChar dcTest = datasetDistance(testdata);
-        DistChar dcTrain = datasetDistance(traindata);
+        DistChar dcTest = WekaUtils.datasetDistance(testdata);
+        DistChar dcTrain = WekaUtils.datasetDistance(traindata);
         
         // RULE 1:
@@ -86,47 +71,3 @@
         }
     }
-    
-    private DistChar datasetDistance(Instances data) {
-        double distance;
-        double sumAll = 0.0;
-        double sumAllQ = 0.0;
-        double min = Double.MAX_VALUE;
-        double max = Double.MIN_VALUE;
-        int numCmp = 0;
-        int l = 0;
-        double[] inst1 = new double[data.numAttributes()-1];
-        double[] inst2 = new double[data.numAttributes()-1];
-        EuclideanDistance euclideanDistance = new EuclideanDistance();
-        for( int i=0; i<data.numInstances(); i++ ) {
-            l=0;
-            for( int k=0; k<data.numAttributes(); k++ ) {
-                if( k!=data.classIndex() ) {
-                    inst1[l] = data.instance(i).value(k);
-                }
-            }
-            for( int j=0; j<data.numInstances(); j++ ) {
-                l=0;
-                for( int k=0; k<data.numAttributes(); k++ ) {
-                    if( k!=data.classIndex() ) {
-                        inst2[l] = data.instance(j).value(k);
-                    }
-                }
-                distance = euclideanDistance.compute(inst1, inst2);
-                sumAll += distance;
-                sumAllQ += distance*distance;
-                numCmp++;
-                if( distance < min ) {
-                    min = distance;
-                }
-                if( distance > max ) {
-                    max = distance;
-                }
-            }
-        }
-        double mean = sumAll / numCmp;
-        double std = Math.sqrt((sumAllQ-(sumAll*sumAll)/numCmp) *
-                                  (1.0d / (numCmp - 1)));
-        return new DistChar(mean, std, min, max, data.numInstances());
-    }
-
 }
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TopMetricFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TopMetricFilter.java	(revision 64)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TopMetricFilter.java	(revision 64)
@@ -0,0 +1,207 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.stream.IntStream;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;
+
+import com.google.common.collect.Sets;
+
+import de.ugoe.cs.cpdp.util.SortUtils;
+import de.ugoe.cs.util.console.Console;
+import weka.attributeSelection.AttributeSelection;
+import weka.attributeSelection.CfsSubsetEval;
+import weka.attributeSelection.GreedyStepwise;
+import weka.classifiers.trees.J48;
+import weka.core.Instances;
+
+/**
+ * <p>
+ * Implements the OPTTOPk filter after P. He et al. (2015).
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class TopMetricFilter implements ISetWiseProcessingStrategy {
+
+    /**
+     * Absolute Spearman correlation above which two metrics count as correlated; default 0.5.
+     */
+    double correlationThreshold = 0.5;
+
+    /**
+     * Sets the correlation threshold; a null or empty string keeps the current value.
+     * 
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        if (parameters != null && !parameters.isEmpty()) {
+            correlationThreshold = Double.parseDouble(parameters);
+        }
+    }
+
+    /** Applies the OPTTOPk selection; any failure is reported and rethrown unchecked. */
+    @Override
+    public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+        try {
+            determineTopKAttributes(testdata, traindataSet);
+        } catch (Exception e) {
+            Console.printerr("Failure during metric selection: " + e.getMessage());
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Selects the OPTTOPk metrics from the training data and deletes all other non-class
+     * attributes from all data sets; learner and attribute selection exceptions are forwarded.
+     */
+    private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet) throws Exception {
+        final int classIndex = traindataSet.get(0).classIndex(); // skipped when numbering metrics
+        Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1];
+        IntStream.range(0, counts.length).forEach(val -> counts[val] = 0); // no nulls for sorting
+        for (Instances traindata : traindataSet) {
+            J48 decisionTree = new J48();
+            decisionTree.buildClassifier(traindata);
+            String treeText = decisionTree.toString(); // rendered once, not once per attribute
+            int k = 0;
+            for (int j = 0; j < traindata.numAttributes(); j++) {
+                if (j != traindata.classIndex()) {
+                    if (treeText.contains(traindata.attribute(j).name())) {
+                        counts[k]++;
+                    }
+                    k++;
+                }
+            }
+        }
+        int[] topkIndex = IntStream.range(0, counts.length + 1).filter(a -> a != classIndex).toArray();
+        SortUtils.quicksort(counts, topkIndex, true); // topkIndex holds attribute indices
+        // get CFSs for each training set
+        List<Set<Integer>> cfsSets = new LinkedList<>();
+        for (Instances traindata : traindataSet) {
+            AttributeSelection attsel = new AttributeSelection();
+            GreedyStepwise search = new GreedyStepwise();
+            search.setSearchBackwards(true);
+            attsel.setEvaluator(new CfsSubsetEval());
+            attsel.setSearch(search);
+            attsel.SelectAttributes(traindata);
+            Set<Integer> cfsSet = new HashSet<>();
+            for (int attr : attsel.selectedAttributes()) {
+                cfsSet.add(attr);
+            }
+            cfsSets.add(cfsSet);
+        }
+        double[] coverages = new double[topkIndex.length];
+        for (Set<Integer> cfsSet : cfsSets) {
+            Set<Integer> topkSet = new HashSet<>();
+            for (int k = 0; k < topkIndex.length; k++) {
+                topkSet.add(topkIndex[k]);
+                coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size());
+            }
+        }
+        double bestCoverageValue = 0.0; // a better prefix needs a strictly positive coverage
+        int topK = Math.min(1, coverages.length);
+        for (int i = 0; i < coverages.length; i++) {
+            if (coverages[i] > bestCoverageValue) {
+                bestCoverageValue = coverages[i];
+                topK = i + 1; // coverages[i] was computed for the first i + 1 metrics
+            }
+        }
+        // build correlation matrix (NOTE(review): each training set overwrites it - confirm)
+        SpearmansCorrelation corr = new SpearmansCorrelation();
+        double[][] correlationMatrix = new double[topK][topK];
+        for (Instances traindata : traindataSet) {
+            double[][] vectors = new double[topK][traindata.size()];
+            for (int i = 0; i < traindata.size(); i++) {
+                for (int j = 0; j < topK; j++) {
+                    vectors[j][i] = traindata.get(i).value(topkIndex[j]);
+                }
+            }
+            for (int j = 0; j < topK; j++) {
+                for (int k = j + 1; k < topK; k++) {
+                    correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k]));
+                }
+            }
+        }
+        Set<Integer> topkSetIndexSet = new TreeSet<>();
+        for (int j = 0; j < topK; j++) {
+            topkSetIndexSet.add(j);
+        }
+        Set<Set<Integer>> allCombinations = Sets.powerSet(topkSetIndexSet);
+        double bestOptCoverage = 0.0; // only combinations with positive coverage are accepted
+        Set<Integer> opttopkSetIndexSet = null;
+        for (Set<Integer> combination : allCombinations) {
+            if (isUncorrelated(correlationMatrix, combination)) {
+                double currentCoverage = 0.0;
+                Set<Integer> topkCombination = new TreeSet<>();
+                for (Integer index : combination) {
+                    topkCombination.add(topkIndex[index]);
+                }
+                for (Set<Integer> cfsSet : cfsSets) {
+                    currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size());
+                }
+                if (currentCoverage > bestOptCoverage) {
+                    bestOptCoverage = currentCoverage;
+                    opttopkSetIndexSet = combination;
+                }
+            }
+        }
+        java.util.Objects.requireNonNull(opttopkSetIndexSet, "no combination with positive coverage");
+        Set<Integer> opttopkIndex = new TreeSet<>();
+        for (Integer index : opttopkSetIndexSet) {
+            opttopkIndex.add(topkIndex[index]);
+        }
+        Console.traceln(Level.FINE, "selected the following metrics:");
+        for (Integer index : opttopkIndex) {
+            Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name());
+        }
+        // finally remove attributes
+        for (int j = testdata.numAttributes() - 1; j >= 0; j--) {
+            if (j != testdata.classIndex() && !opttopkIndex.contains(j)) {
+                testdata.deleteAttributeAt(j);
+                for (Instances traindata : traindataSet) {
+                    traindata.deleteAttributeAt(j);
+                }
+            }
+        }
+    }
+    
+    /** Tells whether all pairs among the given top-k positions stay at or below the threshold. */
+    private boolean isUncorrelated(double[][] correlationMatrix, Set<Integer> combination) {
+        Integer[] idx = combination.toArray(new Integer[0]);
+        boolean areUncorrelated = true;
+        for (int i = 0; areUncorrelated && i < idx.length; i++) {
+            for (int j = i + 1; areUncorrelated && j < idx.length; j++) {
+                areUncorrelated = correlationMatrix[idx[i]][idx[j]] <= correlationThreshold;
+            }
+        }
+        return areUncorrelated;
+    }
+    
+    /** Ratio of the intersection size to the union size of both sets; 0.0 if both are empty. */
+    private double coverage(Set<Integer> topkSet, Set<Integer> cfsSet) {
+        Set<Integer> intersection = new HashSet<>(topkSet);
+        intersection.retainAll(cfsSet);
+        Set<Integer> union = new HashSet<>(topkSet);
+        union.addAll(cfsSet);
+        return union.isEmpty() ? 0.0 : ((double) intersection.size()) / union.size();
+    }
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TransferComponentAnalysis.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TransferComponentAnalysis.java	(revision 63)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TransferComponentAnalysis.java	(revision 64)
@@ -25,4 +25,5 @@
 import org.ojalgo.array.Array1D;
 
+import de.ugoe.cs.cpdp.util.SortUtils;
 import de.ugoe.cs.util.console.Console;
 import weka.core.Attribute;
@@ -89,5 +90,5 @@
         Array1D<ComplexNumber> eigenvaluesArray = eigenvalueDecomposition.getEigenvalues();
         System.out.println(eigenvaluesArray.length);
-        final double[] eigenvalues = new double[(int) eigenvaluesArray.length];
+        final Double[] eigenvalues = new Double[(int) eigenvaluesArray.length];
         final int[] index = new int[(int) eigenvaluesArray.length];
         // create kernel transformation matrix from eigenvectors
@@ -96,5 +97,5 @@
             index[i] = i;
         }
-        quicksort(eigenvalues, index);
+        SortUtils.quicksort(eigenvalues, index);
 
         final PrimitiveMatrix transformedKernel = kernelMatrix.multiplyRight(eigenvalueDecomposition
@@ -218,50 +219,3 @@
         return muMatrix.build();
     }
-
-    // below is from http://stackoverflow.com/a/1040503
-    private static void quicksort(double[] main, int[] index) {
-        quicksort(main, index, 0, index.length - 1);
-    }
-
-    // quicksort a[left] to a[right]
-    private static void quicksort(double[] a, int[] index, int left, int right) {
-        if (right <= left)
-            return;
-        int i = partition(a, index, left, right);
-        quicksort(a, index, left, i - 1);
-        quicksort(a, index, i + 1, right);
-    }
-
-    // partition a[left] to a[right], assumes left < right
-    private static int partition(double[] a, int[] index, int left, int right) {
-        int i = left - 1;
-        int j = right;
-        while (true) {
-            while (less(a[++i], a[right])) // find item on left to swap
-            ; // a[right] acts as sentinel
-            while (less(a[right], a[--j])) // find item on right to swap
-                if (j == left)
-                    break; // don't go out-of-bounds
-            if (i >= j)
-                break; // check if pointers cross
-            exch(a, index, i, j); // swap two elements into place
-        }
-        exch(a, index, i, right); // swap with partition element
-        return i;
-    }
-
-    // is x < y ?
-    private static boolean less(double x, double y) {
-        return (x < y);
-    }
-
-    // exchange a[i] and a[j]
-    private static void exch(double[] a, int[] index, int i, int j) {
-        double swap = a[i];
-        a[i] = a[j];
-        a[j] = swap;
-        int b = index[i];
-        index[i] = index[j];
-        index[j] = b;
-    }
 }
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/DBSCANFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/DBSCANFilter.java	(revision 64)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/DBSCANFilter.java	(revision 64)
@@ -0,0 +1,114 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataselection;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection;
+import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import weka.core.Instances;
+
+/**
+ * DBSCAN filter after Kawata et al. (2015)
+ * 
+ * @author Steffen Herbold
+ */
+public class DBSCANFilter implements IPointWiseDataselectionStrategy {
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        // do nothing: the DBSCAN settings are hard-coded in apply
+    }
+
+    /**
+     * Clusters test and training data jointly with DBSCAN and collects, into a new data set, the
+     * training instances of every cluster in which a test instance is found.
+     */
+    @Override
+    public Instances apply(Instances testdata, Instances traindata) {
+        Instances filteredTraindata = new Instances(traindata);
+        filteredTraindata.clear();
+
+        double[][] data =
+            new double[testdata.size() + traindata.size()][testdata.numAttributes() - 1];
+        int classIndex = testdata.classIndex();
+        for (int i = 0; i < testdata.size(); i++) { // test rows come first
+            int k = 0;
+            for (int j = 0; j < testdata.numAttributes(); j++) {
+                if (j != classIndex) {
+                    data[i][k] = testdata.get(i).value(j);
+                    k++;
+                }
+            }
+        }
+        for (int i = 0; i < traindata.size(); i++) { // training rows follow the test rows
+            int k = 0;
+            for (int j = 0; j < traindata.numAttributes(); j++) {
+                if (j != classIndex) { // class index is the one read from testdata
+                    data[i + testdata.size()][k] = traindata.get(i).value(j);
+                    k++;
+                }
+            }
+        }
+        DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
+        Database db = new StaticArrayDatabase(dbc, null);
+        db.initialize();
+        DBSCAN<DoubleVector> dbscan =
+            new DBSCAN<DoubleVector>(EuclideanDistanceFunction.STATIC, 1.0, 10);
+        Clustering<Model> clusterer = dbscan.run(db);
+        Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
+
+        for (Cluster<Model> cluster : clusterer.getAllClusters()) {
+            // check if the cluster holds test data (presumably the first testdata.size() DBIDs)
+            DBIDIter iter = rel.iterDBIDs();
+            boolean noMatch = true;
+            for (int i = 0; noMatch && i < testdata.size(); i++) {
+                noMatch = !cluster.getIDs().contains(iter);
+                iter.advance();
+            }
+            if (!noMatch) {
+                // cluster contains test data
+                for (DBIDIter clusterIter = cluster.getIDs().iter(); clusterIter
+                    .valid(); clusterIter.advance())
+                {
+                    int internalIndex = clusterIter.internalGetIndex() - testdata.size() - 1;
+                    if (internalIndex >= 0) {
+                        // training instance; NOTE(review): the -1 assumes DBIDs start at 1 - confirm
+                        filteredTraindata.add(traindata.get(internalIndex));
+                    }
+                }
+
+            }
+        }
+        System.out.println(filteredTraindata.size()); // prints the number of selected instances
+
+        return filteredTraindata;
+    }
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/MahalanobisOutlierRemoval.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/MahalanobisOutlierRemoval.java	(revision 64)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/MahalanobisOutlierRemoval.java	(revision 64)
@@ -0,0 +1,143 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataselection;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.apache.commons.math3.linear.BlockRealMatrix;
+import org.apache.commons.math3.linear.LUDecomposition;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.stat.correlation.Covariance;
+
+import de.ugoe.cs.cpdp.util.WekaUtils;
+import weka.core.Instances;
+
+/**
+ * <p>
+ * Uses the Mahalanobis distance for outlier removal. All instances whose Mahalanobis distance to
+ * the mean of the data is greater than epsilon are removed. The default for epsilon is 3.0.
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class MahalanobisOutlierRemoval
+    implements IPointWiseDataselectionStrategy, ISetWiseDataselectionStrategy
+{
+
+    /**
+     * Distance outside which entities are removed as outliers.
+     */
+    private double epsilon = 3.0d;
+
+    /**
+     * Sets epsilon. Default is 3.0. A null or empty string leaves epsilon unchanged.
+     * 
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        if (parameters != null && !parameters.isEmpty()) {
+            epsilon = Double.parseDouble(parameters);
+        }
+    }
+
+    /**
+     * Removes the outliers from each training data set in place; the test data is not used.
+     * 
+     * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances,
+     *      org.apache.commons.collections4.list.SetUniqueList)
+     */
+    @Override
+    public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+        for (Instances traindata : traindataSet) {
+            applyMahalanobisDistancesRemoval(traindata);
+        }
+    }
+
+    /**
+     * Removes the outliers from the training data in place and returns that same object.
+     * 
+     * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances,
+     *      weka.core.Instances)
+     */
+    @Override
+    public Instances apply(Instances testdata, Instances traindata) {
+        applyMahalanobisDistancesRemoval(traindata);
+        return traindata;
+    }
+
+    /**
+     * <p>
+     * Removes all instances whose Mahalanobis distance to the mean of the data is greater than
+     * epsilon.
+     * </p>
+     *
+     * @param data
+     *            data where the outliers are removed
+     */
+    private void applyMahalanobisDistancesRemoval(Instances data) {
+        RealMatrix values = new BlockRealMatrix(data.size(), data.numAttributes() - 1);
+        for (int i = 0; i < data.size(); i++) {
+            values.setRow(i, WekaUtils.instanceValues(data.get(i)));
+        }
+        RealMatrix inverseCovariance =
+            new LUDecomposition(new Covariance(values).getCovarianceMatrix()).getSolver()
+                .getInverse();
+
+        // create mean vector over the non-class attributes
+        double[] meanValues = new double[data.numAttributes() - 1];
+        int k = 0;
+        for (int j = 0; j < data.numAttributes(); j++) {
+            if (j != data.classIndex()) {
+                meanValues[k] = data.attributeStats(j).numericStats.mean;
+                k++;
+            }
+        }
+        // iterate backwards so that removing an instance does not shift the remaining indices
+        for (int i = data.size() - 1; i >= 0; i--) {
+            // row i of values still holds the values of instance i collected above
+            double distance =
+                mahalanobisDistance(inverseCovariance, values.getRow(i), meanValues);
+            if (distance > epsilon) {
+                data.remove(i);
+            }
+        }
+    }
+
+    /**
+     * <p>
+     * Calculates the Mahalanobis distance between two vectors for a given inverse covariance
+     * matrix.
+     * </p>
+     *
+     * @param inverseCovariance inverse of the covariance matrix
+     * @param vector1 first vector
+     * @param vector2 second vector
+     * @return square root of (vector1 - vector2) * inverseCovariance * (vector1 - vector2)^T
+     */
+    private double mahalanobisDistance(RealMatrix inverseCovariance,
+                                       double[] vector1,
+                                       double[] vector2)
+    {
+        RealMatrix x = new BlockRealMatrix(1, vector1.length);
+        x.setRow(0, vector1);
+        RealMatrix y = new BlockRealMatrix(1, vector2.length);
+        y.setRow(0, vector2);
+
+        RealMatrix deltaxy = x.subtract(y);
+
+        return Math
+            .sqrt(deltaxy.multiply(inverseCovariance).multiply(deltaxy.transpose()).getEntry(0, 0));
+    }
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/NeighborhoodFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/NeighborhoodFilter.java	(revision 64)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/NeighborhoodFilter.java	(revision 64)
@@ -0,0 +1,86 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.TreeSet;
+
+import de.ugoe.cs.cpdp.util.WekaUtils;
+import weka.core.Instances;
+
+/**
+ * <p>
+ * Relevancy filter after Ryu et al., 2015b.
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class NeighborhoodFilter implements IPointWiseDataselectionStrategy {
+
+    /**
+     * Has no parameters; the string is ignored.
+     * 
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        // nothing to configure
+
+    }
+
+    /**
+     * Returns the filtered training data as a new data set; both inputs stay unmodified.
+     * 
+     * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances,
+     *      weka.core.Instances)
+     */
+    @Override
+    public Instances apply(Instances testdata, Instances traindata) {
+        return applyNeighborhoodFilter(testdata, traindata);
+    }
+
+    /**
+     * Applies the relevancy filter after Ryu et al. For every test instance, all training
+     * instances with the minimal Hamming distance to it are selected; each selected training
+     * instance appears once in the result.
+     *
+     * @param testdata test data
+     * @param traindata training data
+     * @return filtered training data
+     */
+    private Instances applyNeighborhoodFilter(Instances testdata, Instances traindata) {
+        TreeSet<Integer> selectedInstances = new TreeSet<>();
+        for (int i = 0; i < testdata.size(); i++) {
+            double[] distances = new double[traindata.size()]; // each pair is measured only once
+            double minHam = Double.MAX_VALUE;
+            for (int j = 0; j < traindata.size(); j++) {
+                distances[j] = WekaUtils.hammingDistance(testdata.get(i), traindata.get(j));
+                if (distances[j] < minHam) {
+                    minHam = distances[j];
+                }
+            }
+            for (int j = 0; j < traindata.size(); j++) {
+                if (distances[j] <= minHam) {
+                    selectedInstances.add(j);
+                }
+            }
+        }
+        // empty data set with the training header, because it receives training instances
+        Instances selectedTraindata = new Instances(traindata, 0);
+        for (Integer index : selectedInstances) {
+            selectedTraindata.add(traindata.instance(index));
+        }
+        return selectedTraindata;
+    }
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SynonymOutlierRemoval.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SynonymOutlierRemoval.java	(revision 64)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SynonymOutlierRemoval.java	(revision 64)
@@ -0,0 +1,89 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.dataselection;
+
+import weka.core.Instances;
+
+/**
+ * <p>
+ * Synonym outlier removal after Amasaki et al. (2015).
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class SynonymOutlierRemoval implements IPointWiseDataselectionStrategy {
+
+    /* (non-Javadoc)
+     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+     */
+    @Override
+    public void setParameter(String parameters) {
+        // do nothing
+    }
+
+    /* (non-Javadoc)
+     * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, weka.core.Instances)
+     */
+    @Override
+    public Instances apply(Instances testdata, Instances traindata) {
+        applySynonymRemoval(traindata);
+        return traindata;
+    }
+
+    /**
+     * <p>
+     * Applies the synonym outlier removal in place. For each attribute except the class, the
+     * distance of every instance to its nearest other instance is determined. An instance is
+     * deleted if this distance is above the minimum over all instances for every attribute.
+     * </p>
+     *
+     * @param traindata data from which the outliers are removed.
+     */
+    public void applySynonymRemoval(Instances traindata) {
+        double minDistance[][] = new double[traindata.size()][traindata.numAttributes()-1];
+        double minDistanceAttribute[] = new double[traindata.numAttributes()-1];
+        java.util.Arrays.fill(minDistanceAttribute, Double.MAX_VALUE);
+        // the minimal distances must be determined for all instances, not only for the last one
+        for (int i1 = 0; i1 < traindata.size(); i1++) {
+            int k=0;
+            for (int j = 0; j < traindata.numAttributes(); j++) {
+                if( j!=traindata.classIndex() ) {
+                    minDistance[i1][k] = Double.MAX_VALUE;
+                    for (int i2 = 0; i2 < traindata.size(); i2++) {
+                        if (i1 != i2) {
+                            double distance = Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j));
+                            if (distance < minDistance[i1][k]) {
+                                minDistance[i1][k] = distance;
+                            }
+                            if( distance < minDistanceAttribute[k] ) {
+                                minDistanceAttribute[k] = distance;
+                            }
+                        }
+                    }
+                    k++;
+                }
+            }
+        }
+        for( int i=traindata.size()-1; i>=0; i-- ) {
+            boolean hasClosest = false;
+            for( int j=0; !hasClosest && j<minDistanceAttribute.length; j++ ) {
+                hasClosest = minDistance[i][j]<=minDistanceAttribute[j];
+            }
+            if( !hasClosest ) {
+                traindata.delete(i);
+            }
+        }
+    }
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLASERTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLASERTraining.java	(revision 64)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLASERTraining.java	(revision 64)
@@ -0,0 +1,155 @@
+// Copyright 2015 Georg-August-Universität Göttingen, Germany
+//
+//   Licensed under the Apache License, Version 2.0 (the "License");
+//   you may not use this file except in compliance with the License.
+//   You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the License is distributed on an "AS IS" BASIS,
+//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//   See the License for the specific language governing permissions and
+//   limitations under the License.
+
+package de.ugoe.cs.cpdp.training;
+
+import java.io.PrintStream;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.io.output.NullOutputStream;
+
+import de.ugoe.cs.cpdp.util.WekaUtils;
+import weka.classifiers.AbstractClassifier;
+import weka.classifiers.Classifier;
+import weka.core.Instance;
+import weka.core.Instances;
+
+
+/**
+ * <p>
+ * Training strategy that combines a Weka classifier with a nearest-neighbor rule: if the training
+ * instances with minimal Hamming distance agree on a label, this label is the prediction,
+ * otherwise the prediction of the Weka classifier is used.
+ * </p>
+ * 
+ * @author Steffen Herbold
+ */
+public class WekaLASERTraining extends WekaBaseTraining implements ITrainingStrategy {
+
+    /** classifier that is trained by this strategy */
+    private final LASERClassifier internalClassifier = new LASERClassifier();
+
+    @Override
+    public Classifier getClassifier() {
+        return internalClassifier;
+    }
+
+    /** Trains the internal classifier; System.err is muted while the training is running. */
+    @Override
+    public void apply(Instances traindata) {
+        PrintStream errStr = System.err;
+        System.setErr(new PrintStream(new NullOutputStream()));
+        try {
+            internalClassifier.buildClassifier(traindata);
+        }
+        catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+        finally {
+            System.setErr(errStr);
+        }
+    }
+
+    /** Classifier that prefers the common label of the nearest training instances. */
+    public class LASERClassifier extends AbstractClassifier {
+
+        private static final long serialVersionUID = 1L;
+
+        /** wrapped classifier that is used if the nearest neighbors do not yield a label */
+        private Classifier laserClassifier = null;
+
+        /** copy of the training data in which the nearest neighbors are searched */
+        private Instances traindata = null;
+
+        /**
+         * Classifies an instance with the common label of its nearest training instances. If
+         * there is exactly one nearest training instance, the nearest neighbors of that training
+         * instance are considered instead. The wrapped classifier is used if the neighbors have
+         * different labels, or if the single nearest instance has only one nearest neighbor.
+         *
+         * @param instance instance that is classified
+         * @return class value of the instance
+         * @throws Exception thrown if the classification with the wrapped classifier fails
+         */
+        @Override
+        public double classifyInstance(Instance instance) throws Exception {
+            List<Integer> neighbors = findClosest(instance, -1);
+            if( neighbors.size()==1 ) {
+                int closestIndex = neighbors.get(0);
+                neighbors = findClosest(traindata.get(closestIndex), closestIndex);
+                if( neighbors.size()==1 ) {
+                    return laserClassifier.classifyInstance(instance);
+                }
+            }
+            double label = commonLabel(neighbors);
+            return Double.isNaN(label) ? laserClassifier.classifyInstance(instance) : label;
+        }
+
+        /**
+         * Determines all training instances with minimal Hamming distance to an instance.
+         *
+         * @param instance instance whose nearest neighbors are searched
+         * @param excludedIndex index of a training instance that is skipped; -1 to skip none
+         * @return indices of the nearest training instances, ties included
+         */
+        private List<Integer> findClosest(Instance instance, int excludedIndex) {
+            List<Integer> closest = new LinkedList<>();
+            double minDistance = Double.MAX_VALUE;
+            for( int i=0; i<traindata.size(); i++ ) {
+                if( i!=excludedIndex ) {
+                    double distance = WekaUtils.hammingDistance(instance, traindata.get(i));
+                    if( distance<minDistance ) {
+                        // new minimum: the candidates found so far are further away
+                        minDistance = distance;
+                        closest.clear();
+                    }
+                    if( distance<=minDistance ) {
+                        closest.add(i);
+                    }
+                }
+            }
+            return closest;
+        }
+
+        /**
+         * Determines the label on which a set of training instances agrees.
+         *
+         * @param indices indices of the training instances
+         * @return the common class value; NaN if indices is empty or the class values differ
+         */
+        private double commonLabel(List<Integer> indices) {
+            if( indices.isEmpty() ) {
+                return Double.NaN;
+            }
+            double label = traindata.get(indices.get(0)).classValue();
+            for( Integer index : indices ) {
+                // NaN never equals itself, i.e., a missing class value also yields NaN
+                if( label!=traindata.get(index).classValue() ) {
+                    return Double.NaN;
+                }
+            }
+            return label;
+        }
+
+        /** Stores a copy of the training data and trains the wrapped classifier. */
+        @Override
+        public void buildClassifier(Instances traindata) throws Exception {
+            // the copy keeps the neighbor search independent of later changes to traindata
+            this.traindata = new Instances(traindata);
+            laserClassifier = setupClassifier();
+            laserClassifier.buildClassifier(traindata);
+        }
+    }
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/AbstractCODEP.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/AbstractCODEP.java	(revision 63)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/AbstractCODEP.java	(revision 64)
@@ -27,4 +27,5 @@
 import weka.classifiers.functions.RBFNetwork;
 import weka.classifiers.rules.DecisionTable;
+import weka.classifiers.trees.ADTree;
 import weka.core.Attribute;
 import weka.core.DenseInstance;
@@ -103,5 +104,5 @@
     /**
      * <p>
-     * Creates a CODEP instance using the classications of the internal classifiers.
+     * Creates a CODEP instance using the classifications of the internal classifiers.
      * </p>
      *
@@ -110,5 +111,5 @@
      * @return CODEP instance
      * @throws Exception
-     *             thrown if an exception occurs during classification with an internal classifer
+     *             thrown if an exception occurs during classification with an internal classifier
      */
     private Instance createInternalInstance(Instance instance) throws Exception {
@@ -147,5 +148,5 @@
         // create training data with prediction labels
 
-        // TODO ADTree missing?!
+        internalClassifiers.add(new ADTree());
         internalClassifiers.add(new BayesNet());
         internalClassifiers.add(new DecisionTable());
