Index: trunk/CrossPare/.classpath
===================================================================
--- trunk/CrossPare/.classpath	(revision 2)
+++ trunk/CrossPare/.classpath	(revision 2)
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" path="src"/>
+	<classpathentry kind="src" path="test"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
+	<classpathentry kind="lib" path="lib/commons-collections4-4.0-alpha1.jar"/>
+	<classpathentry kind="lib" path="lib/commons-io-2.4.jar"/>
+	<classpathentry kind="lib" path="lib/commons-math3-3.1.jar"/>
+	<classpathentry kind="lib" path="lib/java-utils-0.0.1.jar"/>
+	<classpathentry kind="lib" path="lib/weka.jar"/>
+	<classpathentry kind="output" path="bin"/>
+</classpath>
Index: trunk/CrossPare/.project
===================================================================
--- trunk/CrossPare/.project	(revision 2)
+++ trunk/CrossPare/.project	(revision 2)
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>CrossPare</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
Index: trunk/CrossPare/.settings/org.eclipse.jdt.core.prefs
===================================================================
--- trunk/CrossPare/.settings/org.eclipse.jdt.core.prefs	(revision 2)
+++ trunk/CrossPare/.settings/org.eclipse.jdt.core.prefs	(revision 2)
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.7
Index: trunk/CrossPare/build.xml
===================================================================
--- trunk/CrossPare/build.xml	(revision 2)
+++ trunk/CrossPare/build.xml	(revision 2)
@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<project basedir="." default="dist" name="Crosspare">
+	<!-- General Properties -->
+	<property environment="env" />
+	<property name="build.location" value="bin" />
+	<property name="dist.location" value="dist" />
+	<property name="javadoc.location" value="javadoc" />
+	<property name="downloads.location" value="downloads" />
+	<property name="debuglevel" value="source,lines,vars" />
+	<property name="target" value="1.7" />
+	<property name="source" value="1.7" />
+
+	<!-- Crosspare Properties -->
+	<property name="Crosspare.jarname" value="crosspare.jar" />
+	<path id="Crosspare.classpath">
+		<pathelement location="${build.location}" /> <!-- compiled classes; the former build.location.eventbenchconsole property is not defined in this file -->
+		<pathelement location="lib/commons-collections4-4.0-alpha1.jar" />
+		<pathelement location="lib/commons-io-2.4.jar" />
+		<pathelement location="lib/commons-math3-3.1.jar" />
+		<pathelement location="lib/java-utils-0.0.1.jar" />
+		<pathelement location="lib/weka.jar" />
+	</path>
+
+	<!-- Initialization Targets -->
+	<target name="init.build">
+		<mkdir dir="${build.location}" />
+	</target>
+	<target name="init.dist">
+		<mkdir dir="${dist.location}" />
+	</target>
+	<target name="init.downloads">
+		<mkdir dir="${downloads.location}" />
+	</target>
+	<target name="init.javadoc">
+		<mkdir dir="${javadoc.location}" />
+	</target>
+
+	<!-- Clean-up Targets -->
+	<target name="clean.build">
+		<delete dir="${build.location}" />
+	</target>
+	<target name="clean.dist">
+		<delete dir="${dist.location}" />
+	</target>
+	<target name="clean.javadoc">
+		<delete dir="${javadoc.location}" />
+	</target>
+	<target depends="clean.build,clean.dist,clean.javadoc" name="clean.all" />
+
+	<!-- Build Targets -->
+	<target depends="init.build" name="build">
+		<javac debug="true" debuglevel="${debuglevel}"
+			destdir="${build.location}" source="${source}"
+			target="${target}" includeantruntime="false">
+			<src path="src" />
+			<classpath refid="Crosspare.classpath" />
+		</javac>
+	</target>
+	
+	<!-- Distribution Targets -->
+	<target depends="build,init.dist,javadoc" name="dist">
+		<!-- dist of main components, i.e., the console -->
+		<pathconvert property="dist.libs" pathsep=" ">
+			<mapper>
+				<chainedmapper>
+					<flattenmapper />
+					<globmapper from="*" to="lib/*" />
+				</chainedmapper>
+			</mapper>
+			<path>
+				<fileset dir="lib">
+					<include name="**/*.jar" />
+				</fileset>
+			</path>
+		</pathconvert>
+		<property name="dist.classpath"	value="${dist.libs}"></property>
+		<jar destfile="${dist.location}/${Crosspare.jarname}" basedir="${build.location}">
+			<manifest>
+				<attribute name="Built-By" value="${user.name}" />
+				<attribute name="Main-Class" value="de.ugoe.cs.cpdp.Runner" />
+				<attribute name="Class-Path" value="${dist.classpath}" />
+			</manifest>
+		</jar>
+		<copy includeemptydirs="false" todir="${dist.location}/lib">
+			<fileset dir="lib" />
+		</copy>
+		
+		<!-- copy Javadoc to dist -->
+		<copy includeemptydirs="false" todir="${dist.location}/javadoc">
+			<fileset dir="${javadoc.location}" />
+		</copy>
+	</target>
+
+
+	<target depends="init.downloads,dist" name="createDownloads">
+		<zip destfile="${downloads.location}/crosspare.zip">
+			<fileset dir="${dist.location}" />
+		</zip>
+	</target>
+
+	<!-- Javadoc Targets -->
+	<target depends="init.javadoc" name="javadoc"
+		description="o Create Javadocs (Requires Javadoc 1.4+)">
+		<javadoc destdir="${javadoc.location}"
+			additionalparam="-J-Dorg.apache.commons.attributes.javadoc.CATaglet.sources=${basedir}">
+			<classpath refid="Crosspare.classpath" />
+			<link href="http://download.oracle.com/javase/7/docs/api/" />
+			<fileset dir="src" includes="**/*.java" />
+		</javadoc>
+	</target>
+</project>
Index: trunk/CrossPare/experimentconfig.xsd
===================================================================
--- trunk/CrossPare/experimentconfig.xsd	(revision 2)
+++ trunk/CrossPare/experimentconfig.xsd	(revision 2)
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="utf-8"?>
+<xs:schema targetNamespace="experimentconfig"
+    elementFormDefault="qualified"
+    xmlns="experimentconfig"
+    xmlns:xs="http://www.w3.org/2001/XMLSchema"
+>
+  <xs:element name="config">
+    <xs:complexType>
+      <xs:sequence minOccurs="1" maxOccurs="1">
+        <xs:element name="partialconfig" type="pathType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="loader" type="datapathType" minOccurs="0" maxOccurs="1"/>
+        <xs:element name="resultspath" type="pathType" minOccurs="0" maxOccurs="1"/>
+        <xs:element name="versionfilter" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="testVersionfilter" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="trainVersionfilter" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="setwisepreprocessor" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="setwiseselector" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="setwisepostprocessor" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="setwisetrainer" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="preprocessor" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="pointwiseselector" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="postprocessor" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="trainer" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="eval" type="setupType" minOccurs="0" maxOccurs="unbounded"/>
+        <xs:element name="partialconfig" type="pathType" minOccurs="0" maxOccurs="unbounded"/> <!-- NOTE(review): same name as the first particle with only optional particles in between; this looks like a Unique Particle Attribution (cos-nonambig) ambiguity - confirm with a schema validator -->
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  <xs:complexType name="datapathType">
+    <xs:attribute name="name" type="xs:string" />
+    <xs:attribute name="datalocation" type="xs:string" />
+    <xs:attribute name="relative" type="xs:boolean" default="true"/>
+  </xs:complexType>
+  <xs:complexType name="pathType">
+    <xs:attribute name="path" type="xs:string" />
+    <xs:attribute name="relative" type="xs:boolean" default="true"/>
+  </xs:complexType>
+  <xs:complexType name="setupType">
+    <xs:attribute name="name" type="xs:string" />
+    <xs:attribute name="param" type="xs:string" />
+  </xs:complexType>
+</xs:schema>
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/Experiment.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/Experiment.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/Experiment.java	(revision 2)
@@ -0,0 +1,200 @@
+package de.ugoe.cs.cpdp;
+
+import java.io.File;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instances;
+import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
+import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
+import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
+import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
+import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
+import de.ugoe.cs.cpdp.loader.IVersionLoader;
+import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
+import de.ugoe.cs.cpdp.training.ITrainer;
+import de.ugoe.cs.cpdp.training.ITrainingStrategy;
+import de.ugoe.cs.cpdp.versions.IVersionFilter;
+import de.ugoe.cs.cpdp.versions.SoftwareVersion;
+import de.ugoe.cs.util.console.Console;
+
+/**
+ * Class responsible for executing an experiment according to an {@link ExperimentConfiguration}. The steps of an experiment are as follows:
+ * <ul>
+ *  <li>load the data from the provided data path</li>
+ *  <li>filter the data sets according to the provided version filters</li>
+ *  <li>execute the following steps for each data set that is used as test data, i.e., that is not ignored through the test version filter:
+ *  <ul>
+ *   <li>filter the data sets to setup the candidate training data:
+ *   <ul>
+ *    <li>remove all data sets from the same project</li>
+ *    <li>filter all data sets according to the training data filter</li>
+ *   </ul></li>
+ *   <li>apply the setwise preprocessors</li>
+ *   <li>apply the setwise data selection algorithms</li>
+ *   <li>apply the setwise postprocessors</li>
+ *   <li>train the setwise training classifiers</li>
+ *   <li>unify all remaining training data into one data set</li>
+ *   <li>apply the preprocessors</li>
+ *   <li>apply the pointwise data selection algorithms</li>
+ *   <li>apply the postprocessors</li>
+ *   <li>train the normal classifiers</li>
+ *   <li>evaluate the results for all trained classifiers on the training data</li>
+ *  </ul></li>
+ * </ul>
+ * 
+ * Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own thread.
+ * @author Steffen Herbold
+ */
+public class Experiment implements Runnable {
+
+	/**
+	 * configuration of the experiment
+	 */
+	private final ExperimentConfiguration config;
+	
+	/**
+	 * Constructor. Creates a new experiment based on a configuration.
+	 * @param config configuration of the experiment
+	 */
+	public Experiment(ExperimentConfiguration config) {
+		this.config = config;
+	}
+	
+	/**
+	 * Executes the experiment with the steps as described in the class comment.
+	 * @see Runnable#run() 
+	 */
+	@Override
+	public void run() {
+		final List<SoftwareVersion> versions = new LinkedList<>();
+		
+		for(IVersionLoader loader : config.getLoaders()) {
+			versions.addAll(loader.load());
+		}
+		
+		for( IVersionFilter filter : config.getVersionFilters() ) {
+			filter.apply(versions);
+		}
+		boolean writeHeader = true;
+		int versionCount = 1;
+		int testVersionCount = 0;
+		
+		for( SoftwareVersion testVersion : versions ) { // first pass only counts the test versions for the progress output
+			if( isVersion(testVersion, config.getTestVersionFilters()) ) {
+				testVersionCount++;
+			}
+		}
+		
+		for( SoftwareVersion testVersion : versions ) {
+			if( isVersion(testVersion, config.getTestVersionFilters()) ) {
+				Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: starting", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion()));
+				
+				// Setup testdata and training data
+				Instances testdata = testVersion.getInstances();
+				String testProject = testVersion.getProject();
+				SetUniqueList<Instances> traindataSet = SetUniqueList.setUniqueList(new LinkedList<Instances>());
+				for( SoftwareVersion trainingVersion : versions ) {
+					if( isVersion(trainingVersion, config.getTrainingVersionFilters()) ) {
+						if( trainingVersion!=testVersion ) {
+							if( !trainingVersion.getProject().equals(testProject) ) {
+								traindataSet.add(trainingVersion.getInstances());
+							}
+						}
+					}
+				}
+				
+				for( ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying setwise preprocessor %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), processor.getClass().getName()));
+					processor.apply(testdata, traindataSet);
+				}
+				for( ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying setwise selection %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), dataselector.getClass().getName()));
+					dataselector.apply(testdata, traindataSet);
+				}
+				for( ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), processor.getClass().getName()));
+					processor.apply(testdata, traindataSet);
+				}
+				for( ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying setwise trainer %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), setwiseTrainer.getClass().getName()));
+					setwiseTrainer.apply(traindataSet);
+				}
+				Instances traindata = makeSingleTrainingSet(traindataSet);
+				for( IProcessesingStrategy processor : config.getPreProcessors() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying preprocessor %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), processor.getClass().getName()));
+					processor.apply(testdata, traindata);
+				}
+				for( IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), dataselector.getClass().getName()));
+					traindata = dataselector.apply(testdata, traindata);
+				}
+				for( IProcessesingStrategy processor : config.getPostProcessors() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying postprocessor %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), processor.getClass().getName()));
+					processor.apply(testdata, traindata);
+				}
+				for( ITrainingStrategy trainer : config.getTrainers() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying trainer %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), trainer.getClass().getName()));
+					trainer.apply(traindata);
+				}
+				File resultsDir = new File(config.getResultsPath());
+				if (!resultsDir.exists()) {
+					resultsDir.mkdirs(); // mkdirs instead of mkdir: also creates missing parent directories of a nested results path
+				}
+				for( IEvaluationStrategy evaluator : config.getEvaluators() ) {
+					Console.traceln(Level.FINE, String.format("[%s] [%02d/%02d] %s: applying evaluator %s", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion(), evaluator.getClass().getName()));
+					List<ITrainer> allTrainers = new LinkedList<>();
+					for( ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers() ) {
+						allTrainers.add(setwiseTrainer);
+					}
+					for( ITrainingStrategy trainer : config.getTrainers() ) {
+						allTrainers.add(trainer);
+					}
+					if( writeHeader ) {
+						evaluator.setParameter(config.getResultsPath() + "/" + config.getExperimentName() + ".csv");
+					}
+					evaluator.apply(testdata, traindata, allTrainers, writeHeader);
+					writeHeader = false; // NOTE(review): cleared after the first evaluator, so any further evaluator never receives the results file via setParameter - confirm this is intended
+				}
+				Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished", config.getExperimentName(), versionCount, testVersionCount, testVersion.getVersion()));
+				versionCount++;
+			}
+		}
+	}
+	
+	/**
+	 * Helper method that checks if a version passes all filters.
+	 * @param version version that is checked
+	 * @param filters list of the filters
+	 * @return true, if the version passes all filters, false otherwise
+	 */
+	private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
+		boolean passesAll = true; // stays true only as long as no filter rejects the version
+		for( IVersionFilter candidate : filters ) {
+			if( candidate.apply(version) ) { passesAll = false; } // no break: every filter is still invoked, as before
+		}
+		return passesAll;
+	}
+
+	/**
+	 * Helper method that combines a set of Weka {@link Instances} sets into a single {@link Instances} set.
+	 * @param traindataSet set of {@link Instances} to be combined
+	 * @return single {@link Instances} set
+	 */
+	public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
+		Instances merged = null; // remains null when traindataSet is empty
+		for( Instances current : traindataSet ) {
+			if( merged!=null ) {
+				for( int idx=0 ; idx<current.numInstances() ; idx++ ) {
+					merged.add(current.instance(idx)); // append the instances of every further set one by one
+				}
+			} else {
+				merged = new Instances(current); // the first set is copied and becomes the base of the result
+			}
+		}
+		return merged;
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java	(revision 2)
@@ -0,0 +1,469 @@
+package de.ugoe.cs.cpdp;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Level;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
+import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
+import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
+import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
+import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
+import de.ugoe.cs.cpdp.loader.IVersionLoader;
+import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
+import de.ugoe.cs.cpdp.training.ITrainingStrategy;
+import de.ugoe.cs.cpdp.versions.IVersionFilter;
+import de.ugoe.cs.util.StringTools;
+import de.ugoe.cs.util.console.Console;
+
+/**
+ * Class that contains all meta information about an experiment, i.e., its configuration. The configuration is loaded from an XML file.
+ * <br><br>
+ * In the current implementation, the experiment configuration can only be created using an XML file. Programmatic creation of experiment configurations is currently not possible. 
+ * @author Steffen Herbold
+ */
+public class ExperimentConfiguration  extends DefaultHandler {
+
+	/**
+	 * handle of the file that contains the configuration
+	 */
+	private final File configFile;
+	
+	/**
+	 * name of the experiment (automatically set to the file name without the .xml ending)
+	 */
+	private String experimentName = "exp";
+	
+	/**
+	 * loads instances
+	 */
+	private List<IVersionLoader> loaders;
+	
+	/**
+	 * path where the results of the experiments are stored
+	 */
+	private String resultsPath = "results";
+	
+	/**
+	 * data set filters applied to all data
+	 */
+	private List<IVersionFilter> versionFilters;
+	
+	/**
+	 * data set filters that decide if a data set is used as test data
+	 */
+	private List<IVersionFilter> testVersionFilters;
+	
+	/**
+	 * data set filters that decide if a data is used as candidate training data
+	 */
+	private List<IVersionFilter> trainingVersionFilters;
+	
+	/**
+	 * setwise data processors that are applied before the setwise data selection
+	 */
+	private List<ISetWiseProcessingStrategy> setwisepreprocessors;
+	
+	/**
+	 * setwise data selection strategies
+	 */
+	private List<ISetWiseDataselectionStrategy> setwiseselectors;
+	
+	/**
+	 * setwise data processors that are applied after the setwise data selection
+	 */
+	private List<ISetWiseProcessingStrategy> setwisepostprocessors;
+	
+	/**
+	 * setwise trainers, i.e., trainers that require the selected training data to be separate from each other
+	 */
+	private List<ISetWiseTrainingStrategy> setwiseTrainers;
+	
+	/**
+	 * data processors that are applied before the pointwise data selection
+	 */
+	private List<IProcessesingStrategy> preprocessors;
+	
+	/**
+	 * pointwise data selection strategies
+	 */
+	private List<IPointWiseDataselectionStrategy> pointwiseselectors;
+	
+	/**
+	 * data processors that are applied after the pointwise data selection
+	 */
+	private List<IProcessesingStrategy> postprocessors;
+	
+	/**
+	 * normal trainers, i.e., trainers that require the selected training data in a single data set
+	 */
+	private List<ITrainingStrategy> trainers;
+	
+	/**
+	 * evaluators used for the experiment results
+	 */
+	private List<IEvaluationStrategy> evaluators;
+	
+	/**
+	 * Constructor. Creates a new configuration from a given file. 
+	 * @param filename name of the file from which the configuration is loaded.
+	 * @throws ExperimentConfigurationException thrown if there is an error creating the configuration
+	 */
+	public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
+		this(new File(filename));
+	}
+	
+	/**
+	 * Constructor. Creates a new configuration from a given file. 
+	 * @param file handle of the file from which the configuration is loaded.
+	 * @throws ExperimentConfigurationException thrown if there is an error creating the configuration
+	 */
+	public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
+		loaders = new LinkedList<>();
+		versionFilters = new LinkedList<>();
+		testVersionFilters = new LinkedList<>();
+		trainingVersionFilters = new LinkedList<>();
+		setwisepreprocessors = new LinkedList<>();
+		setwiseselectors = new LinkedList<>();
+		setwisepostprocessors = new LinkedList<>();
+		setwiseTrainers = new LinkedList<>();
+		preprocessors = new LinkedList<>();
+		pointwiseselectors = new LinkedList<>();
+		postprocessors = new LinkedList<>();		
+		trainers = new LinkedList<>();
+		evaluators = new LinkedList<>();
+		
+		if (file == null) {
+            throw new IllegalArgumentException("file must not be null");
+        }
+		if (file.isDirectory()) {
+			throw new IllegalArgumentException("file must not be a directory");
+		}
+		configFile = file;
+		
+		experimentName = file.getName().split("\\.")[0];
+
+        final SAXParserFactory spf = SAXParserFactory.newInstance();
+        spf.setValidating(true);
+
+        SAXParser saxParser = null;
+        InputSource inputSource = null;
+        try {
+			saxParser = spf.newSAXParser();
+		} catch (ParserConfigurationException | SAXException e) {
+			throw new ExperimentConfigurationException(e);
+		}
+		
+        InputStreamReader reader = null;
+		try {
+			reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
+			inputSource = new InputSource(reader);
+		} catch (UnsupportedEncodingException | FileNotFoundException e) {
+			throw new ExperimentConfigurationException("Could not open configuration file.", e);
+		}
+		
+        if (inputSource != null) {
+            inputSource.setSystemId("file://" + file.getAbsolutePath());
+			try {
+				saxParser.parse(inputSource, this);
+			} catch (SAXException | IOException e) {
+				throw new ExperimentConfigurationException("Error parsing configuration.", e);
+			}
+		}
+        if( reader!=null ) {
+        	try {
+				reader.close();
+			} catch (IOException e) {
+				throw new ExperimentConfigurationException("Error closing reader.", e);
+			}
+        }
+	}
+	
+	/**
+	 * returns the name of the experiment
+	 * @return name of the experiment
+	 */
+	public String getExperimentName() {
+		return experimentName;
+	}
+	
+	/**
+	 * returns the loaders for instances
+	 * @return data loaders
+	 */
+	public List<IVersionLoader> getLoaders() {
+		return loaders;
+	}
+	
+	/**
+	 * returns the results path
+	 * @return results path
+	 */
+	public String getResultsPath() {
+		return resultsPath;
+	}
+	
+	/**
+	 * returns the data set filters of the experiment
+	 * @return data set filters of the experiment
+	 */
+	public List<IVersionFilter> getVersionFilters() {
+		return versionFilters;
+	}
+	
+	/**
+	 * returns the test set filters of the experiment
+	 * @return test set filters of the experiment
+	 */
+	public List<IVersionFilter> getTestVersionFilters() {
+		return testVersionFilters;
+	}
+	
+	/**
+	 * returns the candidate training version filters of the experiment 
+	 * @return candidate training version filters of the experiment
+	 */
+	public List<IVersionFilter> getTrainingVersionFilters() {
+		return trainingVersionFilters;
+	}
+	
+	/**
+	 * returns the setwise processors applied before the setwise data selection
+	 * @return setwise processors applied before the setwise data selection
+	 */
+	public List<ISetWiseProcessingStrategy> getSetWisePreprocessors() {
+		return setwisepreprocessors;
+	}
+	
+	/**
+	 * returns the setwise data selection strategies
+	 * @return setwise data selection strategies
+	 */
+	public List<ISetWiseDataselectionStrategy> getSetWiseSelectors() {
+		return setwiseselectors;
+	}
+	
+	/**
+	 * returns the setwise processors applied after the setwise data selection
+	 * @return setwise processors applied after the setwise data selection
+	 */
+	public List<ISetWiseProcessingStrategy> getSetWisePostprocessors() {
+		return setwisepostprocessors;
+	}
+	
+	/**
+	 * returns the setwise training algorithms
+	 * @return setwise training algorithms
+	 */
+	public List<ISetWiseTrainingStrategy> getSetWiseTrainers() {
+		return setwiseTrainers;
+	}
+	
+	/**
+	 * returns the processors applied before the pointwise data selection
+	 * @return processors applied before the pointwise data selection
+	 */
+	public List<IProcessesingStrategy> getPreProcessors() {
+		return preprocessors;
+	}
+	
+	/**
+	 * returns the pointwise data selection strategies
+	 * @return pointwise data selection strategies
+	 */
+	public List<IPointWiseDataselectionStrategy> getPointWiseSelectors() {
+		return pointwiseselectors;
+	}
+	
+	/**
+	 * returns the processors applied after the pointwise data selection
+	 * @return processors applied after the pointwise data selection
+	 */
+	public List<IProcessesingStrategy> getPostProcessors() {
+		return postprocessors;
+	}
+	
+	/**
+	 * returns the normal training algorithm 
+	 * @return normal training algorithms
+	 */
+	public List<ITrainingStrategy> getTrainers() {
+		return trainers;
+	}
+	
+	/**
+	 * returns the evaluation strategies
+	 * @return evaluation strategies
+	 */
+	public List<IEvaluationStrategy> getEvaluators() {
+		return evaluators;
+	}
+	
+	/* (non-Javadoc)
+	 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
+	 */
+	@Override
+	public void startElement(String uri, String localName, String qName,
+			Attributes attributes) throws SAXException {
+		try {
+			if( qName.equals("config") ) {
+				// ingore
+			}
+			else if( qName.equals("loader") ) {
+				final IVersionLoader loader = (IVersionLoader) Class.forName("de.ugoe.cs.cpdp.loader." + attributes.getValue("name")).newInstance();
+				loader.setLocation(attributes.getValue("datalocation"));
+				loaders.add(loader);
+				
+				// TODO location as relative
+			} 
+			else if( qName.equals("resultspath") ) {
+				resultsPath = attributes.getValue("path");
+			}
+			else if( qName.equals("versionfilter") ) {
+				final IVersionFilter filter = (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name")).newInstance();
+				filter.setParameter(attributes.getValue("param"));
+				versionFilters.add(filter);
+			}
+			else if( qName.equals("testVersionfilter") ) {
+				final IVersionFilter filter = (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name")).newInstance();
+				filter.setParameter(attributes.getValue("param"));
+				testVersionFilters.add(filter);
+			}
+			else if( qName.equals("trainVersionfilter") ) {
+				final IVersionFilter filter = (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name")).newInstance();
+				filter.setParameter(attributes.getValue("param"));
+				trainingVersionFilters.add(filter);
+			}
+			else if( qName.equals("setwisepreprocessor") ) {
+				final ISetWiseProcessingStrategy processor = (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name")).newInstance(); 
+				processor.setParameter(attributes.getValue("param"));
+				setwisepreprocessors.add(processor);
+			}
+			else if( qName.equals("setwiseselector") ) {
+				final ISetWiseDataselectionStrategy selection = (ISetWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." +  attributes.getValue("name")).newInstance();
+				selection.setParameter(attributes.getValue("param"));
+				setwiseselectors.add(selection);
+			}
+			else if( qName.equals("setwisepostprocessor") ) {
+				final ISetWiseProcessingStrategy processor = (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name")).newInstance(); 
+				processor.setParameter(attributes.getValue("param"));
+				setwisepostprocessors.add(processor);
+			}
+			else if( qName.equals("setwisetrainer") ) {
+				final ISetWiseTrainingStrategy trainer = (ISetWiseTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +  attributes.getValue("name")).newInstance();
+				trainer.setParameter(attributes.getValue("param"));
+				setwiseTrainers.add(trainer);
+			}
+			else if( qName.equals("preprocessor") ) {
+				final IProcessesingStrategy processor = (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +  attributes.getValue("name")).newInstance();
+				processor.setParameter( attributes.getValue("param"));
+				preprocessors.add(processor);
+			}
+			else if( qName.equals("pointwiseselector") ) {
+				final IPointWiseDataselectionStrategy selection = (IPointWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." +  attributes.getValue("name")).newInstance();
+				selection.setParameter( attributes.getValue("param"));
+				pointwiseselectors.add(selection);
+			}
+			else if( qName.equals("postprocessor") ) {
+				final IProcessesingStrategy processor = (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +  attributes.getValue("name")).newInstance();
+				processor.setParameter( attributes.getValue("param"));
+				postprocessors.add(processor);
+			}
+			else if( qName.equals("trainer") ) {
+				final ITrainingStrategy trainer = (ITrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +  attributes.getValue("name")).newInstance();
+				trainer.setParameter(attributes.getValue("param"));
+				trainers.add(trainer);
+			}
+			else if( qName.equals("eval") ) {
+				final IEvaluationStrategy evaluator = (IEvaluationStrategy) Class.forName("de.ugoe.cs.cpdp.eval." + attributes.getValue("name")).newInstance();
+				evaluators.add(evaluator);
+			}
+			else if( qName.equals("partialconfig") ) {
+				String path = attributes.getValue("path");
+				try {
+					boolean relative = true;
+					if( attributes.getValue("relative")!=null ) {
+						relative = Boolean.parseBoolean(attributes.getValue("relative"));
+					}
+					
+					if( relative ) {
+						path = configFile.getParentFile().getPath() + "/" + path;
+					}
+					addConfigurations(new ExperimentConfiguration(path));
+				} catch (ExperimentConfigurationException e) {
+					throw new SAXException("Could not load partial configuration: " + path, e);
+				}	
+			} else {
+				Console.traceln(Level.WARNING, "element in config-file " +  configFile.getName() + " ignored: " + qName);
+			}
+		}
+        catch (NoClassDefFoundError | ClassNotFoundException | IllegalAccessException | InstantiationException | ClassCastException e) {
+        	throw new SAXException("Could not initialize class correctly", (Exception) e);
+        }
+	}
+	
+	/**
+	 * Merges the content of another experiment configuration into this configuration. This mechanism allows the usage of partial configuration files. The name of the other configuration is not taken over.
+	 * <br><br>
+	 * If the current results path equals &quot;results&quot;, it is replaced by the results path of the other configuration. Otherwise, the current results path is kept.
+	 * @param other experiment configuration whose entries are appended to the collections of this configuration
+	 */
+	private void addConfigurations(ExperimentConfiguration other) {
+		this.evaluators.addAll(other.evaluators);
+		this.trainers.addAll(other.trainers);
+		this.postprocessors.addAll(other.postprocessors);
+		this.pointwiseselectors.addAll(other.pointwiseselectors);
+		this.preprocessors.addAll(other.preprocessors);
+		this.setwiseTrainers.addAll(other.setwiseTrainers);
+		this.setwisepostprocessors.addAll(other.setwisepostprocessors);
+		this.setwiseselectors.addAll(other.setwiseselectors);
+		this.setwisepreprocessors.addAll(other.setwisepreprocessors);
+		this.trainingVersionFilters.addAll(other.trainingVersionFilters);
+		this.testVersionFilters.addAll(other.testVersionFilters);
+		this.versionFilters.addAll(other.versionFilters);
+		this.loaders.addAll(other.loaders);
+		if( "results".equals(this.resultsPath) ) {
+			this.resultsPath = other.resultsPath;
+		}
+	}
+	
+	/**
+	 * Lists the experiment name, the loaders, the results path, and all configured filters, processors, selectors, trainers, and evaluators, one entry per line.
+	 * @see java.lang.Object#toString()
+	 */
+	@Override
+	public String toString() {
+		final StringBuilder text = new StringBuilder();
+		text.append("Experiment name: ").append(experimentName).append(StringTools.ENDLINE);
+		text.append("Loaders: ").append(loaders).append(StringTools.ENDLINE);
+		text.append("Results path: ").append(resultsPath).append(StringTools.ENDLINE);
+		text.append("Version filters: ").append(versionFilters.toString()).append(StringTools.ENDLINE);
+		text.append("Test version filters: ").append(testVersionFilters.toString()).append(StringTools.ENDLINE);
+		text.append("Training version filters: ").append(trainingVersionFilters.toString()).append(StringTools.ENDLINE);
+		text.append("Setwise preprocessors: ").append(setwisepreprocessors.toString()).append(StringTools.ENDLINE);
+		text.append("Setwise selectors: ").append(setwiseselectors.toString()).append(StringTools.ENDLINE);
+		text.append("Setwise postprocessors: ").append(setwisepostprocessors.toString()).append(StringTools.ENDLINE);
+		text.append("Setwise trainers: ").append(setwiseTrainers.toString()).append(StringTools.ENDLINE);
+		text.append("Pointwise preprocessors: ").append(preprocessors.toString()).append(StringTools.ENDLINE);
+		text.append("Pointwise selectors: ").append(pointwiseselectors.toString()).append(StringTools.ENDLINE);
+		text.append("Pointwise postprocessors: ").append(postprocessors.toString()).append(StringTools.ENDLINE);
+		text.append("Pointwise trainers: ").append(trainers.toString()).append(StringTools.ENDLINE);
+		text.append("Evaluators: ").append(evaluators.toString()).append(StringTools.ENDLINE);
+		return text.toString();
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfigurationException.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfigurationException.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfigurationException.java	(revision 2)
@@ -0,0 +1,42 @@
+package de.ugoe.cs.cpdp;
+
+/**
+ * Signals that an experiment configuration could not be created.
+ * @author Steffen Herbold
+ */
+public class ExperimentConfigurationException extends Exception {
+
+	/**
+	 * Serialization ID of this exception type.
+	 */
+	private static final long serialVersionUID = 1L;
+	
+	/**
+	 * Creates an exception with a detail message and a cause, see {@link Exception#Exception(String, Throwable)}.
+	 */
+	public ExperimentConfigurationException(String message, Throwable e) {
+		super(message, e);
+	}
+	
+	/**
+	 * Creates an exception that only wraps a cause, see {@link Exception#Exception(Throwable)}.
+	 */
+	public ExperimentConfigurationException(Throwable e) {
+		super(e);
+	}
+	
+	/**
+	 * Creates an exception with a detail message, see {@link Exception#Exception(String)}.
+	 */
+	public ExperimentConfigurationException(String message) {
+		super(message);
+	}
+	
+	/**
+	 * Creates an exception without detail message and cause, see {@link Exception#Exception()}.
+	 */
+	public ExperimentConfigurationException() {
+		super();
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/IParameterizable.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/IParameterizable.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/IParameterizable.java	(revision 2)
@@ -0,0 +1,15 @@
+package de.ugoe.cs.cpdp;
+
+/**
+ * Interface that defines that an entity accepts a parameter string. Can be used to configure parts of an experiment. How (and if) this parameter is interpreted depends entirely on the entity.
+ * @author Steffen Herbold
+ *
+ */
+public interface IParameterizable {
+
+	/**
+	 * Sets the parameters of an entity. NOTE(review): the configuration parser appears to pass an optional attribute value here, so callers may hand in <code>null</code> - confirm.
+	 * @param parameters parameters as string
+	 */
+	void setParameter(String parameters);
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/Runner.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/Runner.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/Runner.java	(revision 2)
@@ -0,0 +1,63 @@
+package de.ugoe.cs.cpdp;
+
+import java.io.File;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.logging.Level;
+
+import de.ugoe.cs.util.console.Console;
+import de.ugoe.cs.util.console.TextConsole;
+
+/**
+ * Executable that can be used to run experiments.
+ * @author Steffen Herbold
+ */
+public class Runner {
+	/**
+	 * Main class. The arguments are {@link ExperimentConfiguration} files or directories whose files are configurations. Each experiment is submitted to a thread pool whose size is the number of logical processors of the host system. 
+	 * @param args experiment configuration files or directories that contain them
+	 */
+	public static void main(String[] args) {
+		new TextConsole(Level.FINE);
+		final ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+		for( String arg : args ) {
+			File file = new File(arg);
+			if( file.isFile() ) {
+				createConfig(threadPool, file.getAbsolutePath());
+			} else if( file.isDirectory() ) {
+				final File[] subfiles = file.listFiles();
+				if( subfiles==null ) { // listFiles() yields null if the directory cannot be read
+					Console.printerrln("Could not list the contents of directory " + file.getAbsolutePath());
+					continue;
+				}
+				for( File subfile : subfiles ) {
+					if( subfile.isFile() ) {
+						createConfig(threadPool, subfile.getAbsolutePath());
+					}
+				}
+			}
+		}
+		threadPool.shutdown();
+		try {
+			threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
+		} catch (InterruptedException e) {
+			Thread.currentThread().interrupt(); // restore the interrupt status that the exception cleared
+			e.printStackTrace();
+		}
+	}
+	
+	/** Loads one configuration file and submits its experiment to the thread pool; a configuration that cannot be loaded is reported and skipped. */
+	public static void createConfig(ExecutorService threadPool, String configFile) {
+		final ExperimentConfiguration config;
+		try {
+			config = new ExperimentConfiguration(configFile);
+		} catch (Exception e) {
+			Console.printerrln("Failure initializing the experiment configuration for configuration file " + configFile);
+			e.printStackTrace();
+			return;
+		}
+		Console.trace(Level.FINE, config.toString());
+		threadPool.execute(new Experiment(config));
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/AttributeRemoval.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/AttributeRemoval.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/AttributeRemoval.java	(revision 2)
@@ -0,0 +1,62 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instances;
+
+/**
+ * Removes attributes from all data sets using their name. 
+ * @author Steffen Herbold
+ */
+public class AttributeRemoval implements ISetWiseProcessingStrategy, IProcessesingStrategy {
+
+	/**
+	 * names of the attributes to be removed (determined by {@link #setParameter(String)}) 
+	 */
+	private String[] attributeNames = new String[]{};
+	
+	/**
+	 * Defines the attributes to be removed as a blank-separated list of attribute names. A <code>null</code> string leaves the current names untouched.
+	 * Attribute names that contain blanks are currently not supported.
+	 * @param parameters string with the blank-separated attribute names
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( parameters==null ) return;
+		attributeNames = parameters.split(" ");
+	}
+
+	/**
+	 * Looks up each name in the test data and deletes the attribute at that index from the test data and from every training data set (see {@link ISetWiseProcessingStrategy#apply(Instances, SetUniqueList)}).
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		for( final String name : attributeNames ) {
+			for( int index=0 ; index<testdata.numAttributes() ; index++ ) {
+				if( !name.equals(testdata.attribute(index).name()) ) {
+					continue;
+				}
+				testdata.deleteAttributeAt(index);
+				for( final Instances trainingSet : traindataSet ) {
+					trainingSet.deleteAttributeAt(index);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Looks up each name in the test data and deletes the attribute at that index from the test data and from the training data (see {@link IProcessesingStrategy#apply(Instances, Instances)}).
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		for( final String name : attributeNames ) {
+			for( int index=0 ; index<testdata.numAttributes() ; index++ ) {
+				if( !name.equals(testdata.attribute(index).name()) ) {
+					continue;
+				}
+				testdata.deleteAttributeAt(index);
+				traindata.deleteAttributeAt(index);
+			}
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/AverageStandardization.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/AverageStandardization.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/AverageStandardization.java	(revision 2)
@@ -0,0 +1,96 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Attribute;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Standardization procedure after Watanabe et al.: Adapting a Fault Prediction Model to Allow Inter Language Reuse.
+ * <br><br>
+ * In comparison to Watanabe et al., we transform training data instead of the test data. Otherwise, this approach would not be feasible with multiple projects.
+ * <br><br>
+ * Each non-class training value x of attribute j becomes x*meanTest(j)/meanTrain(j). Attributes whose training mean is zero are left unchanged, because the quotient would be NaN or infinite.
+ * @author Steffen Herbold
+ */
+public class AverageStandardization implements ISetWiseProcessingStrategy, IProcessesingStrategy {
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// dummy
+	}
+
+	/**
+	 * Rescales every training data set separately with its own attribute means. The test data is only read.
+	 * @param testdata test data that provides the reference means
+	 * @param traindataSet training data sets that are rescaled in place
+	 * @see de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		// the test data is not modified, so its means are the same for all training data sets
+		final double[] meanTest = means(testdata, testdata);
+		for( Instances traindata : traindataSet ) {
+			rescale(testdata, traindata, meanTest);
+		}
+	}
+
+	/**
+	 * Rescales the training data with its attribute means. The test data is only read.
+	 * @param testdata test data that provides the reference means
+	 * @param traindata training data that is rescaled in place
+	 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		rescale(testdata, traindata, means(testdata, testdata));
+	}
+
+	/**
+	 * Determines the value of {@link Instances#meanOrMode(int)} for every attribute except the class attribute.
+	 * The attribute indices of the test data are used for both data sets.
+	 * @param testdata test data that defines the attribute count and the class attribute
+	 * @param data data whose means are determined
+	 * @return means by attribute index; the entry of the class attribute stays 0
+	 */
+	private static double[] means(Instances testdata, Instances data) {
+		final Attribute classAttribute = testdata.classAttribute();
+		final double[] result = new double[testdata.numAttributes()];
+		for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+			if( testdata.attribute(j)!=classAttribute ) {
+				result[j] = data.meanOrMode(j);
+			}
+		}
+
+		return result;
+	}
+
+	/**
+	 * Multiplies every non-class value of the training data by meanTest/meanTrain of its attribute.
+	 * <br><br>
+	 * Attributes with a training mean of exactly zero are skipped.
+	 * @param testdata test data that defines the attribute count and the class attribute
+	 * @param traindata training data that is rescaled in place
+	 * @param meanTest means of the test data by attribute index
+	 */
+	private static void rescale(Instances testdata, Instances traindata, double[] meanTest) {
+		final Attribute classAttribute = testdata.classAttribute();
+		final double[] meanTrain = means(testdata, traindata);
+
+		for( int i=0 ; i<traindata.numInstances() ; i++ ) {
+			final Instance instance = traindata.instance(i);
+			for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+				// a zero training mean would turn the value into NaN (0/0) or infinity (x/0); keep the value as it is
+				if( testdata.attribute(j)!=classAttribute && meanTrain[j]!=0.0 ) {
+					instance.setValue(j, instance.value(j)*meanTest[j]/meanTrain[j]);
+				}
+			}
+		}
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/BiasedWeights.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/BiasedWeights.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/BiasedWeights.java	(revision 2)
@@ -0,0 +1,77 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Sets the bias of the weights of the training data. By using a bias of 0.5 (default value) the total weight of the positive instances (i.e.
+ * fault-prone) is equal to the total weight of the negative instances (i.e. non-fault-prone). Otherwise the weights between the two will be 
+ * distributed according to the bias, where &lt;0.5 means in favor of the negative instances and &gt;0.5 in favor of the positive instances. 
+ * If both classes occur, the total weight of a data set stays equal to its number of instances.
+ * @author Steffen Herbold
+ */
+public class BiasedWeights implements IProcessesingStrategy, ISetWiseProcessingStrategy {
+
+	/**
+	 * bias used for the weighting
+	 */
+	private double bias = 0.5;
+	
+	/**
+	 * Sets the bias to be used for weighting. A <code>null</code> or empty string keeps the current bias, so that the
+	 * default of 0.5 can be used without an explicit parameter (Double.parseDouble(null) would throw a NullPointerException).
+	 * @param parameters string with the bias
+	 * @throws NumberFormatException if the string is not empty and cannot be parsed as a double
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( parameters!=null && !parameters.isEmpty() ) {
+			bias = Double.parseDouble(parameters);
+		}
+	}
+
+	/**
+	 * Sets the weights of the training data; the test data is not touched.
+	 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		setBiasedWeights(traindata);
+	}
+	
+	/**
+	 * Sets the weights of every training data set separately; the test data is not touched.
+	 * @see de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		for( Instances traindata : traindataSet ) {
+			setBiasedWeights(traindata);
+		}
+	}
+	
+	/**
+	 * Helper method that sets the weights for a given data set. Class value 0 is treated as negative and class value 1 as positive.
+	 * @param data data set whose weights are set
+	 */
+	private void setBiasedWeights(Instances data) {
+		final int classIndex = data.classIndex();
+		final int[] counts = data.attributeStats(classIndex).nominalCounts;
+		
+		// each class receives its share of the total weight, spread evenly over the instances of that class
+		final double weightNegatives = ((1-bias)*data.numInstances()) / counts[0];
+		final double weightPositives = (bias*data.numInstances()) / counts[1];
+		
+		for( int i=0 ; i<data.numInstances() ; i++ ) {
+			Instance instance = data.instance(i);
+			if( instance.value(classIndex)==0 ) {
+				instance.setWeight(weightNegatives);
+			}
+			else if( instance.value(classIndex)==1 ) {
+				instance.setWeight(weightPositives);
+			}
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/DataGravitation.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/DataGravitation.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/DataGravitation.java	(revision 2)
@@ -0,0 +1,68 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Attribute;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Weights the training instances by their similarity to the test data. An instance counts as similar for an attribute if its value lies
+ * within the [min, max] range of that attribute in the test data. With s similar attributes, the raw weight is s/sqrt(numAttributes-s); the
+ * raw weights are then scaled such that they sum up to the number of training instances.
+ */
+public class DataGravitation implements IProcessesingStrategy, ISetWiseProcessingStrategy {
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// dummy
+	}
+	
+	/** Weights every training data set separately, see {@link ISetWiseProcessingStrategy#apply(Instances, SetUniqueList)}. */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		for( Instances traindata : traindataSet ) {
+			apply(testdata, traindata);
+		}
+	}
+
+	/** Sets the weights of the training data; the test data is only read, see {@link IProcessesingStrategy#apply(Instances, Instances)}. */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		Attribute classAtt = testdata.classAttribute();
+		double[] minAttValues = new double[testdata.numAttributes()];
+		double[] maxAttValues = new double[testdata.numAttributes()];
+		double[] weights = new double[traindata.numInstances()];
+		double weightsum = 0.0;
+		for( int j=0; j<testdata.numAttributes(); j++) {
+			if( testdata.attribute(j)!=classAtt ) {
+				// request the statistics only once per attribute instead of once for min and once for max
+				final weka.core.AttributeStats stats = testdata.attributeStats(j);
+				minAttValues[j] = stats.numericStats.min;
+				maxAttValues[j] = stats.numericStats.max;
+			}
+		}
+		for( int i=0; i<traindata.numInstances(); i++ ) {
+			Instance inst = traindata.instance(i);
+			int similar = 0;
+			for( int j=0; j<testdata.numAttributes(); j++ ) {
+				if( testdata.attribute(j)!=classAtt && inst.value(j)>=minAttValues[j] && inst.value(j)<=maxAttValues[j] ) {
+					similar++;
+				}
+			}
+			// the class attribute is never counted as similar, so the radicand is at least 1
+			weights[i] = similar/Math.sqrt(testdata.numAttributes()-similar);
+			weightsum += weights[i];
+		}
+		if( weightsum<=0.0 ) {
+			return; // no instance is similar in any attribute: scaling would yield NaN weights, keep the current ones
+		}
+		for( int i=0; i<traindata.numInstances(); i++ ) {
+			traindata.instance(i).setWeight(weights[i]*traindata.numInstances()/weightsum);
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/IProcessesingStrategy.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/IProcessesingStrategy.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/IProcessesingStrategy.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import weka.core.Instances;
+import de.ugoe.cs.cpdp.IParameterizable;
+
+/**
+ * A data processing strategy that is applied to the test data and a single set of training data. 
+ * @author Steffen Herbold
+ */
+public interface IProcessesingStrategy extends IParameterizable {
+	
+	/**
+	 * Applies the processing strategy. Nothing is returned; implementations may modify the passed data sets directly (e.g., delete attributes or change values and weights).
+	 * @param testdata test data
+	 * @param traindata training data
+	 */
+	void apply(Instances testdata, Instances traindata);
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ISetWiseProcessingStrategy.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ISetWiseProcessingStrategy.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ISetWiseProcessingStrategy.java	(revision 2)
@@ -0,0 +1,22 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import de.ugoe.cs.cpdp.IParameterizable;
+
+import weka.core.Instances;
+
+/**
+ * A data processing strategy that is applied to the test data and multiple sets of training data. 
+ * @author Steffen Herbold
+ */
+public interface ISetWiseProcessingStrategy extends IParameterizable {
+
+	/**
+	 * Applies the processing strategy. Nothing is returned; implementations may modify the passed data sets directly (e.g., delete attributes or change values and weights).
+	 * @param testdata test data
+	 * @param traindataSet training data sets
+	 */
+	void apply(Instances testdata, SetUniqueList<Instances> traindataSet);
+	
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/InformationGainFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/InformationGainFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/InformationGainFilter.java	(revision 2)
@@ -0,0 +1,121 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.attributeSelection.InfoGainAttributeEval;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Implements an attribute filter that is based on the information gain of each attribute after Z. He, F. Peters, T. Menzies, Y. Yang: Learning from Open-Source Projects: An Empirical Study on Defect Prediction.
+ * The information gain is determined on a sample in which instances drawn from the test data are labeled with class value 1 and instances drawn from the training data with class value 0.
+ * The attributes with the highest information gain are removed; the removed share is the parameter of the filter (0.5 = 50%, which is the default).
+ * <br><br>
+ * Best means the least information gain, because this means that the attribute is similar in both test and training data.
+ * @author Steffen Herbold
+ */
+public class InformationGainFilter implements ISetWiseProcessingStrategy, IProcessesingStrategy {
+
+	/**
+	 * size of the random sample that is drawn from both test data and training data 
+	 */
+	private final int sampleSize = 500;
+	
+	/**
+	 * ratio of the non-class features that is removed (the remainder is kept)
+	 */
+	private double featureRatio = 0.5;
+	
+	/**
+	 * Sets the feature ratio. A <code>null</code> or empty string keeps the current ratio.
+	 * @param parameters feature ratio
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( parameters!=null && !"".equals(parameters) ) {
+			featureRatio = Double.parseDouble(parameters);
+		}
+	}
+
+	/**
+	 * Filters each training data set on its own; no attributes are removed from the test data.
+	 * @see de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		for( Instances traindata : traindataSet ) {
+			apply(testdata, traindata, false);
+		}
+	}
+	
+	/**
+	 * Filters the training data and removes the same attributes from the test data.
+	 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		apply(testdata, traindata, true);
+	}
+	
+	/**
+	 * Internal helper function that determines the information gain of the attributes and deletes those with the highest gain.
+	 * @param testdata test data
+	 * @param traindata training data from which the attributes are deleted
+	 * @param removeFromTest if true, the attributes are also deleted from the test data
+	 */
+	private void apply(Instances testdata, Instances traindata, boolean removeFromTest) {
+		final int removalNumber = (int) (featureRatio*(testdata.numAttributes()-1));
+		if( removalNumber<=0 ) {
+			// nothing to remove; with 0 the cutoff index below would be out of bounds
+			return;
+		}
+		final Random rand = new Random(1);
+		final int classIndex = testdata.classIndex();
+		
+		// sample instances
+		// NOTE(review): the copy constructor presumably also puts all test instances with their original class values into the sample - confirm that this is intended
+		final Instances sample = new Instances(testdata);
+		for( int j=0; j<sampleSize; j++ ) {
+			Instance inst = new DenseInstance(testdata.instance(rand.nextInt(testdata.numInstances())));
+			inst.setDataset(sample);
+			inst.setClassValue(1.0);
+			sample.add(inst);
+			inst = new DenseInstance(traindata.instance(rand.nextInt(traindata.numInstances())));
+			inst.setDataset(sample);
+			inst.setClassValue(0.0);
+			sample.add(inst);
+		}
+		
+		final double[] gain = new double[sample.numAttributes()];
+		final InfoGainAttributeEval gainEval = new InfoGainAttributeEval();
+		try {
+			gainEval.buildEvaluator(sample);
+			for( int i=0 ; i<testdata.numAttributes() ; i++ ) {
+				if( !sample.classAttribute().equals(sample.attribute(i)) ) {
+					gain[i] = gainEval.evaluateAttribute(i);
+				}
+			}
+		} catch (Exception e) {
+			// deliberately ignored: attributes evaluated before the failure keep their gain, all others keep a gain of 0
+		}
+		
+		// select best attributes
+		final double[] gainCopy = Arrays.copyOf(gain, gain.length);
+		Arrays.sort(gainCopy);
+		final double cutoffGain = gainCopy[testdata.numAttributes()-removalNumber];
+		
+		// iterate backwards so that a deletion does not shift the indices that are still to be visited
+		for( int i=testdata.numAttributes()-1; i>=0 ; i-- ) {
+			if( gain[i]>=cutoffGain && i!=classIndex) {
+				traindata.deleteAttributeAt(i);
+				if( removeFromTest ) {
+					testdata.deleteAttributeAt(i);
+				}
+			}
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/LogarithmTransform.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/LogarithmTransform.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/LogarithmTransform.java	(revision 2)
@@ -0,0 +1,83 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Attribute;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Logarithm transformation after Carmargo Cruz and Ochimizu: Towards Logistic Regression Models for Predicting Fault-prone Code across Software Projects.
+ * <br><br>
+ * Transform each attribute value x into log(x+1). 
+ * @author Steffen Herbold
+ */
+public class LogarithmTransform implements ISetWiseProcessingStrategy, IProcessesingStrategy {
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// dummy
+	}
+
+	/**
+	 * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		final Attribute classAttribute = testdata.classAttribute();
+		
+		// preprocess testdata
+		for( int i=0 ; i<testdata.numInstances() ; i++ ) {
+			Instance instance = testdata.instance(i);
+			for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+				if( testdata.attribute(j)!=classAttribute ) {
+					instance.setValue(j, Math.log(1+instance.value(j)));
+				}
+			}
+		}
+		
+		// preprocess training data
+		for( Instances traindata : traindataSet ) {
+			for( int i=0 ; i<traindata.numInstances() ; i++ ) {
+				Instance instance = traindata.instance(i);
+				for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+					if( testdata.attribute(j)!=classAttribute ) {
+						instance.setValue(j, Math.log(1+instance.value(j)));
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		final Attribute classAttribute = testdata.classAttribute();
+		
+		// preprocess testdata
+		for( int i=0 ; i<testdata.numInstances() ; i++ ) {
+			Instance instance = testdata.instance(i);
+			for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+				if( testdata.attribute(j)!=classAttribute ) {
+					instance.setValue(j, Math.log(1+instance.value(j)));
+				}
+			}
+		}
+		
+		// preprocess training data
+		for( int i=0 ; i<traindata.numInstances() ; i++ ) {
+			Instance instance = traindata.instance(i);
+			for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+				if( testdata.attribute(j)!=classAttribute ) {
+					instance.setValue(j, Math.log(1+instance.value(j)));
+				}
+			}
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java	(revision 2)
@@ -0,0 +1,102 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Attribute;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Median as reference transformation after Carmargo Cruz and Ochimizu: Towards Logistic Regression Models for Predicting Fault-prone Code across Software Projects
+ * <br><br>
+ * For each attribute value x, the new value is x-median of the test data. The medians are always
+ * determined on the test data and are subtracted from the test data as well as from the training data.
+ * The class attribute is not changed.
+ * @author Steffen Herbold
+ */
+public class MedianAsReference implements ISetWiseProcessingStrategy, IProcessesingStrategy {
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// dummy
+	}
+
+	/**
+	 * Subtracts the medians of the test data from the test data and from every training data set.
+	 * All data sets are modified in place.
+	 * @param testdata test data from which the medians are determined
+	 * @param traindataSet training data sets
+	 * @see de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		// the medians must be determined before the test data itself is shifted
+		final double[] median = medians(testdata);
+		
+		subtractMedians(testdata, testdata, median);
+		for( Instances traindata : traindataSet ) {
+			subtractMedians(traindata, testdata, median);
+		}
+	}
+
+	/**
+	 * Subtracts the medians of the test data from the test data and from the training data.
+	 * Both data sets are modified in place.
+	 * @param testdata test data from which the medians are determined
+	 * @param traindata training data
+	 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		// the medians must be determined before the test data itself is shifted
+		final double[] median = medians(testdata);
+		
+		subtractMedians(testdata, testdata, median);
+		subtractMedians(traindata, testdata, median);
+	}
+
+	/**
+	 * Determines the median of every attribute of the test data except the class attribute.
+	 * @param testdata test data
+	 * @return medians indexed by attribute; the entry of the class attribute stays 0.0
+	 */
+	private static double[] medians(Instances testdata) {
+		final Attribute classAttribute = testdata.classAttribute();
+		final double[] median = new double[testdata.numAttributes()];
+		
+		// (n+1)>>1 equals (n+1)/2 in integer arithmetic: the middle value for an odd number of
+		// instances, the lower of the two middle values for an even number
+		// (assumes that kthSmallestValue counts k from 1 -- TODO confirm against the Weka API)
+		final int k = (testdata.numInstances()+1)>>1;
+		for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+			if( testdata.attribute(j)!=classAttribute ) {
+				median[j] = testdata.kthSmallestValue(j, k);
+			}
+		}
+		return median;
+	}
+
+	/**
+	 * Subtracts the given medians from all attribute values of a data set. The attributes are
+	 * addressed through the attribute positions of the test data; the class attribute of the test
+	 * data is skipped.
+	 * @param data data set that is modified in place
+	 * @param testdata test data that defines the attribute positions and the class attribute
+	 * @param median medians indexed by attribute
+	 */
+	private static void subtractMedians(Instances data, Instances testdata, double[] median) {
+		final Attribute classAttribute = testdata.classAttribute();
+		
+		for( int i=0 ; i<data.numInstances() ; i++ ) {
+			Instance instance = data.instance(i);
+			for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
+				if( testdata.attribute(j)!=classAttribute ) {
+					instance.setValue(j, instance.value(j)-median[j]);
+				}
+			}
+		}
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Normalization.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Normalization.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Normalization.java	(revision 2)
@@ -0,0 +1,103 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Attribute;
+import weka.core.Instance;
+import weka.core.Instances;
+import weka.experiment.Stats;
+
+/**
+ * Min-max normalization of the numeric attributes to the interval [0,1]. For each attribute, the
+ * minimum and maximum are determined jointly over the test data and all training data that is
+ * passed to the same call, i.e., all data sets are rescaled with the same bounds. The class
+ * attribute is not changed.
+ * @author Steffen Herbold
+ */
+public class Normalization implements ISetWiseProcessingStrategy, IProcessesingStrategy {
+
+	/**
+	 * Normalizes the test data and all training data sets in place, using the joint minimum and
+	 * maximum of each attribute.
+	 * @param testdata test data
+	 * @param traindataSet training data sets
+	 * @see de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		final Attribute classAtt = testdata.classAttribute();
+		
+		for( int i=0 ; i<testdata.numAttributes(); i++) {
+			if( isNormalizable(testdata.attribute(i), classAtt) ) {
+				Stats teststats = testdata.attributeStats(i).numericStats;
+				
+				double minVal = teststats.min;
+				double maxVal = teststats.max;
+				
+				// widen the bounds with the values observed in the training data
+				for( Instances traindata : traindataSet ) {
+					Stats trainstats = traindata.attributeStats(i).numericStats;
+					if( minVal>trainstats.min ) {
+						minVal = trainstats.min;
+					}
+					if( maxVal<trainstats.max ) {
+						maxVal = trainstats.max;
+					}
+				}
+				
+				rescale(testdata, i, minVal, maxVal);
+				for( Instances traindata : traindataSet ) {
+					rescale(traindata, i, minVal, maxVal);
+				}
+			}
+		}
+	}
+	
+	/**
+	 * Normalizes the test data and the training data in place, using the joint minimum and
+	 * maximum of each attribute.
+	 * @param testdata test data
+	 * @param traindata training data
+	 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		final Attribute classAtt = testdata.classAttribute();
+		
+		for( int i=0 ; i<testdata.numAttributes(); i++) {
+			if( isNormalizable(testdata.attribute(i), classAtt) ) {
+				Stats teststats = testdata.attributeStats(i).numericStats;
+				
+				double minVal = teststats.min;
+				double maxVal = teststats.max;
+				
+				// widen the bounds with the values observed in the training data
+				Stats trainstats = traindata.attributeStats(i).numericStats;
+				if( minVal>trainstats.min ) {
+					minVal = trainstats.min;
+				}
+				if( maxVal<trainstats.max ) {
+					maxVal = trainstats.max;
+				}
+				
+				rescale(testdata, i, minVal, maxVal);
+				rescale(traindata, i, minVal, maxVal);
+			}
+		}
+	}
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// no parameters
+	}
+	
+	/**
+	 * Checks whether an attribute of the test data takes part in the normalization.
+	 * @param attribute attribute of the test data
+	 * @param classAtt class attribute of the test data
+	 * @return true for numeric attributes other than the class attribute
+	 */
+	private static boolean isNormalizable(Attribute attribute, Attribute classAtt) {
+		// numericStats is only used for numeric attributes; other attribute types are left untouched
+		return !attribute.equals(classAtt) && attribute.isNumeric();
+	}
+	
+	/**
+	 * Rescales one attribute of a data set in place to (x-minVal)/(maxVal-minVal).
+	 * @param data data set that is modified
+	 * @param attIndex index of the attribute
+	 * @param minVal lower bound of the attribute values
+	 * @param maxVal upper bound of the attribute values
+	 */
+	private static void rescale(Instances data, int attIndex, double minVal, double maxVal) {
+		final double range = maxVal-minVal;
+		for( int j=0 ; j<data.numInstances() ; j++ ) {
+			Instance inst = data.instance(j);
+			// a constant attribute has no spread; map it to 0.0 instead of producing NaN through 0/0
+			double newValue = range==0.0 ? 0.0 : (inst.value(attIndex)-minVal)/range;
+			inst.setValue(attIndex, newValue);
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Undersampling.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Undersampling.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Undersampling.java	(revision 2)
@@ -0,0 +1,67 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.supervised.instance.Resample;
+
+/**
+ * Processing strategy that shrinks the instances with class value 0.0 of the training data if
+ * they outnumber the instances with class value 1.0. The instances with class value 0.0 are
+ * resampled with Weka's supervised {@link Resample} filter to (100*#class1/#class0) percent of
+ * their number, the instances with class value 1.0 are kept as they are. The test data is not used.
+ */
+public class Undersampling implements IProcessesingStrategy,
+		ISetWiseProcessingStrategy {
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// no parameters
+	}
+
+	/**
+	 * Applies {@link #apply(Instances, Instances)} to every training data set.
+	 * @param testdata test data, handed through unchanged
+	 * @param traindataSet training data sets that are modified in place
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		for( Instances candidate : traindataSet ) {
+			apply(testdata, candidate);
+		}
+	}
+
+	/**
+	 * Resamples the instances with class value 0.0 of the training data if there are more of them
+	 * than instances with class value 1.0. Afterwards the training data contains the resampled
+	 * class-0 instances followed by the class-1 instances.
+	 * @param testdata test data, not used
+	 * @param traindata training data that is modified in place
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		final int[] classCounts = traindata.attributeStats(traindata.classIndex()).nominalCounts;
+		
+		if( classCounts[1]>=classCounts[0] ) {
+			// class 0 does not outnumber class 1, nothing to do
+			return;
+		}
+		
+		// two full copies that are stripped down to one class value each
+		Instances classZero = new Instances(traindata);
+		Instances classOne = new Instances(traindata);
+		for( int pos=traindata.size() ; pos-->0 ; ) {
+			// walking backwards keeps the not yet visited positions stable while removing
+			if( Double.compare(1.0, classZero.get(pos).classValue())==0 ) {
+				classZero.remove(pos);
+			}
+			if( Double.compare(0.0, classOne.get(pos).classValue())==0 ) {
+				classOne.remove(pos);
+			}
+		}
+		
+		final Resample sampler = new Resample();
+		sampler.setSampleSizePercent((100.0* classCounts[1])/classCounts[0]);
+		try {
+			sampler.setInputFormat(traindata);
+			classZero = Filter.useFilter(classZero, sampler);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		}
+		
+		// rebuild the training data: resampled class 0 first, class 1 second
+		traindata.clear();
+		for( int pos=0 ; pos<classZero.size() ; pos++ ) {
+			traindata.add(classZero.get(pos));
+		}
+		for( int pos=0 ; pos<classOne.size() ; pos++ ) {
+			traindata.add(classOne.get(pos));
+		}
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/AbstractCharacteristicSelection.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/AbstractCharacteristicSelection.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/AbstractCharacteristicSelection.java	(revision 2)
@@ -0,0 +1,123 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.ArrayList;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+import weka.experiment.Stats;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.Normalize;
+
+/**
+ * Abstract class that implements the foundation of setwise data selection strategies using distributional characteristics.
+ * This class provides the means to transform the data sets into their characteristic vectors.
+ * <br><br>
+ * Supported characteristics are "mean", "stddev", and "var".
+ * @author Steffen Herbold
+ */
+public abstract class AbstractCharacteristicSelection implements
+		ISetWiseDataselectionStrategy {
+
+	/**
+	 * vector with the distributional characteristics 
+	 */
+	private String[] characteristics = new String[]{"mean","stddev"};
+	
+	/**
+	 * Sets the distributional characteristics. The names of the characteristics are separated by blanks.
+	 * A null or empty string keeps the current characteristics.
+	 * @param parameters blank separated names of the characteristics
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( parameters!=null && !"".equals(parameters) ) {
+			characteristics = parameters.split(" ");
+		}
+	}
+	
+	/**
+	 * Transforms the data into the distributional characteristics. The first instance is the test data, followed by the training data. 
+	 * @param testdata test data
+	 * @param traindataSet training data sets
+	 * @return distributional characteristics of the data
+	 * @throws RuntimeException if an unknown characteristic is configured
+	 */
+	protected Instances characteristicInstances(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		// setup weka Instances for clustering: one attribute per (non-class attribute, characteristic) pair
+		final ArrayList<Attribute> atts = new ArrayList<Attribute>();
+		
+		final Attribute classAtt = testdata.classAttribute();
+		for( int i=0 ; i<testdata.numAttributes() ; i++ ) {
+			Attribute dataAtt = testdata.attribute(i);
+			if( !dataAtt.equals(classAtt) ) {
+				for( String characteristic : characteristics ) {
+					atts.add(new Attribute(dataAtt.name() + "_" + characteristic));
+				}
+			}
+		}
+		final Instances data = new Instances("distributional_characteristics", atts, 0);
+		
+		// setup data for clustering: test data first, then the training data sets in their list order
+		data.add(new DenseInstance(1.0, characteristicVector(testdata, classAtt, atts.size())));
+		for( Instances traindata : traindataSet ) {
+			Instance instance = new DenseInstance(1.0, characteristicVector(traindata, classAtt, atts.size()));
+			data.add(instance);
+		}
+		return data;
+	}
+	
+	/**
+	 * Calculates the distributional characteristics of a single data set. The values are written
+	 * consecutively for each attribute except the class attribute, in the order of the configured
+	 * characteristics.
+	 * @param dataset data set whose characteristics are calculated
+	 * @param classAtt class attribute that is skipped
+	 * @param length length of the characteristic vector
+	 * @return characteristic vector of the data set
+	 */
+	private double[] characteristicVector(Instances dataset, Attribute classAtt, int length) {
+		final double[] values = new double[length];
+		// running write position; independent of the attribute index so that a class
+		// attribute that is not the last attribute does not leave a gap or overflow the vector
+		int position = 0;
+		for( int i=0 ; i<dataset.numAttributes() ; i++ ) {
+			if( !dataset.attribute(i).equals(classAtt) ) {
+				Stats stats = dataset.attributeStats(i).numericStats;
+				for( String characteristic : characteristics ) {
+					if( "mean".equals(characteristic) ) {
+						values[position++] = stats.mean;
+					} else if( "stddev".equals(characteristic) ) {
+						values[position++] = stats.stdDev;
+					} else if( "var".equals(characteristic) ) {
+						// variance of attribute i of the data set that is currently described
+						values[position++] = dataset.variance(i);
+					} else {
+						throw new RuntimeException("Unkown distributional characteristic: " + characteristic);
+					}
+				}
+			}
+		}
+		return values;
+	}
+	
+	/**
+	 * Returns the normalized distributional characteristics of the test data (first instance) and the training data. 
+	 * @param testdata test data
+	 * @param traindataSet training data sets
+	 * @return normalized distributional characteristics of the data
+	 * @throws RuntimeException if the normalization filter fails
+	 */
+	protected Instances normalizedCharacteristicInstances(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		Instances data = characteristicInstances(testdata, traindataSet);
+		try {
+			final Normalize normalizer = new Normalize();
+			normalizer.setInputFormat(data);
+			data = Filter.useFilter(data, normalizer);
+		} catch (Exception e) {
+			throw new RuntimeException("Unexpected exception during normalization of distributional characteristics.", e);
+		}
+		return data;
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/IPointWiseDataselectionStrategy.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/IPointWiseDataselectionStrategy.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/IPointWiseDataselectionStrategy.java	(revision 2)
@@ -0,0 +1,20 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import de.ugoe.cs.cpdp.IParameterizable;
+
+import weka.core.Instances;
+
+/**
+ * Interface for pointwise data selection strategies, i.e., strategies that select training data
+ * on the level of single instances of one candidate training data set.
+ * Parameters are passed through {@link IParameterizable}.
+ * @author Steffen Herbold
+ */
+public interface IPointWiseDataselectionStrategy extends IParameterizable {
+
+	/**
+	 * Applies the data selection strategy. In contrast to the setwise strategies, the result is
+	 * handed back as return value.
+	 * @param testdata test data for which the training data is selected
+	 * @param traindata candidate training data
+	 * @return the selected training data
+	 */
+	Instances apply(Instances testdata, Instances traindata);
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/ISetWiseDataselectionStrategy.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/ISetWiseDataselectionStrategy.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/ISetWiseDataselectionStrategy.java	(revision 2)
@@ -0,0 +1,21 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import de.ugoe.cs.cpdp.IParameterizable;
+
+import weka.core.Instances;
+
+/**
+ * Interface for setwise data selection strategies, i.e., strategies that select training data
+ * on the level of complete data sets.
+ * Parameters are passed through {@link IParameterizable}.
+ * @author Steffen Herbold
+ */
+public interface ISetWiseDataselectionStrategy extends IParameterizable {
+
+	/**
+	 * Applies a setwise data selection strategy. The method has no return value; the
+	 * implementations in this package report the selection by removing the data sets that are
+	 * not selected from the passed list.
+	 * @param testdata test data for which the training data is selected
+	 * @param traindataSet candidate training data
+	 */
+	void apply(Instances testdata, SetUniqueList<Instances> traindataSet);
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PetersFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PetersFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PetersFilter.java	(revision 2)
@@ -0,0 +1,103 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.apache.commons.math3.util.MathArrays;
+
+import weka.core.Attribute;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Filter according to F. Peters, T. Menzies, and A. Marcus: Better Cross Company Defect Prediction
+ * <br><br>
+ * This filter does not work, the paper has been withdrawn. 
+ * @author Steffen Herbold
+ */
+@Deprecated
+public class PetersFilter implements IPointWiseDataselectionStrategy {
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// no parameters
+	}
+
+	/**
+	 * Every training instance is assigned to its nearest test instance (Euclidean distance on the
+	 * non-class attributes). Afterwards, every test instance selects the nearest training instance
+	 * among those assigned to it, ignoring training instances with distance 0.
+	 * @param testdata test data
+	 * @param traindata candidate training data
+	 * @return new data set with the selected training instances
+	 * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public Instances apply(Instances testdata, Instances traindata) {
+		final Attribute classAttribute = testdata.classAttribute();
+		
+		final double[][] testVectors = toVectors(testdata, testdata, classAttribute);
+		final double[][] trainVectors = toVectors(traindata, testdata, classAttribute);
+		
+		final int testCount = testdata.numInstances();
+		final int trainCount = traindata.numInstances();
+		
+		// one initially empty list of assigned training instances per test instance
+		final List<List<Integer>> fans = new ArrayList<List<Integer>>(testCount);
+		while( fans.size()<testCount ) {
+			fans.add(new LinkedList<Integer>());
+		}
+		
+		// assign every training instance to its nearest test instance
+		for( int train=0; train<trainCount; train++ ) {
+			int nearestTest = 0;
+			double nearestDistance = Double.MAX_VALUE;
+			for( int test=0; test<testCount; test++ ) {
+				final double current = MathArrays.distance(trainVectors[train], testVectors[test]);
+				if( current<nearestDistance ) {
+					nearestDistance = current;
+					nearestTest = test;
+				}
+			}
+			fans.get(nearestTest).add(train);
+		}
+		
+		// every test instance picks its nearest assigned training instance with a distance above 0
+		final SetUniqueList<Integer> chosen = SetUniqueList.setUniqueList(new LinkedList<Integer>());
+		for( int test=0; test<testCount; test++ ) {
+			int best = -1;
+			double bestDistance = Double.MAX_VALUE;
+			for( Integer train : fans.get(test) ) {
+				final double current = MathArrays.distance(testVectors[test], trainVectors[train]);
+				if( current>0.0d && current<bestDistance ) {
+					bestDistance = current;
+					best = train;
+				}
+			}
+			if( best>=0 ) {
+				chosen.add(best);
+			}
+		}
+		
+		// copy of the test data without instances as container for the result
+		final Instances result = new Instances(testdata);
+		result.delete();
+		for( Integer index : chosen ) {
+			result.add(traindata.instance(index));
+		}
+		return result;
+	}
+	
+	/**
+	 * Copies the values of all attributes except the class attribute into a matrix with one row per instance.
+	 * Each row has as many columns as the test data has attributes; the values are stored from column 0 on,
+	 * the column that is left over at the end stays 0.0.
+	 * @param data data set that is converted
+	 * @param testdata test data that defines the attribute positions
+	 * @param classAttribute class attribute of the test data
+	 * @return attribute values of the data set
+	 */
+	private static double[][] toVectors(Instances data, Instances testdata, Attribute classAttribute) {
+		final double[][] vectors = new double[data.numInstances()][testdata.numAttributes()];
+		for( int row=0; row<data.numInstances(); row++ ) {
+			final Instance instance = data.instance(row);
+			int column = 0;
+			for( int att=0; att<testdata.numAttributes(); att++ ) {
+				if( testdata.attribute(att)!=classAttribute ) {
+					vectors[row][column++] = instance.value(att);
+				}
+			}
+		}
+		return vectors;
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PointWiseEMClusterSelection.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PointWiseEMClusterSelection.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PointWiseEMClusterSelection.java	(revision 2)
@@ -0,0 +1,141 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.clusterers.EM;
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.AddCluster;
+import weka.filters.unsupervised.attribute.Remove;
+import de.ugoe.cs.util.console.Console;
+
+
+/**
+ * Use in Config:
+ * 
+ * Specify number of clusters
+ * -N = Num Clusters
+ * <pointwiseselector name="PointWiseEMClusterSelection" param="-N 10"/>
+ *
+ * Try to determine the number of clusters:
+ * -I 10 = max iterations
+ * -X 5 = 5 folds for cross evaluation
+ * -max = max number of clusters
+ * <pointwiseselector name="PointWiseEMClusterSelection" param="-I 10 -X 5 -max 300"/>
+ * 
+ * Don't forget to add:
+ * <preprocessor name="Normalization" param=""/>
+ */
+public class PointWiseEMClusterSelection implements IPointWiseDataselectionStrategy {
+	
+	private String[] params; 
+	
+	@Override
+	public void setParameter(String parameters) {
+		params = parameters.split(" ");
+	}
+
+	
+	/**
+	 * 1. Cluster the traindata
+	 * 2. for each instance in the testdata find the assigned cluster
+	 * 3. select only traindata from the clusters we found in our testdata
+	 * 
+	 * @returns the selected training data
+	 */
+	@Override
+	public Instances apply(Instances testdata, Instances traindata) {
+		//final Attribute classAttribute = testdata.classAttribute();
+		
+		final List<Integer> selectedCluster = SetUniqueList.setUniqueList(new LinkedList<Integer>());
+
+		// 1. copy train- and testdata
+		Instances train = new Instances(traindata);
+		Instances test = new Instances(testdata);
+		
+		Instances selected = null;
+		
+		try {
+			// remove class attribute from traindata
+			Remove filter = new Remove();
+			filter.setAttributeIndices("" + (train.classIndex() + 1));
+			filter.setInputFormat(train);
+			train = Filter.useFilter(train, filter);
+			
+			Console.traceln(Level.INFO, String.format("starting clustering"));
+			
+			// 3. cluster data
+			EM clusterer = new EM();
+			clusterer.setOptions(params);
+			clusterer.buildClusterer(train);
+			int numClusters = clusterer.getNumClusters();
+			if ( numClusters == -1) {
+				Console.traceln(Level.INFO, String.format("we have unlimited clusters"));
+			}else {
+				Console.traceln(Level.INFO, String.format("we have: "+numClusters+" clusters"));
+			}
+			
+			
+			// 4. classify testdata, save cluster int
+			
+			// remove class attribute from testdata?
+			Remove filter2 = new Remove();
+			filter2.setAttributeIndices("" + (test.classIndex() + 1));
+			filter2.setInputFormat(test);
+			test = Filter.useFilter(test, filter2);
+			
+			int cnum;
+			for( int i=0; i < test.numInstances(); i++ ) {
+				cnum = ((EM)clusterer).clusterInstance(test.get(i));
+
+				// we dont want doubles (maybe use a hashset instead of list?)
+				if ( !selectedCluster.contains(cnum) ) {
+					selectedCluster.add(cnum);
+					//Console.traceln(Level.INFO, String.format("assigned to cluster: "+cnum));
+				}
+			}
+			
+			Console.traceln(Level.INFO, String.format("our testdata is in: "+selectedCluster.size()+" different clusters"));
+			
+			// 5. get cluster membership of our traindata
+			AddCluster cfilter = new AddCluster();
+			cfilter.setClusterer(clusterer);
+			cfilter.setInputFormat(train);
+			Instances ctrain = Filter.useFilter(train, cfilter);
+			
+			
+			// 6. for all traindata get the cluster int, if it is in our list of testdata cluster int add the traindata
+			// of this cluster to our returned traindata
+			int cnumber;
+			selected = new Instances(traindata);
+			selected.delete();
+			
+			for ( int j=0; j < ctrain.numInstances(); j++ ) {
+				// get the cluster number from the attributes
+				cnumber = Integer.parseInt(ctrain.get(j).stringValue(ctrain.get(j).numAttributes()-1).replace("cluster", ""));
+				
+				//Console.traceln(Level.INFO, String.format("instance "+j+" is in cluster: "+cnumber));
+				if ( selectedCluster.contains(cnumber) ) {
+					// this only works if the index does not change
+					selected.add(traindata.get(j));
+					// check for differences, just one attribute, we are pretty sure the index does not change
+					if ( traindata.get(j).value(3) != ctrain.get(j).value(3) ) {
+						Console.traceln(Level.WARNING, String.format("we have a difference between train an ctrain!"));
+					}
+				}
+			}
+			
+			Console.traceln(Level.INFO, String.format("that leaves us with: "+selected.numInstances()+" traindata instances from "+traindata.numInstances()));
+		}catch( Exception e ) {
+			Console.traceln(Level.WARNING, String.format("ERROR"));
+			throw new RuntimeException("error in pointwise em", e);
+		}
+	
+		return selected;
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SeparatabilitySelection.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SeparatabilitySelection.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SeparatabilitySelection.java	(revision 2)
@@ -0,0 +1,98 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.classifiers.Evaluation;
+import weka.classifiers.functions.Logistic;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * A setwise data selection strategy based on the separatability of the training data from the test data after Z. He, F. Peters, T. Menzies, Y. Yang: Learning from Open-Source Projects: An Empirical Study on Defect Prediction.
+ * <br><br>
+ * This is calculated through the error of a logistic regression classifier that tries to separate the sets. 
+ * @author Steffen Herbold
+ */
+public class SeparatabilitySelection implements ISetWiseDataselectionStrategy {
+
+	/**
+	 * size of the random sample that is drawn from both test data and training data 
+	 */
+	private final int sampleSize = 500;
+	
+	/**
+	 * number of repetitions of the sample drawing
+	 */
+	private final int maxRep = 10;
+	
+	/**
+	 * number of neighbors that are selected
+	 */
+	private int neighbors = 10;
+	
+	/**
+	 * Sets the number of neighbors that are selected.
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( !"".equals(parameters) ) {
+			neighbors = Integer.parseInt(parameters);
+		}
+	}
+
+	/**
+	 * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		final Random rand = new Random(1);
+		
+		// calculate distances between testdata and traindata
+		final double[] distances = new double[traindataSet.size()]; 
+		
+		int i=0;
+		for( Instances traindata : traindataSet ) {
+			double distance = 0.0;
+			for( int rep=0; rep<maxRep ; rep++ ) {
+				// sample instances
+				Instances sample = new Instances(testdata);
+				for( int j=0; j<sampleSize; j++ ) {
+					Instance inst = new DenseInstance(testdata.instance(rand.nextInt(testdata.numInstances())));
+					inst.setDataset(sample);
+					inst.setClassValue(1.0);
+					sample.add(inst);
+					inst = new DenseInstance(traindata.instance(rand.nextInt(traindata.numInstances())));
+					inst.setDataset(sample);
+					inst.setClassValue(0.0);
+					sample.add(inst);
+				}
+				
+				// calculate separation
+				Evaluation eval;
+				try {
+					eval = new Evaluation(sample);
+					eval.crossValidateModel(new Logistic(), sample, 5, rand);
+				} catch (Exception e) {
+					throw new RuntimeException("cross-validation during calculation of separatability failed", e);
+				}
+				distance += eval.pctCorrect()/100.0;
+			}
+			distances[i++] = 2*((distance/maxRep)-0.5);
+		}
+		
+		// select closest neighbors
+		final double[] distancesCopy = Arrays.copyOf(distances, distances.length);
+		Arrays.sort(distancesCopy);
+		final double cutoffDistance = distancesCopy[neighbors];
+		
+		for( i=traindataSet.size()-1; i>=0 ; i-- ) {
+			if( distances[i]>cutoffDistance ) {
+				traindataSet.remove(i);
+			}
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMClusterSelection.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMClusterSelection.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMClusterSelection.java	(revision 2)
@@ -0,0 +1,59 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.clusterers.EM;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Filter based on EM clustering after S. Herbold: Training data selection for cross-project defect prediction
+ * @author Steffen Herbold
+ */
+public class SetWiseEMClusterSelection extends AbstractCharacteristicSelection {
+	
+	/**
+	 * Clusters the normalized distributional characteristics of the test data and the training
+	 * data sets with EM and removes all training data sets from the list that are not in the
+	 * cluster of the test data. As long as the test data is alone in its cluster, the clustering
+	 * is repeated with a maximum number of clusters that is one below the number of clusters
+	 * of the previous run.
+	 * @param testdata test data
+	 * @param traindataSet candidate training data; modified in place
+	 * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		final Instances characteristics = normalizedCharacteristicInstances(testdata, traindataSet);
+		
+		// instance 0 describes the test data, the following instances the training data sets
+		final Instance target = characteristics.instance(0);
+		final List<Instance> candidates = new LinkedList<Instance>();
+		for( int pos=1; pos<characteristics.numInstances(); pos++ ) {
+			candidates.add(characteristics.instance(pos));
+		}
+		
+		try {
+			final EM clusterer = new EM();
+			int clusterLimit = candidates.size();
+			int targetCluster;
+			boolean targetIsAlone = true;
+			do {
+				clusterer.setMaximumNumberOfClusters(clusterLimit);
+				clusterer.buildClusterer(characteristics);
+				
+				targetCluster = clusterer.clusterInstance(target);
+				
+				// stop at the first training data set that shares the cluster of the test data
+				for( Instance candidate : candidates ) {
+					if( clusterer.clusterInstance(candidate)==targetCluster ) {
+						targetIsAlone = false;
+						break;
+					}
+				}
+				clusterLimit = clusterer.numberOfClusters()-1;
+			} while( targetIsAlone );
+			
+			// position of the current candidate in the shrinking list of training data sets
+			int listPosition = 0;
+			for( Instance candidate : candidates ) {
+				if( clusterer.clusterInstance(candidate)!=targetCluster ) {
+					traindataSet.remove(listPosition);
+				} else {
+					listPosition++;
+				}
+			}
+		} catch(Exception e) {
+			throw new RuntimeException("error applying setwise EM clustering training data selection", e);
+		}
+	}	
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseKNNSelection.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseKNNSelection.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseKNNSelection.java	(revision 2)
@@ -0,0 +1,81 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.apache.commons.math3.util.MathArrays;
+
+import weka.core.Instances;
+
+/**
+ * Filter based on the k-nearest neighbor (KNN) algorithm, according to S. Herbold: Training data selection for cross-project defect prediction.
+ * @author Steffen Herbold
+ */
+public class SetWiseKNNSelection extends AbstractCharacteristicSelection {
+
+	/**
+	 * number of neighbors selected
+	 */
+	private int k = 1;
+
+	/**
+	 * Keeps the {@code k} training data sets whose characteristic vectors are closest to the one of the
+	 * target product and removes all other data sets from {@code traindataSet}. If fewer than {@code k}
+	 * candidates are available, all of them are kept.
+	 * @param testdata data of the target product
+	 * @param traindataSet candidate training data sets; modified in place
+	 * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		final Instances data = normalizedCharacteristicInstances(testdata, traindataSet);
+
+		// indices (with respect to traindataSet) of the nearest neighbors found so far
+		final Set<Integer> selected = new HashSet<Integer>();
+		final int numNeighbors = Math.min(k, data.numInstances()-1);
+		for( int i=0 ; i<numNeighbors ; i++ ) {
+			selected.add(getClosest(data, selected));
+		}
+
+		// remove everything that is not a selected neighbor; iterate backwards so that removals
+		// do not shift the indices that still have to be checked
+		for( int i=traindataSet.size()-1; i>=0 ; i-- ) {
+			if( !selected.contains(i) ) {
+				traindataSet.remove(i);
+			}
+		}
+	}
+
+	/**
+	 * Helper method that determines the not yet selected instance with the smallest distance to the first instance (index 0).
+	 * The instance at index {@code i+1} of the data is treated as the training data set with index {@code i}.
+	 * @param data data set; the first instance describes the target
+	 * @param excluded indices of training data sets that are ignored because they were already selected
+	 * @return index of the closest training data set; -1 if there is no candidate left
+	 */
+	private int getClosest(Instances data, Set<Integer> excluded) {
+		final double[] target = data.instance(0).toDoubleArray();
+		double closestDistance = Double.MAX_VALUE;
+		int closestIndex = -1;
+		for( int i=1 ; i<data.numInstances() ; i++ ) {
+			final int trainIndex = i-1;
+			if( excluded.contains(trainIndex) ) {
+				continue;
+			}
+			final double distance = MathArrays.distance(target, data.instance(i).toDoubleArray());
+			// the first remaining candidate is always accepted so that a neighbor is found even for degenerate distances
+			if( closestIndex==-1 || distance<closestDistance ) {
+				closestDistance = distance;
+				closestIndex = trainIndex;
+			}
+		}
+		return closestIndex;
+	}
+
+	/**
+	 * Sets the number of neighbors followed by the distributional characteristics, the values are separated by blanks.
+	 * An empty parameter string leaves the configuration unchanged.
+	 * @param parameters number of neighbors and characteristics, separated by blanks
+	 * @throws NumberFormatException if the first value is not an integer
+	 * @see AbstractCharacteristicSelection#setParameter(String)
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( !"".equals(parameters) ) {
+			final String[] split = parameters.split(" ");
+			k = Integer.parseInt(split[0]);
+			// everything after the first value is passed on to the characteristic selection
+			final StringBuilder characteristics = new StringBuilder();
+			for( int i=1 ; i<split.length; i++ ) {
+				if( i>1 ) {
+					characteristics.append(" ");
+				}
+				characteristics.append(split[i]);
+			}
+			super.setParameter(characteristics.toString());
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/TestAsTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/TestAsTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/TestAsTraining.java	(revision 2)
@@ -0,0 +1,31 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instances;
+
+/**
+ * Uses the test data as training data.
+ * @author Steffen Herbold
+ *
+ */
+public class TestAsTraining implements ISetWiseDataselectionStrategy {
+
+	/**
+	 * This strategy has no parameters; the given string is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// nothing to configure
+	}
+
+	/**
+	 * Discards all training data sets and adds a copy of the test data as the only training data set.
+	 * @param testdata test data that is used for the training
+	 * @param traindataSet training data sets; modified in place
+	 * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		traindataSet.clear();
+		final Instances testdataCopy = new Instances(testdata);
+		traindataSet.add(testdataCopy);
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/TurhanFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/TurhanFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/TurhanFilter.java	(revision 2)
@@ -0,0 +1,97 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.apache.commons.math3.util.MathArrays;
+
+import weka.core.Attribute;
+import weka.core.Instance;
+import weka.core.Instances;
+import de.ugoe.cs.util.ArrayTools;
+
+/**
+ * Filter according to B. Turhan, T. Menzies, A. Bener, and J. Di Stefano: On the relative value of cross-company and within-company data for defect prediction
+ * @author Steffen Herbold
+ */
+public class TurhanFilter implements IPointWiseDataselectionStrategy {
+
+	/**
+	 * number of neighbors that are selected
+	 */
+	private int k = 10;
+
+	/**
+	 * Sets the number of neighbors. A null or empty string keeps the current value.
+	 * @param parameters number of neighbors
+	 * @throws NumberFormatException if the parameter is not an integer
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( parameters!=null && !"".equals(parameters) ) {
+			k = Integer.parseInt(parameters);
+		}
+	}
+
+	/**
+	 * Selects for each instance of the test data its k nearest training instances (Euclidean distance over
+	 * all attributes except the class attribute) and returns the union of these neighbors. Each training
+	 * instance is contained at most once in the result.
+	 * @param testdata test data; its attributes define which values are compared
+	 * @param traindata candidate training data; assumed to have the same attributes as the test data
+	 * @return the selected training instances
+	 * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public Instances apply(Instances testdata, Instances traindata) {
+		final Attribute classAttribute = testdata.classAttribute();
+
+		final List<Integer> selectedIndex = SetUniqueList.setUniqueList(new LinkedList<Integer>());
+
+		// feature vectors of the training data are needed for every test instance, therefore they are extracted only once
+		final double[][] trainDoubles = new double[traindata.numInstances()][];
+		for( int i=0; i<traindata.numInstances() ; i++ ) {
+			trainDoubles[i] = featureVector(testdata, classAttribute, traindata.instance(i));
+		}
+
+		for( int i=0; i<testdata.numInstances() ; i++ ) {
+			final double[] targetVector = featureVector(testdata, classAttribute, testdata.instance(i));
+
+			// closestDistances/closestIndex hold the k best neighbors found so far; the slot with the largest
+			// distance (farthestClosestIndex) is the one that is replaced by a closer instance
+			double farthestClosestDistance = Double.MAX_VALUE;
+			int farthestClosestIndex = 0;
+			final double[] closestDistances = new double[k];
+			for( int m=0 ; m<closestDistances.length ; m++ ) {
+				closestDistances[m] = Double.MAX_VALUE;
+			}
+			final int[] closestIndex = new int[k];
+
+			for( int n=0; n<traindata.numInstances() ; n++ ) {
+				final double distance = MathArrays.distance(targetVector, trainDoubles[n]);
+
+				if( distance<farthestClosestDistance ) {
+					closestIndex[farthestClosestIndex] = n;
+					closestDistances[farthestClosestIndex] = distance;
+
+					farthestClosestIndex = ArrayTools.findMax(closestDistances);
+					farthestClosestDistance = closestDistances[farthestClosestIndex];
+				}
+			}
+			for( int m=0 ; m<closestIndex.length ; m++ ) {
+				// slots that still hold the initial distance were never filled (fewer than k training
+				// instances) and must not contribute their default index 0
+				if( closestDistances[m]<Double.MAX_VALUE ) {
+					selectedIndex.add(closestIndex[m]);
+				}
+			}
+		}
+
+		// empty data set with the header of the test data
+		final Instances selected = new Instances(testdata, 0);
+		for( Integer i : selectedIndex) {
+			selected.add(traindata.instance(i));
+		}
+		return selected;
+	}
+
+	/**
+	 * Extracts the values of all attributes except the class attribute from an instance.
+	 * @param schema data set that defines the attributes
+	 * @param classAttribute class attribute of the schema; its value is skipped
+	 * @param instance instance whose values are extracted
+	 * @return vector with one entry per non-class attribute
+	 */
+	private static double[] featureVector(Instances schema, Attribute classAttribute, Instance instance) {
+		final double[] vector = new double[schema.numAttributes()-1];
+		int pos = 0;
+		for( int j=0 ; j<schema.numAttributes(); j++ ) {
+			if( schema.attribute(j)!=classAttribute ) {
+				vector[pos++] = instance.value(j);
+			}
+		}
+		return vector;
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/AbstractWekaEvaluation.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/AbstractWekaEvaluation.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/AbstractWekaEvaluation.java	(revision 2)
@@ -0,0 +1,262 @@
+package de.ugoe.cs.cpdp.eval;
+
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import de.ugoe.cs.cpdp.training.ITrainer;
+import de.ugoe.cs.cpdp.training.WekaCompatibleTrainer;
+import de.ugoe.cs.util.StringTools;
+import weka.classifiers.Classifier;
+import weka.classifiers.Evaluation;
+import weka.core.Attribute;
+import weka.core.Instances;
+
+/**
+ * Base class for the evaluation of results of classifiers compatible with the {@link Classifier} interface.
+ * For each classifier, the following metrics are calculated:
+ * <ul>
+ *  <li>Success with recall>=0.7, precision>=0.5</li>
+ *  <li>Success with recall>=0.7, precision>=0.7</li>
+ *  <li>Success with gscore>0.75</li>
+ *  <li>Success with gscore>0.6</li>
+ *  <li>error rate</li>
+ *  <li>recall</li>
+ *  <li>precision</li>
+ *  <li>fscore</li>
+ *  <li>gscore</li>
+ *  <li>MCC</li>
+ *  <li>AUC</li>
+ *  <li>AUCEC (weighted by LOC, if applicable; 0.0 if LOC not available)</li>
+ *  <li>true positive rate</li>
+ *  <li>true negative rate</li>
+ *  <li>true positives</li>
+ *  <li>false negatives</li>
+ *  <li>true negatives</li>
+ *  <li>false positives</li>
+ * </ul> 
+ * @author Steffen Herbold
+ */
+public abstract class AbstractWekaEvaluation implements IEvaluationStrategy {
+
+	/**
+	 * prefixes of the result columns that are written for each trainer; the order matches the order in which
+	 * {@link #apply(Instances, Instances, List, boolean)} writes the values
+	 */
+	private static final String[] RESULT_COLUMN_PREFIXES = {
+		"succHe_", "succZi_", "succG75_", "succG60_", "error_", "recall_", "precision_", "fscore_", "gscore_",
+		"mcc_", "auc_", "aucec_", "tpr_", "tnr_", "tp_", "fn_", "tn_", "fp_"
+	};
+
+	/**
+	 * writer for the evaluation results
+	 */
+	private PrintWriter output = new PrintWriter(System.out);
+
+	/**
+	 * true if {@link #output} writes to System.out; in this case the writer is not closed when the output is changed
+	 */
+	private boolean outputIsSystemOut = true;
+
+	/**
+	 * Creates the weka evaluator. Allows the creation of the evaluator in different ways, e.g., for cross-validation
+	 * or evaluation on the test data.
+	 * @param testdata test data
+	 * @param classifier classifier used
+	 * @return evaluator
+	 */
+	protected abstract Evaluation createEvaluator(Instances testdata, Classifier classifier);
+
+	/**
+	 * Evaluates the classifiers of all trainers and writes one comma separated result line to the output.
+	 * @param testdata test data for the evaluation
+	 * @param traindata training data used; only its size is reported
+	 * @param trainers trainers whose classifiers are evaluated; all must be {@link WekaCompatibleTrainer}s
+	 * @param writeHeader if true, a header line is written before the results
+	 * @throws RuntimeException if a trainer is not a {@link WekaCompatibleTrainer}
+	 * @see de.ugoe.cs.cpdp.eval.IEvaluationStrategy#apply(weka.core.Instances, weka.core.Instances, java.util.List, boolean)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata, List<ITrainer> trainers,
+			boolean writeHeader) {
+		final List<Classifier> classifiers = new LinkedList<Classifier>();
+		for( ITrainer trainer : trainers ) {
+			if( !(trainer instanceof WekaCompatibleTrainer) ) {
+				throw new RuntimeException("The selected evaluator only support Weka classifiers");
+			}
+			classifiers.add(((WekaCompatibleTrainer) trainer).getClassifier());
+		}
+
+		if( writeHeader ) {
+			output.append("version,size_test,size_training");
+			for( ITrainer trainer : trainers ) {
+				final String trainerName = ((WekaCompatibleTrainer) trainer).getName();
+				for( String prefix : RESULT_COLUMN_PREFIXES ) {
+					output.append("," + prefix + trainerName);
+				}
+			}
+			output.append(StringTools.ENDLINE);
+		}
+
+		output.append(testdata.relationName());
+		output.append("," + testdata.numInstances());
+		output.append("," + traindata.numInstances());
+
+		for( Classifier classifier : classifiers ) {
+			final Evaluation eval = createEvaluator(testdata, classifier);
+
+			// all metrics are calculated for the class with index 1
+			final double tp = eval.numTruePositives(1);
+			final double fp = eval.numFalsePositives(1);
+			final double tn = eval.numTrueNegatives(1);
+			final double fn = eval.numFalseNegatives(1);
+			final double recall = eval.recall(1);
+			final double precision = eval.precision(1);
+
+			final double pf = fp/(fp+tn);
+			final double gmeasure = 2*recall*(1.0-pf)/(recall+(1.0-pf));
+			final double mcc = (tp*tn-fp*fn)/Math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn));
+			final double aucec = calculateReviewEffort(testdata, classifier);
+
+			output.append(successFlag(recall>=0.7 && precision>=0.5));
+			output.append(successFlag(recall>=0.7 && precision>=0.7));
+			output.append(successFlag(gmeasure>0.75));
+			output.append(successFlag(gmeasure>0.6));
+
+			output.append("," + eval.errorRate());
+			output.append("," + recall);
+			output.append("," + precision);
+			output.append("," + eval.fMeasure(1));
+			output.append("," + gmeasure);
+			output.append("," + mcc);
+			output.append("," + eval.areaUnderROC(1));
+			output.append("," + aucec);
+			output.append("," + eval.truePositiveRate(1));
+			output.append("," + eval.trueNegativeRate(1));
+			output.append("," + tp);
+			output.append("," + fn);
+			output.append("," + tn);
+			output.append("," + fp);
+		}
+
+		output.append(StringTools.ENDLINE);
+		output.flush();
+	}
+
+	/**
+	 * Creates the result column for a success criterion.
+	 * @param success true if the criterion is fulfilled
+	 * @return ",1" if the criterion is fulfilled, ",0" otherwise
+	 */
+	private static String successFlag(boolean success) {
+		return success ? ",1" : ",0";
+	}
+
+	/**
+	 * Calculates the area under the curve of the bugs found over the reviewed lines of code. The instances
+	 * predicted as defective are reviewed first, then the remaining instances; within both groups the
+	 * instances are reviewed with ascending LOC.
+	 * @param testdata test data; the attribute "loc" is used as size
+	 * @param classifier classifier whose predictions define the review order
+	 * @return the calculated area; 0.0 if the test data has no attribute "loc"
+	 * @throws RuntimeException if the classification of an instance fails
+	 */
+	private double calculateReviewEffort(Instances testdata, Classifier classifier) {
+
+		final Attribute loc = testdata.attribute("loc");
+		if( loc==null ) {
+			return 0.0;
+		}
+
+		final List<Integer> bugPredicted = new ArrayList<>();
+		final List<Integer> nobugPredicted = new ArrayList<>();
+		double totalLoc = 0.0d;
+		int totalBugs = 0;
+		for( int i=0 ; i<testdata.numInstances() ; i++ ) {
+			try {
+				if( Double.compare(classifier.classifyInstance(testdata.instance(i)),0.0d)==0 ) {
+					nobugPredicted.add(i);
+				} else {
+					bugPredicted.add(i);
+				}
+			} catch (Exception e) {
+				throw new RuntimeException("unexpected error during the evaluation of the review effort", e);
+			}
+			if(Double.compare(testdata.instance(i).classValue(),1.0d)==0) {
+				totalBugs++;
+			}
+			totalLoc += testdata.instance(i).value(loc);
+		}
+
+		final List<Double> reviewLoc = new ArrayList<>(testdata.numInstances());
+		final List<Double> bugsFound = new ArrayList<>(testdata.numInstances());
+
+		double currentBugsFound = reviewInLocOrder(testdata, loc, totalLoc, bugPredicted, 0, reviewLoc, bugsFound);
+		reviewInLocOrder(testdata, loc, totalLoc, nobugPredicted, currentBugsFound, reviewLoc, bugsFound);
+
+		// sum of rectangles: width is the share of the reviewed LOC, height the share of the bugs found so far
+		double auc = 0.0;
+		for( int i=0 ; i<bugsFound.size() ; i++ ) {
+			auc += reviewLoc.get(i)*bugsFound.get(i)/totalBugs;
+		}
+
+		return auc;
+	}
+
+	/**
+	 * Reviews the pending instances with ascending LOC and records for each of them its share of the total
+	 * LOC and the accumulated class values ("bugs found") after its review.
+	 * @param testdata test data the indices refer to
+	 * @param loc attribute with the lines of code
+	 * @param totalLoc total lines of code of the test data
+	 * @param pending indices of the instances to review; empty after the call
+	 * @param bugsFoundSoFar accumulated class values before the first review of this call
+	 * @param reviewLoc receives the LOC share of each reviewed instance
+	 * @param bugsFound receives the accumulated class values after each review
+	 * @return accumulated class values after the last review
+	 */
+	private double reviewInLocOrder(Instances testdata, Attribute loc, double totalLoc, List<Integer> pending,
+			double bugsFoundSoFar, List<Double> reviewLoc, List<Double> bugsFound) {
+		double currentBugsFound = bugsFoundSoFar;
+		while( !pending.isEmpty() ) {
+			// select the pending instance with the smallest LOC; the first one wins in case of ties
+			double minLoc = Double.MAX_VALUE;
+			int minIndex = -1;
+			for( int i=0 ; i<pending.size() ; i++ ) {
+				double currentLoc = testdata.instance(pending.get(i)).value(loc);
+				if( currentLoc<minLoc ) {
+					minIndex = i;
+					minLoc = currentLoc;
+				}
+			}
+			if( minIndex==-1 ) {
+				throw new RuntimeException("Shouldn't happen!");
+			}
+			reviewLoc.add(minLoc/totalLoc);
+
+			currentBugsFound += testdata.instance(pending.get(minIndex)).classValue();
+			bugsFound.add(currentBugsFound);
+
+			pending.remove(minIndex);
+		}
+		return currentBugsFound;
+	}
+
+	/**
+	 * Sets the destination of the results. "system.out" or an empty string selects System.out, every other
+	 * value is used as the name of the file the results are written to. A previously used file is closed.
+	 * @param parameters "system.out", an empty string, or a file name
+	 * @throws RuntimeException if the file cannot be opened
+	 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		if( output!=null && !outputIsSystemOut ) {
+			output.close();
+		}
+		if( "system.out".equals(parameters) || "".equals(parameters) ) {
+			output = new PrintWriter(System.out);
+			outputIsSystemOut = true;
+		} else {
+			try {
+				output = new PrintWriter(new FileOutputStream(parameters));
+				outputIsSystemOut = false;
+			} catch (FileNotFoundException e) {
+				throw new RuntimeException(e);
+			}
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/CVWekaEvaluation.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/CVWekaEvaluation.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/CVWekaEvaluation.java	(revision 2)
@@ -0,0 +1,36 @@
+package de.ugoe.cs.cpdp.eval;
+
+import java.io.PrintStream;
+import java.util.Random;
+
+import org.apache.commons.io.output.NullOutputStream;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.Evaluation;
+import weka.core.Instances;
+
+/**
+ * Implements the {@link AbstractWekaEvaluation} for 10-fold cross validation.
+ * @author Steffen Herbold
+ */
+public class CVWekaEvaluation extends AbstractWekaEvaluation {
+	
+	/**
+	 * @see de.ugoe.cs.cpdp.eval.AbstractWekaEvaluation#createEvaluator(weka.core.Instances, weka.classifiers.Classifier)
+	 */
+	@Override
+	protected Evaluation createEvaluator(Instances testdata, Classifier classifier) {
+		PrintStream errStr	= System.err;
+		System.setErr(new PrintStream(new NullOutputStream()));
+		try {
+			final Evaluation eval = new Evaluation(testdata);
+			eval.crossValidateModel(classifier, testdata, 10, new Random(1));
+			return eval;
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		} finally {
+			System.setErr(errStr);
+		}
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/IEvaluationStrategy.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/IEvaluationStrategy.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/IEvaluationStrategy.java	(revision 2)
@@ -0,0 +1,24 @@
+package de.ugoe.cs.cpdp.eval;
+
+import java.util.List;
+
+import de.ugoe.cs.cpdp.IParameterizable;
+import de.ugoe.cs.cpdp.training.ITrainer;
+
+import weka.core.Instances;
+
+/**
+ * Interface for evaluation strategies to evaluate the performance of classifiers. 
+ * @author Steffen Herbold
+ */
+public interface IEvaluationStrategy extends IParameterizable {
+
+	/**
+	 * Applies the evaluation strategy to the classifiers that were trained by the given trainers.
+	 * @param testdata test data for the evaluation
+	 * @param traindata training data used
+	 * @param trainers list of training algorithms used to train the classifiers
+	 * @param writeHeader if true, a header line for the results file is written (may not be applicable)
+	 */
+	void apply(Instances testdata, Instances traindata, List<ITrainer> trainers, boolean writeHeader);
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/NormalWekaEvaluation.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/NormalWekaEvaluation.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/eval/NormalWekaEvaluation.java	(revision 2)
@@ -0,0 +1,27 @@
+package de.ugoe.cs.cpdp.eval;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.Evaluation;
+import weka.core.Instances;
+
+/**
+ * Implements the {@link AbstractWekaEvaluation} for evaluation on the test data.
+ * @author Steffen Herbold
+ *
+ */
+public class NormalWekaEvaluation extends AbstractWekaEvaluation {
+
+	/**
+	 * Evaluates the classifier directly on the test data.
+	 * @param testdata data the classifier is evaluated on
+	 * @param classifier classifier that is evaluated
+	 * @return evaluator with the results
+	 * @throws RuntimeException if the evaluation fails
+	 * @see de.ugoe.cs.cpdp.eval.AbstractWekaEvaluation#createEvaluator(weka.core.Instances, weka.classifiers.Classifier)
+	 */
+	@Override
+	protected Evaluation createEvaluator(Instances testdata, Classifier classifier) {
+		final Evaluation evaluation;
+		try {
+			evaluation = new Evaluation(testdata);
+			evaluation.evaluateModel(classifier, testdata);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		}
+		return evaluation;
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AbstractFolderLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AbstractFolderLoader.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AbstractFolderLoader.java	(revision 2)
@@ -0,0 +1,59 @@
+package de.ugoe.cs.cpdp.loader;
+
+import java.io.File;
+import java.util.LinkedList;
+import java.util.List;
+
+import weka.core.Instances;
+
+import de.ugoe.cs.cpdp.versions.SoftwareVersion;
+
+
+public abstract class AbstractFolderLoader implements IVersionLoader {
+
+	/**
+	 * Loader for the data of a single software version that is stored in one file.
+	 */
+	interface SingleVersionLoader {
+		/**
+		 * Loads the instances of a version.
+		 * @param file file with the data of the version
+		 * @return the instances
+		 */
+		Instances load(File file);
+
+		/**
+		 * Decides whether a file is loaded.
+		 * @param filename name of the file
+		 * @return true if the file should be loaded with this loader
+		 */
+		boolean filenameFilter(String filename);
+	}
+
+	/**
+	 * Path of the data.
+	 */
+	private String path = "";
+
+	/**
+	 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#setLocation(java.lang.String)
+	 */
+	@Override
+	public void setLocation(String location) {
+		path=location;
+	}
+
+	/**
+	 * Loads all versions below the configured location. Every directory within the location is treated as a
+	 * project; every file within a project directory that is accepted by the filename filter of the single
+	 * version loader is loaded as a version of this project.
+	 * @return the loaded versions
+	 * @throws RuntimeException if the location or a project directory cannot be listed
+	 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#load()
+	 */
+	@Override
+	public List<SoftwareVersion> load() {
+		final List<SoftwareVersion> versions = new LinkedList<SoftwareVersion>();
+
+		final File dataDir = new File(path);
+		final SingleVersionLoader instancesLoader = getSingleLoader();
+
+		for( File projectDir : listFilesChecked(dataDir) ) {
+			if( !projectDir.isDirectory() ) {
+				continue;
+			}
+			final String projectName = projectDir.getName();
+			for( File versionFile : listFilesChecked(projectDir) ) {
+				if( versionFile.isFile() && instancesLoader.filenameFilter(versionFile.getName()) ) {
+					final String versionName = versionFile.getName();
+					final Instances data = instancesLoader.load(versionFile);
+					versions.add(new SoftwareVersion(projectName, versionName, data));
+				}
+			}
+		}
+		return versions;
+	}
+
+	/**
+	 * Lists the content of a directory. {@link File#listFiles()} returns null if the path is not a directory
+	 * or if an I/O error occurs; this is reported with a meaningful exception instead of a NullPointerException.
+	 * @param directory directory whose content is listed
+	 * @return the files and directories within the directory
+	 * @throws RuntimeException if the directory cannot be listed
+	 */
+	private static File[] listFilesChecked(File directory) {
+		final File[] content = directory.listFiles();
+		if( content==null ) {
+			throw new RuntimeException("cannot list content of directory: " + directory.getAbsolutePath());
+		}
+		return content;
+	}
+
+	/**
+	 * Creates the loader that is used to load the single versions.
+	 * @return loader for single versions
+	 */
+	abstract protected SingleVersionLoader getSingleLoader();
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVDataLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVDataLoader.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVDataLoader.java	(revision 2)
@@ -0,0 +1,69 @@
+package de.ugoe.cs.cpdp.loader;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+import de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader;
+import de.ugoe.cs.util.FileTools;
+
+/**
+ * Loads the instances for a software version from a CSV file of the // TODO dataset citation
+ * data set. 
+ * @author Steffen Herbold
+ */
+class CSVDataLoader implements SingleVersionLoader {
+
+	/**
+	 * number of leading columns of the CSV file that are not used as attributes
+	 */
+	private static final int SKIPPED_COLUMNS = 3;
+
+	/**
+	 * Loads the instances. The first line of the file is the header. The first three columns are ignored,
+	 * the last column is the nominal class attribute "bug" (0 if the value is "0", 1 otherwise), and all
+	 * columns in between are numeric attributes. Blank lines are ignored.
+	 * @param file handle to the file of the instances
+	 * @return the instances
+	 * @throws RuntimeException if the file cannot be read or is empty
+	 * @throws NumberFormatException if an attribute value is not a number
+	 */
+	@Override
+	public Instances load(File file) {
+		final String[] lines;
+		try {
+			lines = FileTools.getLinesFromFile(file.getAbsolutePath());
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+		if( lines.length==0 ) {
+			throw new RuntimeException("CSV file does not contain a header line: " + file.getAbsolutePath());
+		}
+
+		// configure Instances
+		final ArrayList<Attribute> atts = new ArrayList<Attribute>();
+
+		final String[] header = lines[0].split(",");
+		for( int j=SKIPPED_COLUMNS ; j<header.length-1 ; j++ ) {
+			atts.add(new Attribute(header[j]));
+		}
+		final ArrayList<String> classAttVals = new ArrayList<String>();
+		classAttVals.add("0");
+		classAttVals.add("1");
+		final Attribute classAtt = new Attribute("bug", classAttVals);
+		atts.add(classAtt);
+
+		final Instances data = new Instances(file.getName(), atts, 0);
+		data.setClass(classAtt);
+
+		// fetch data
+		for( int i=1 ; i<lines.length ; i++ ) {
+			if( lines[i].trim().isEmpty() ) {
+				// e.g., a trailing empty line; it does not describe an instance
+				continue;
+			}
+			final String[] lineSplit = lines[i].split(",");
+			final double[] values = new double[lineSplit.length-SKIPPED_COLUMNS];
+			for( int j=0 ; j<values.length-1 ; j++ ) {
+				values[j] = Double.parseDouble(lineSplit[j+SKIPPED_COLUMNS].trim());
+			}
+			// everything except "0" is treated as defective
+			values[values.length-1] = lineSplit[lineSplit.length-1].trim().equals("0") ? 0 : 1;
+			data.add(new DenseInstance(1.0, values));
+		}
+
+		return data;
+	}
+
+	/**
+	 * Accepts all files with the ending ".csv".
+	 * @param filename name of the file
+	 * @return true if the filename ends with ".csv"
+	 */
+	@Override
+	public boolean filenameFilter(String filename) {
+		return filename.endsWith(".csv");
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVFolderLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVFolderLoader.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVFolderLoader.java	(revision 2)
@@ -0,0 +1,17 @@
+package de.ugoe.cs.cpdp.loader;
+
+/**
+ * Implements a {@link IVersionLoader} for data from // TODO data reference
+ * Each folder contained in the defined location ({@link #setLocation(String)}) represents a project, the data files
+ * within the versions.  
+ * @author Steffen Herbold
+ */
+public class CSVFolderLoader extends AbstractFolderLoader {
+
+	/**
+	 * Creates the loader for the single CSV files.
+	 * @return a new {@link CSVDataLoader}
+	 */
+	@Override
+	protected SingleVersionLoader getSingleLoader() {
+		final SingleVersionLoader csvLoader = new CSVDataLoader();
+		return csvLoader;
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/IVersionLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/IVersionLoader.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/IVersionLoader.java	(revision 2)
@@ -0,0 +1,24 @@
+package de.ugoe.cs.cpdp.loader;
+
+import java.util.List;
+
+import de.ugoe.cs.cpdp.versions.SoftwareVersion;
+
+/**
+ * Interface for loading software versions from a data source.
+ * @author Steffen Herbold
+ */
+public interface IVersionLoader {
+
+	/**
+	 * Defines where the data is located.
+	 * @param location location of the data
+	 */
+	void setLocation(String location);
+
+	/**
+	 * Loads the software versions from the data source.
+	 * @return the loaded versions
+	 */
+	List<SoftwareVersion> load();
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFFolderLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFFolderLoader.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFFolderLoader.java	(revision 2)
@@ -0,0 +1,10 @@
+package de.ugoe.cs.cpdp.loader;
+
+public class NasaARFFFolderLoader extends AbstractFolderLoader {
+
+	/**
+	 * Creates the loader for the single ARFF files.
+	 * @return a new {@link NasaARFFLoader}
+	 */
+	@Override
+	protected SingleVersionLoader getSingleLoader() {
+		final SingleVersionLoader arffLoader = new NasaARFFLoader();
+		return arffLoader;
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFLoader.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFLoader.java	(revision 2)
@@ -0,0 +1,190 @@
+package de.ugoe.cs.cpdp.loader;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader;
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.Add;
+import weka.filters.unsupervised.attribute.Reorder;
+
+public class NasaARFFLoader implements SingleVersionLoader {
+
+	/**
+	 * maps the attribute names used by the different data sets to the normalized attribute names
+	 */
+	Map<String, String> attributeNameMap;
+
+	/**
+	 * normalized attribute names in the order in which they appear in the loaded data
+	 */
+	List<String> attributeOrder;
+
+	/**
+	 * Creates the loader and initializes the attribute name mapping and the normalized attribute order.
+	 */
+	public NasaARFFLoader() {
+		attributeNameMap = new HashMap<>();
+
+		// Map entries for ar project
+		attributeNameMap.put("total_loc", "LOC_TOTAL");
+		attributeNameMap.put("comment_loc", "LOC_COMMENTS");
+		attributeNameMap.put("code_and_comment_loc", "LOC_CODE_AND_COMMENT");
+		attributeNameMap.put("executable_loc", "LOC_EXECUTABLE");
+		attributeNameMap.put("unique_operands", "NUM_UNIQUE_OPERANDS");
+		attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS");
+		attributeNameMap.put("total_operands", "NUM_OPERANDS");
+		attributeNameMap.put("total_operators",  "NUM_OPERATORS");
+		attributeNameMap.put("halstead_length",  "HALSTEAD_LENGTH");
+		attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME");
+		attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY");
+		attributeNameMap.put("halstead_effort", "HALSTEAD_EFFORT");
+		attributeNameMap.put("halstead_error", "HALSTEAD_ERROR_EST");
+		attributeNameMap.put("halstead_time", "HALSTEAD_PROG_TIME");
+		attributeNameMap.put("branch_count", "BRANCH_COUNT");
+		attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY");
+		attributeNameMap.put("design_complexity",  "DESIGN_COMPLEXITY");
+
+		// Map entries for KC2
+		attributeNameMap.put("loc", "LOC_TOTAL"); // TODO these first two LOCs are guesses
+		attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); // TODO
+		attributeNameMap.put("lOComment", "LOC_COMMENTS");
+		attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT");
+		attributeNameMap.put("uniq_Op", "NUM_UNIQUE_OPERATORS");
+		attributeNameMap.put("uniq_Opnd", "NUM_UNIQUE_OPERANDS");
+		attributeNameMap.put("total_Op", "NUM_OPERATORS");
+		attributeNameMap.put("total_Opnd", "NUM_OPERANDS");
+		attributeNameMap.put("v", "HALSTEAD_VOLUME");
+		attributeNameMap.put("l", "HALSTEAD_LENGTH");
+		attributeNameMap.put("d", "HALSTEAD_DIFFICULTY");
+		attributeNameMap.put("e", "HALSTEAD_EFFORT");
+		attributeNameMap.put("b",  "HALSTEAD_ERROR_EST"); // TODO not sure about this one
+		attributeNameMap.put("t", "HALSTEAD_PROG_TIME");
+		attributeNameMap.put("branchCount", "BRANCH_COUNT");
+		attributeNameMap.put("v(g)",  "CYCLOMATIC_COMPLEXITY");
+		attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY");
+
+		// names of the class attribute
+		attributeNameMap.put("defects",  "bug");
+		attributeNameMap.put("Defective", "bug");
+		attributeNameMap.put("problems", "bug");
+
+		// build list with normalized attribute order
+		attributeOrder = new LinkedList<>();
+
+		attributeOrder.add("LOC_TOTAL");
+		attributeOrder.add("LOC_EXECUTABLE");
+		attributeOrder.add("LOC_COMMENTS");
+		attributeOrder.add("LOC_CODE_AND_COMMENT");
+		attributeOrder.add("NUM_UNIQUE_OPERATORS");
+		attributeOrder.add("NUM_UNIQUE_OPERANDS");
+		attributeOrder.add("NUM_OPERATORS");
+		attributeOrder.add("NUM_OPERANDS");
+		attributeOrder.add("HALSTEAD_VOLUME");
+		attributeOrder.add("HALSTEAD_LENGTH");
+		attributeOrder.add("HALSTEAD_DIFFICULTY");
+		attributeOrder.add("HALSTEAD_EFFORT");
+		attributeOrder.add("HALSTEAD_ERROR_EST");
+		attributeOrder.add("HALSTEAD_PROG_TIME");
+		attributeOrder.add("BRANCH_COUNT");
+		attributeOrder.add("CYCLOMATIC_COMPLEXITY");
+		attributeOrder.add("DESIGN_COMPLEXITY");
+		attributeOrder.add("bug");
+	}
+
+	/**
+	 * Loads the instances from an ARFF file and normalizes them: the attributes are renamed to the
+	 * normalized names, reduced to and ordered according to the normalized attribute order, and the class
+	 * attribute (the last attribute of the file) is replaced by a nominal attribute with the labels "0" and "1".
+	 * @param file handle to the file of the instances
+	 * @return the instances
+	 * @throws RuntimeException if the file cannot be read, if a filter fails, or if the file does not
+	 * contain all attributes of the normalized attribute order
+	 */
+	@Override
+	public Instances load(File file) {
+		Instances data;
+		// try-with-resources closes the reader even if the parsing of the file fails
+		try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
+			data = new Instances(reader);
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+
+		//setting class attribute
+		data.setClassIndex(data.numAttributes() - 1);
+
+		// normalize attribute names
+		for( int i=0; i<data.numAttributes(); i++) {
+			String mapValue = attributeNameMap.get(data.attribute(i).name());
+			if( mapValue!= null ) {
+				data.renameAttribute(i, mapValue);
+			}
+		}
+
+		// determine new attribute order (unwanted attributes are implicitly removed)
+		final StringBuilder orderBuilder = new StringBuilder();
+		for( String attName : attributeOrder ) {
+			for( int i=0; i<data.numAttributes(); i++) {
+				if(attName.equals(data.attribute(i).name())) {
+					if( orderBuilder.length()>0 ) {
+						orderBuilder.append(",");
+					}
+					// the indices of the filter option are 1-based
+					orderBuilder.append(i+1);
+				}
+			}
+		}
+		if( orderBuilder.length()==0 ) {
+			// none of the expected attributes is part of the file
+			throw new RuntimeException("Invalid number of attributes");
+		}
+
+		String relationName = data.relationName();
+		String[] options = new String[2];
+		options[0] = "-R";
+		options[1] = orderBuilder.toString();
+		Reorder reorder = new Reorder();
+		try {
+			reorder.setOptions(options);
+			reorder.setInputFormat(data);
+			data = Filter.useFilter(data, reorder);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		}
+		if( data.numAttributes()!=attributeOrder.size() ) {
+			throw new RuntimeException("Invalid number of attributes");
+		}
+
+		// normalize bug nominal values: a new class attribute with the labels 0,1 is added behind the
+		// old class attribute, filled, and then used as replacement of the old class attribute
+		Add add = new Add();
+		add.setAttributeIndex("last");
+		add.setNominalLabels("0,1");
+		add.setAttributeName("bug-new");
+		try {
+			add.setInputFormat(data);
+			data = Filter.useFilter(data, add);
+		} catch (Exception e) {
+			// without the new attribute the following normalization cannot work
+			throw new RuntimeException(e);
+		}
+		data.setRelationName(relationName);
+
+		// class value (index of the nominal label) that marks an instance as defective: if the first label
+		// is a positive one (Y, yes, true) it is the value 0.0, otherwise the value 1.0
+		double classValue;
+
+		String firstValue = data.classAttribute().enumerateValues().nextElement().toString();
+		if( firstValue.equals("Y") || firstValue.equals("yes") || firstValue.equals("true") ) {
+			classValue = 0.0;
+		} else {
+			classValue = 1.0;
+		}
+
+		for( int i=0 ; i<data.numInstances() ; i++ ) {
+			if( data.instance(i).classValue() == classValue ) {
+				data.instance(i).setValue(data.classIndex()+1, 1.0);
+			} else {
+				data.instance(i).setValue(data.classIndex()+1, 0.0);
+			}
+		}
+
+		// the class index must be changed first, because the class attribute cannot be deleted
+		int oldClassIndex = data.classIndex();
+		data.setClassIndex(oldClassIndex+1);
+		data.deleteAttributeAt(oldClassIndex);
+
+		return data;
+	}
+
+	/**
+	 * Accepts all files with the ending ".arff".
+	 * @param filename name of the file
+	 * @return true if the filename ends with ".arff"
+	 */
+	@Override
+	public boolean filenameFilter(String filename) {
+		return filename.endsWith(".arff");
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BaggingTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BaggingTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BaggingTraining.java	(revision 2)
@@ -0,0 +1,118 @@
+package de.ugoe.cs.cpdp.training;
+
+import java.io.PrintStream;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.apache.commons.io.output.NullOutputStream;
+
+import weka.classifiers.AbstractClassifier;
+import weka.classifiers.Classifier;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+
+public abstract class BaggingTraining implements ISetWiseTrainingStrategy, WekaCompatibleTrainer {
+	/** Supplies a new, untrained base classifier; called once for every training data set. */
+	protected abstract Classifier setupClassifier();
+
+	/** The ensemble returned by getClassifier() and rebuilt on every call to apply(). */
+	private final TraindatasetBagging classifier = new TraindatasetBagging();
+
+	/** Trains one model per data set; System.err is muted meanwhile and failures are rethrown unchecked. */
+	@Override
+	public void apply(SetUniqueList<Instances> traindataSet) {
+		PrintStream errStr	= System.err;
+		System.setErr(new PrintStream(new NullOutputStream()));
+		try {
+			classifier.buildClassifier(traindataSet);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		} finally {
+			System.setErr(errStr);
+		}
+	}
+
+	@Override
+	public Classifier getClassifier() {
+		return classifier;
+	}
+
+	@Override
+	public void setParameter(String parameters) {
+		// TODO should allow passing of weka parameters to the classifier
+	}
+
+	/** One base classifier per training data set; predicts 1.0 iff the mean member prediction is >= 0.5. */
+	public class TraindatasetBagging extends AbstractClassifier {
+		private static final long serialVersionUID = 1L;
+		/** Header-only copy of each member's training data, index-aligned with classifiers. */
+		private List<Instances> trainingData = null;
+		/** Trained members; null until one of the buildClassifier methods has been called. */
+		private List<Classifier> classifiers = null;
+
+		/** Returns 0.0 before training; afterwards the thresholded mean of all member predictions. */
+		@Override
+		public double classifyInstance(Instance instance) {
+			if( classifiers==null ) {
+				return 0.0; // TODO check how WEKA expects classifyInstance to behave if no classifier exists yet
+			}
+
+			double classification = 0.0;
+			for( int i=0 ; i<classifiers.size(); i++ ) {
+				Classifier classifier = classifiers.get(i);
+				Instances header = trainingData.get(i);
+
+				Set<String> attributeNames = new HashSet<>();
+				for( int j=0; j<header.numAttributes(); j++ ) {
+					attributeNames.add(header.attribute(j).name());
+				}
+				// NOTE(review): the kept values follow the instance's attribute order; this is
+				// presumably the same order as in the training header -- confirm.
+				double[] values = new double[header.numAttributes()];
+				int index = 0;
+				for( int j=0; j<instance.numAttributes(); j++ ) {
+					if( attributeNames.contains(instance.attribute(j).name())) {
+						values[index] = instance.value(j);
+						index++;
+					}
+				}
+				Instance instCopy = new DenseInstance(instance.weight(), values);
+				instCopy.setDataset(new Instances(header, 0)); // header only, no instances are copied
+				try {
+					classification += classifier.classifyInstance(instCopy);
+				} catch (Exception e) {
+					throw new RuntimeException("bagging classifier could not classify an instance", e);
+				}
+			}
+			classification /= classifiers.size();
+			return (classification>=0.5) ? 1.0 : 0.0;
+		}
+
+		/** Replaces the ensemble with one freshly trained member per data set in traindataSet. */
+		public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
+			classifiers = new LinkedList<>();
+			trainingData = new LinkedList<>();
+			for( Instances traindata : traindataSet ) {
+				Classifier classifier = setupClassifier();
+				classifier.buildClassifier(traindata);
+				classifiers.add(classifier);
+				trainingData.add(new Instances(traindata, 0)); // only the header is needed later
+			}
+		}
+
+		/** Replaces the ensemble with a single member trained on traindata. */
+		@Override
+		public void buildClassifier(Instances traindata) throws Exception {
+			classifiers = new LinkedList<>();
+			trainingData = new LinkedList<>();
+			final Classifier classifier = setupClassifier();
+			classifier.buildClassifier(traindata);
+			classifiers.add(classifier);
+			trainingData.add(new Instances(traindata, 0)); // only the header is needed later
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BayesNetBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BayesNetBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BayesNetBagging.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.bayes.BayesNet;
+
+public class BayesNetBagging extends BaggingTraining {
+	/** Creates the base learner used for each training data set: a BayesNet with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new BayesNet();
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "BayesNetBagging";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BayesNetTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BayesNetTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/BayesNetTraining.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.bayes.BayesNet;
+
+public class BayesNetTraining extends WekaTraining {
+	/** Creates the classifier trained by this trainer: a BayesNet with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new BayesNet();
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "BayesNet";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/DecisionTreeBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/DecisionTreeBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/DecisionTreeBagging.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.trees.J48;
+
+public class DecisionTreeBagging extends BaggingTraining {
+	/** Creates the base learner used for each training data set: a J48 tree with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new J48();
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "C4.5-DTreeBagging";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/DecisionTreeTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/DecisionTreeTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/DecisionTreeTraining.java	(revision 2)
@@ -0,0 +1,27 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.meta.CVParameterSelection;
+import weka.classifiers.trees.J48;
+
+public class DecisionTreeTraining extends WekaTraining {
+	/** Creates a J48 tree wrapped in a CVParameterSelection that tunes J48's -C option. */
+	@Override
+	protected Classifier setupClassifier() {
+		final CVParameterSelection tuner = new CVParameterSelection();
+		tuner.setClassifier(new J48());
+		try {
+			tuner.setNumFolds(5); // 5-fold cross-validation
+			tuner.addCVParameter("C 0.1 0.5 5"); // option C: from 0.1 to 0.5 in 5 steps
+		} catch (Exception configError) {
+			throw new RuntimeException(configError);
+		}
+		return tuner;
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "C4.5-DTree";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ISetWiseTrainingStrategy.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ISetWiseTrainingStrategy.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ISetWiseTrainingStrategy.java	(revision 2)
@@ -0,0 +1,13 @@
+package de.ugoe.cs.cpdp.training;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instances;
+
+// Bagging strategy: the trainer receives all training data sets at once, so that separate models can be built for each training data set
+public interface ISetWiseTrainingStrategy extends ITrainer {
+	/** Applies the trainer to the given collection of training data sets. */
+	void apply(SetUniqueList<Instances> traindataSet);
+	/** Returns the name of this trainer. */
+	String getName();
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainer.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainer.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainer.java	(revision 2)
@@ -0,0 +1,7 @@
+package de.ugoe.cs.cpdp.training;
+
+import de.ugoe.cs.cpdp.IParameterizable;
+
+public interface ITrainer extends IParameterizable {
+	// Common base type of all trainers; declares no members beyond those inherited from IParameterizable.
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainingStrategy.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainingStrategy.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainingStrategy.java	(revision 2)
@@ -0,0 +1,8 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.core.Instances;
+
+public interface ITrainingStrategy extends ITrainer {
+	/** Applies the trainer to a single training data set. */
+	void apply(Instances traindata);
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/LogisticRegressionBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/LogisticRegressionBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/LogisticRegressionBagging.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.Logistic;
+
+public class LogisticRegressionBagging extends BaggingTraining {
+	/** Creates the base learner used for each training data set: a Logistic with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new Logistic();
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "LogisticRegressionBagging";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/LogisticRegressionTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/LogisticRegressionTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/LogisticRegressionTraining.java	(revision 2)
@@ -0,0 +1,17 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.Logistic;
+
+public class LogisticRegressionTraining extends WekaTraining {
+	/** Creates the classifier trained by this trainer: a Logistic with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new Logistic();
+	}
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "LogisticRegression";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NaiveBayesBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NaiveBayesBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NaiveBayesBagging.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.bayes.NaiveBayes;
+
+public class NaiveBayesBagging extends BaggingTraining {
+	/** Creates the base learner used for each training data set: a NaiveBayes with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new NaiveBayes();
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "NaiveBayesBagging";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NaiveBayesTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NaiveBayesTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NaiveBayesTraining.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.bayes.NaiveBayes;
+
+public class NaiveBayesTraining extends WekaTraining {
+	/** Creates the classifier trained by this trainer: a NaiveBayes with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new NaiveBayes();
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "NaiveBayes";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NeuralNetworkBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NeuralNetworkBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NeuralNetworkBagging.java	(revision 2)
@@ -0,0 +1,18 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.MultilayerPerceptron;
+
+public class NeuralNetworkBagging extends BaggingTraining {
+	/** Creates the base learner used for each training data set: a MultilayerPerceptron with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new MultilayerPerceptron();
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "NeuralNetworkBagging";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NeuralNetworkTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NeuralNetworkTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/NeuralNetworkTraining.java	(revision 2)
@@ -0,0 +1,17 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.MultilayerPerceptron;
+
+public class NeuralNetworkTraining extends WekaTraining {
+	/** Creates the classifier trained by this trainer: a MultilayerPerceptron with default options. */
+	@Override
+	protected Classifier setupClassifier() {
+		return new MultilayerPerceptron();
+	}
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "NeuralNetwork";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomForestBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomForestBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomForestBagging.java	(revision 2)
@@ -0,0 +1,28 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.meta.CVParameterSelection;
+import weka.classifiers.trees.RandomForest;
+
+public class RandomForestBagging extends BaggingTraining {
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "RandomForestBagging";
+	}
+
+	/** Creates the base learner: a RandomForest wrapped in a CVParameterSelection that tunes its -I option. */
+	@Override
+	protected Classifier setupClassifier() {
+		final CVParameterSelection tuner = new CVParameterSelection();
+		final RandomForest forest = new RandomForest();
+		tuner.setClassifier(forest);
+		try {
+			tuner.setNumFolds(5); // 5-fold cross-validation
+			tuner.addCVParameter("I 5 25 5"); // option I: from 5 to 25 in 5 steps
+		} catch (Exception configError) {
+			throw new RuntimeException(configError);
+		}
+		return tuner;
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomForestTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomForestTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomForestTraining.java	(revision 2)
@@ -0,0 +1,28 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.meta.CVParameterSelection;
+import weka.classifiers.trees.RandomForest;
+
+public class RandomForestTraining extends WekaTraining {
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "RandomForest";
+	}
+
+	/** Creates the classifier: a RandomForest wrapped in a CVParameterSelection that tunes its -I option. */
+	@Override
+	protected Classifier setupClassifier() {
+		final CVParameterSelection tuner = new CVParameterSelection();
+		final RandomForest forest = new RandomForest();
+		tuner.setClassifier(forest);
+		try {
+			tuner.setNumFolds(5); // 5-fold cross-validation
+			tuner.addCVParameter("I 5 25 5"); // option I: from 5 to 25 in 5 steps
+		} catch (Exception configError) {
+			throw new RuntimeException(configError);
+		}
+		return tuner;
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMOPolyKernelBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMOPolyKernelBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMOPolyKernelBagging.java	(revision 2)
@@ -0,0 +1,28 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.SMO;
+
+public class SMOPolyKernelBagging extends BaggingTraining {
+	/**
+	 * Creates the base learner: an SMO with default options (no kernel is set explicitly).
+	 * Parameter tuning is switched off; the disabled variant was:
+	 * <pre>
+	 * CVParameterSelection ps = new CVParameterSelection();
+	 * ps.setClassifier(new SMO());
+	 * ps.setNumFolds(5);
+	 * ps.addCVParameter("C 1 100 4");
+	 * </pre>
+	 */
+	@Override
+	protected Classifier setupClassifier() {
+		final Classifier svm = new SMO();
+		return svm;
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "SMOPolyBagging";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMOPolyKernelTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMOPolyKernelTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMOPolyKernelTraining.java	(revision 2)
@@ -0,0 +1,28 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.SMO;
+
+public class SMOPolyKernelTraining extends WekaTraining {
+	/**
+	 * Creates the classifier: an SMO with default options (no kernel is set explicitly).
+	 * Parameter tuning is switched off; the disabled variant was:
+	 * <pre>
+	 * CVParameterSelection ps = new CVParameterSelection();
+	 * ps.setClassifier(new SMO());
+	 * ps.setNumFolds(5);
+	 * ps.addCVParameter("C 1 100 4");
+	 * </pre>
+	 */
+	@Override
+	protected Classifier setupClassifier() {
+		final Classifier svm = new SMO();
+		return svm;
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "SMOPoly";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMORBFKernelBagging.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMORBFKernelBagging.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMORBFKernelBagging.java	(revision 2)
@@ -0,0 +1,31 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.SMO;
+import weka.classifiers.functions.supportVector.RBFKernel;
+
+public class SMORBFKernelBagging extends BaggingTraining {
+	/**
+	 * Creates the base learner: an SMO that uses an RBFKernel and otherwise default options.
+	 * Parameter tuning is switched off; the disabled variant was:
+	 * <pre>
+	 * CVParameterSelection ps = new CVParameterSelection();
+	 * ps.setClassifier(smoRBF);
+	 * ps.setNumFolds(5);
+	 * ps.addCVParameter("C 1 100 4");
+	 * </pre>
+	 */
+	@Override
+	protected Classifier setupClassifier() {
+		final SMO svm = new SMO();
+		final RBFKernel kernel = new RBFKernel();
+		svm.setKernel(kernel);
+		return svm;
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "SMORBFBagging";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMORBFKernelTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMORBFKernelTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/SMORBFKernelTraining.java	(revision 2)
@@ -0,0 +1,31 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+import weka.classifiers.functions.SMO;
+import weka.classifiers.functions.supportVector.RBFKernel;
+
+public class SMORBFKernelTraining extends WekaTraining {
+	/**
+	 * Creates the classifier: an SMO that uses an RBFKernel and otherwise default options.
+	 * Parameter tuning is switched off; the disabled variant was:
+	 * <pre>
+	 * CVParameterSelection ps = new CVParameterSelection();
+	 * ps.setClassifier(smoRBF);
+	 * ps.setNumFolds(5);
+	 * ps.addCVParameter("C 1 100 4");
+	 * </pre>
+	 */
+	@Override
+	protected Classifier setupClassifier() {
+		final SMO svm = new SMO();
+		final RBFKernel kernel = new RBFKernel();
+		svm.setKernel(kernel);
+		return svm;
+	}
+
+	/** Returns the name of this trainer. */
+	@Override
+	public String getName() {
+		return "SMORBF";
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaggingTraining2.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaggingTraining2.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaggingTraining2.java	(revision 2)
@@ -0,0 +1,126 @@
+package de.ugoe.cs.cpdp.training;
+
+import java.io.PrintStream;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.apache.commons.io.output.NullOutputStream;
+
+import weka.classifiers.AbstractClassifier;
+import weka.classifiers.Classifier;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Programmatic WekaBaggingTraining.
+ *
+ * The first parameter is the trainer name, the second parameter is the class name of the
+ * WEKA classifier; all subsequent parameters are configuration params (for example for trees).
+ *
+ * XML Configurations for Weka Classifiers:
+ * <pre>
+ * {@code
+ * <!-- examples -->
+ * <setwisetrainer name="WekaBaggingTraining2" param="NaiveBayesBagging weka.classifiers.bayes.NaiveBayes" />
+ * <setwisetrainer name="WekaBaggingTraining2" param="LogisticBagging weka.classifiers.functions.Logistic -R 1.0E-8 -M -1" />
+ * }
+ * </pre>
+ */
+public class WekaBaggingTraining2 extends WekaBaseTraining2 implements ISetWiseTrainingStrategy {
+
+	/** The ensemble returned by getClassifier() and rebuilt on every call to apply(). */
+	private final TraindatasetBagging classifier = new TraindatasetBagging();
+
+	@Override
+	public Classifier getClassifier() {
+		return classifier;
+	}
+
+	/** Trains one model per data set; System.err is muted meanwhile and failures are rethrown unchecked. */
+	@Override
+	public void apply(SetUniqueList<Instances> traindataSet) {
+		PrintStream errStr	= System.err;
+		System.setErr(new PrintStream(new NullOutputStream()));
+		try {
+			classifier.buildClassifier(traindataSet);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		} finally {
+			System.setErr(errStr);
+		}
+	}
+
+	/** One base classifier per training data set; predicts 1.0 iff the mean member prediction is >= 0.5. */
+	public class TraindatasetBagging extends AbstractClassifier {
+		private static final long serialVersionUID = 1L;
+		/** Header-only copy of each member's training data, index-aligned with classifiers. */
+		private List<Instances> trainingData = null;
+		/** Trained members; null until one of the buildClassifier methods has been called. */
+		private List<Classifier> classifiers = null;
+
+		/** Returns 0.0 before training; afterwards the thresholded mean of all member predictions. */
+		@Override
+		public double classifyInstance(Instance instance) {
+			if( classifiers==null ) {
+				return 0.0; // TODO check how WEKA expects classifyInstance to behave if no classifier exists yet
+			}
+
+			double classification = 0.0;
+			for( int i=0 ; i<classifiers.size(); i++ ) {
+				Classifier classifier = classifiers.get(i);
+				Instances header = trainingData.get(i);
+
+				Set<String> attributeNames = new HashSet<>();
+				for( int j=0; j<header.numAttributes(); j++ ) {
+					attributeNames.add(header.attribute(j).name());
+				}
+				// NOTE(review): the kept values follow the instance's attribute order; this is
+				// presumably the same order as in the training header -- confirm.
+				double[] values = new double[header.numAttributes()];
+				int index = 0;
+				for( int j=0; j<instance.numAttributes(); j++ ) {
+					if( attributeNames.contains(instance.attribute(j).name())) {
+						values[index] = instance.value(j);
+						index++;
+					}
+				}
+				Instance instCopy = new DenseInstance(instance.weight(), values);
+				instCopy.setDataset(new Instances(header, 0)); // header only, no instances are copied
+				try {
+					classification += classifier.classifyInstance(instCopy);
+				} catch (Exception e) {
+					throw new RuntimeException("bagging classifier could not classify an instance", e);
+				}
+			}
+			classification /= classifiers.size();
+			return (classification>=0.5) ? 1.0 : 0.0;
+		}
+
+		/** Replaces the ensemble with one freshly trained member per data set in traindataSet. */
+		public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception {
+			classifiers = new LinkedList<>();
+			trainingData = new LinkedList<>();
+			for( Instances traindata : traindataSet ) {
+				Classifier classifier = setupClassifier();
+				classifier.buildClassifier(traindata);
+				classifiers.add(classifier);
+				trainingData.add(new Instances(traindata, 0)); // only the header is needed later
+			}
+		}
+
+		/** Replaces the ensemble with a single member trained on traindata. */
+		@Override
+		public void buildClassifier(Instances traindata) throws Exception {
+			classifiers = new LinkedList<>();
+			trainingData = new LinkedList<>();
+			final Classifier classifier = setupClassifier();
+			classifier.buildClassifier(traindata);
+			classifiers.add(classifier);
+			trainingData.add(new Instances(traindata, 0)); // only the header is needed later
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining2.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining2.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining2.java	(revision 2)
@@ -0,0 +1,103 @@
+package de.ugoe.cs.cpdp.training;
+
+import java.util.Arrays;
+import java.util.logging.Level;
+
+import de.ugoe.cs.util.console.Console;
+import weka.core.OptionHandler;
+import weka.classifiers.Classifier;
+import weka.classifiers.meta.CVParameterSelection;
+
+public abstract class WekaBaseTraining2 implements WekaCompatibleTrainer {
+	
+	protected Classifier classifier = null;
+	protected String classifierClassName;
+	protected String classifierName;
+	protected String[] classifierParams;
+	
+	@Override
+	public void setParameter(String parameters) {
+		String[] params = parameters.split(" ");
+
+		// first is classifierName
+		classifierName = params[0];
+		
+		// all following parameters can be copied from weka!
+		
+		// second param is classifierClassName
+		classifierClassName = params[1];
+	
+		// rest are params to the specified classifier
+		classifierParams = Arrays.copyOfRange(params, 2, params.length);
+		
+		classifier = setupClassifier();
+	}
+
+	@Override
+	public Classifier getClassifier() {
+		return classifier;
+	}
+
+	public Classifier setupClassifier() {
+		Classifier cl = null;
+		try{
+			@SuppressWarnings("rawtypes")
+			Class c = Class.forName(classifierClassName);
+			Classifier obj = (Classifier) c.newInstance();
+			
+			// Filter -CVPARAM
+			String[] param = Arrays.copyOf(classifierParams, classifierParams.length);
+			String[] cvparam = {};
+			boolean cv = false;
+			for ( int i=0; i < classifierParams.length; i++ ) {
+				if(classifierParams[i].equals("-CVPARAM")) {
+					// rest of array are cvparam
+					cvparam = Arrays.copyOfRange(classifierParams, i+1, classifierParams.length);
+					
+					// before this we have normal params
+					param = Arrays.copyOfRange(classifierParams, 0, i);
+					
+					cv = true;
+					break;
+				}
+			}
+			
+			// set classifier params
+			((OptionHandler)obj).setOptions(param);
+			cl = obj;
+			
+			// we have cross val params
+			// cant check on cvparam.length may not be initialized			
+			if(cv) {
+				final CVParameterSelection ps = new CVParameterSelection();
+				ps.setClassifier(obj);
+				ps.setNumFolds(5);
+				//ps.addCVParameter("I 5 25 5");
+				ps.addCVParameter(Arrays.asList(cvparam).toString().replaceAll(", ", " ").replaceAll("^\\[|\\]$", ""));
+				
+				cl = ps;
+			}
+
+		}catch(ClassNotFoundException e) {
+			Console.traceln(Level.WARNING, String.format("class not found: %s", e.toString()));
+			e.printStackTrace();
+		} catch (InstantiationException e) {
+			Console.traceln(Level.WARNING, String.format("Instantiation Exception: %s", e.toString()));
+			e.printStackTrace();
+		} catch (IllegalAccessException e) {
+			Console.traceln(Level.WARNING, String.format("Illegal Access Exception: %s", e.toString()));
+			e.printStackTrace();
+		} catch (Exception e) {
+			Console.traceln(Level.WARNING, String.format("Exception: %s", e.toString()));
+			e.printStackTrace();
+		}
+		
+		return cl;
+	}
+
+	@Override
+	public String getName() {
+		return classifierName;
+	}
+	
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaClusterTraining2.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaClusterTraining2.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaClusterTraining2.java	(revision 2)
@@ -0,0 +1,186 @@
+package de.ugoe.cs.cpdp.training;
+
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.logging.Level;
+
+import org.apache.commons.io.output.NullOutputStream;
+
+import de.ugoe.cs.util.console.Console;
+import weka.classifiers.AbstractClassifier;
+import weka.classifiers.Classifier;
+import weka.clusterers.EM;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.Remove;
+
+/**
+ * WekaClusterTraining2
+ *
+ * <ol>
+ * <li>Cluster the training data (class attribute removed) with EM.</li>
+ * <li>For each cluster, train a classifier with the training data of that cluster.</li>
+ * <li>Match each test data instance to a cluster, then classify it with the classifier of that cluster.</li>
+ * </ol>
+ *
+ * XML config:
+ * <pre>
+ * {@code
+ * <!-- because of clustering -->
+ * <preprocessor name="Normalization" param=""/>
+ *
+ * <!-- cluster trainer -->
+ * <trainer name="WekaClusterTraining2" param="NaiveBayes weka.classifiers.bayes.NaiveBayes" />
+ * }
+ * </pre>
+ *
+ * Open question: how do we configure the clustering params?
+ */
+public class WekaClusterTraining2 extends WekaBaseTraining2 implements ITrainingStrategy {
+
+	/** The cluster-wise classifier returned by getClassifier() and rebuilt by apply(). */
+	private final TraindatasetCluster classifier = new TraindatasetCluster();
+
+	@Override
+	public Classifier getClassifier() {
+		return classifier;
+	}
+
+	/** Builds the cluster-wise classifier; System.err is muted meanwhile and failures are rethrown unchecked. */
+	@Override
+	public void apply(Instances traindata) {
+		PrintStream errStr	= System.err;
+		System.setErr(new PrintStream(new NullOutputStream()));
+		try {
+			classifier.buildClassifier(traindata);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		} finally {
+			System.setErr(errStr);
+		}
+	}
+
+	/** Classifier that routes each instance to the model trained for the instance's EM cluster. */
+	public class TraindatasetCluster extends AbstractClassifier {
+
+		private static final long serialVersionUID = 1L;
+
+		/** EM clusterer fitted on the training data without its class attribute. */
+		private EM clusterer = null;
+
+		/** Cluster number -> classifier trained on the data of that cluster. */
+		private HashMap<Integer, Classifier> cclassifier = new HashMap<Integer, Classifier>();
+		/** Cluster number -> training data (class attribute included) assigned to that cluster. */
+		private HashMap<Integer, Instances> ctraindata = new HashMap<Integer, Instances>();
+
+		/**
+		 * Copies the values of those attributes of instance whose names also occur in instances
+		 * into a new instance that is attached to an empty data set with the header of instances.
+		 * NOTE(review): the values follow the attribute order of instance, which is presumably
+		 * the same order as in instances -- confirm.
+		 */
+		private Instance createInstance(Instances instances, Instance instance) {
+			Set<String> attributeNames = new HashSet<>();
+			for( int j=0; j<instances.numAttributes(); j++ ) {
+				attributeNames.add(instances.attribute(j).name());
+			}
+
+			double[] values = new double[instances.numAttributes()];
+			int index = 0;
+			for( int j=0; j<instance.numAttributes(); j++ ) {
+				if( attributeNames.contains(instance.attribute(j).name())) {
+					values[index] = instance.value(j);
+					index++;
+				}
+			}
+
+			Instance instCopy = new DenseInstance(instance.weight(), values);
+			instCopy.setDataset(new Instances(instances, 0)); // header only, no instances are copied
+			return instCopy;
+		}
+
+		/**
+		 * Assigns instance to a cluster and returns the prediction of that cluster's classifier.
+		 *
+		 * @throws RuntimeException if anything fails, e.g. if no classifier exists for the cluster
+		 */
+		@Override
+		public double classifyInstance(Instance instance) {
+			double ret = 0;
+			try {
+				// every per-cluster data set carries the training header, so any entry will do;
+				// cluster 0 is not guaranteed to have received training data
+				Instances header = new Instances(ctraindata.values().iterator().next(), 0);
+				Instance classInstance = createInstance(header, instance);
+
+				// remove class attribute before clustering (filtering the empty header is sufficient)
+				Remove filter = new Remove();
+				filter.setAttributeIndices("" + (header.classIndex() + 1));
+				filter.setInputFormat(header);
+				Instances clusterHeader = Filter.useFilter(header, filter);
+
+				Instance clusterInstance = createInstance(clusterHeader, instance);
+
+				// 1. classify testdata instance to a cluster number
+				int cnum = clusterer.clusterInstance(clusterInstance);
+
+				// 2. classify testdata instance with the classifier of that cluster
+				ret = cclassifier.get(cnum).classifyInstance(classInstance);
+			}catch( Exception e ) {
+				Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster!"));
+				throw new RuntimeException(e);
+			}
+			return ret;
+		}
+
+		/**
+		 * Clusters traindata with EM (class attribute removed) and trains one classifier per
+		 * cluster that received training data.
+		 * Results of an earlier call are discarded.
+		 */
+		@Override
+		public void buildClassifier(Instances traindata) throws Exception {
+			// start from a clean slate so that a rebuild does not mix in data of earlier runs
+			cclassifier.clear();
+			ctraindata.clear();
+
+			// 1. copy traindata
+			Instances train = new Instances(traindata);
+
+			// 2. remove class attribute for clustering
+			Remove filter = new Remove();
+			filter.setAttributeIndices("" + (train.classIndex() + 1));
+			filter.setInputFormat(train);
+			train = Filter.useFilter(train, filter);
+
+			// 3. cluster data, use standard params for now
+			clusterer = new EM();
+			clusterer.buildClusterer(train);
+			// set max num to traindata size -- NOTE(review): done after buildClusterer(); confirm it has the intended effect
+			clusterer.setMaximumNumberOfClusters(train.size());
+
+			// 4. collect the training data (with class attribute) of each cluster
+			int cnumber;
+			for ( int j=0; j < train.numInstances(); j++ ) {
+				cnumber = clusterer.clusterInstance(train.get(j));
+				if ( !ctraindata.containsKey(cnumber) ) {
+					ctraindata.put(cnumber, new Instances(traindata, 0)); // empty set with the full header
+				}
+				ctraindata.get(cnumber).add(traindata.get(j));
+			}
+
+			// 5. train one classifier per cluster, we get the clusternumber from the traindata
+			Iterator<Integer> clusternumber = ctraindata.keySet().iterator();
+			while ( clusternumber.hasNext() ) {
+				cnumber = clusternumber.next();
+				cclassifier.put(cnumber,setupClassifier());
+				cclassifier.get(cnumber).buildClassifier(ctraindata.get(cnumber));
+			}
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaCompatibleTrainer.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaCompatibleTrainer.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaCompatibleTrainer.java	(revision 2)
@@ -0,0 +1,22 @@
+package de.ugoe.cs.cpdp.training;
+
+import weka.classifiers.Classifier;
+
+/**
+ * Trainer that exposes a Weka {@link Classifier} together with a name.
+ */
+public interface WekaCompatibleTrainer extends ITrainer {
+
+	/**
+	 * Returns the name of this trainer.
+	 * @return the name
+	 */
+	String getName();
+
+	/**
+	 * Returns the Weka classifier of this trainer.
+	 * @return the classifier
+	 */
+	Classifier getClassifier();
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java	(revision 2)
@@ -0,0 +1,64 @@
+package de.ugoe.cs.cpdp.training;
+
+import java.io.PrintStream;
+
+import org.apache.commons.io.output.NullOutputStream;
+
+import weka.classifiers.Classifier;
+import weka.core.Instances;
+
+/**
+ * Base class for training strategies that train a single Weka classifier on the training data.
+ * Subclasses provide the classifier through {@link #setupClassifier()}.
+ */
+public abstract class WekaTraining implements ITrainingStrategy, WekaCompatibleTrainer {
+
+	/**
+	 * the classifier that is trained; created once when the instance is initialized
+	 */
+	// NOTE(review): setupClassifier() is invoked from a field initializer, i.e., before the
+	// constructor of the subclass has run - implementations must not rely on subclass state.
+	private final Classifier classifier = setupClassifier();
+
+	/**
+	 * Creates the classifier used by this training strategy.
+	 * @return the classifier
+	 */
+	protected abstract Classifier setupClassifier();
+
+	/**
+	 * @see de.ugoe.cs.cpdp.training.WekaCompatibleTrainer#getClassifier()
+	 */
+	@Override
+	public Classifier getClassifier() {
+		return classifier;
+	}
+
+	/**
+	 * Builds the classifier with the training data. Output to {@link System#err} is discarded
+	 * while the classifier is built; any exception is rethrown as {@link RuntimeException}.
+	 * @param traindata the training data
+	 */
+	@Override
+	public void apply(Instances traindata) {
+		final PrintStream originalErr = System.err;
+		final PrintStream silentErr = new PrintStream(new NullOutputStream());
+		System.setErr(silentErr);
+		try {
+			classifier.buildClassifier(traindata);
+		} catch (Exception cause) {
+			throw new RuntimeException(cause);
+		} finally {
+			System.setErr(originalErr);
+		}
+	}
+
+	/**
+	 * Currently ignores the parameters.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// TODO should allow passing of weka parameters to the classifier
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining2.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining2.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining2.java	(revision 2)
@@ -0,0 +1,53 @@
+package de.ugoe.cs.cpdp.training;
+
+import java.io.PrintStream;
+import java.util.logging.Level;
+
+import org.apache.commons.io.output.NullOutputStream;
+
+import de.ugoe.cs.util.console.Console;
+import weka.core.Instances;
+
+/**
+ * Programmatic Weka training of a single classifier.
+ *
+ * The first parameter is the trainer name, the second parameter is the class name of the
+ * classifier. All subsequent parameters are configuration parameters of the classifier (for
+ * example for trees).
+ *
+ * XML Configurations for Weka Classifiers:
+ * <pre>
+ * {@code
+ * <!-- examples -->
+ * <trainer name="WekaTraining2" param="NaiveBayes weka.classifiers.bayes.NaiveBayes" />
+ * <trainer name="WekaTraining2" param="Logistic weka.classifiers.functions.Logistic -R 1.0E-8 -M -1" />
+ * }
+ * </pre>
+ */
+public class WekaTraining2 extends WekaBaseTraining2 implements ITrainingStrategy {
+
+	/**
+	 * Builds the configured classifier with the training data. Output to {@link System#err} is
+	 * discarded while the classifier is built.
+	 * @param traindata the training data
+	 * @throws IllegalStateException if no classifier is available
+	 * @throws RuntimeException if building the classifier fails
+	 */
+	@Override
+	public void apply(Instances traindata) {
+		if (classifier == null) {
+			// fail with a clear message instead of a wrapped NullPointerException
+			Console.traceln(Level.WARNING, "classifier null!");
+			throw new IllegalStateException("classifier null!");
+		}
+		final PrintStream errStr = System.err;
+		System.setErr(new PrintStream(new NullOutputStream()));
+		try {
+			classifier.buildClassifier(traindata);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		} finally {
+			System.setErr(errStr);
+		}
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/AbstractVersionFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/AbstractVersionFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/AbstractVersionFilter.java	(revision 2)
@@ -0,0 +1,32 @@
+package de.ugoe.cs.cpdp.versions;
+
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Skeletal implementation of {@link IVersionFilter} that provides the filtering of lists
+ * based on the filtering of single versions.
+ * @author Steffen Herbold
+ */
+public abstract class AbstractVersionFilter implements IVersionFilter {
+
+	/**
+	 * Removes all versions to which {@link #apply(SoftwareVersion)} applies from the list.
+	 * @param versions list of versions; modified in place through its iterator
+	 * @return number of removed versions
+	 * @see de.ugoe.cs.cpdp.versions.IVersionFilter#apply(java.util.List)
+	 */
+	@Override
+	public int apply(List<SoftwareVersion> versions) {
+		int removedCount = 0;
+		final Iterator<SoftwareVersion> versionIter = versions.iterator();
+		while (versionIter.hasNext()) {
+			if (!apply(versionIter.next())) {
+				continue;
+			}
+			versionIter.remove();
+			removedCount++;
+		}
+		return removedCount;
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/IVersionFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/IVersionFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/IVersionFilter.java	(revision 2)
@@ -0,0 +1,28 @@
+package de.ugoe.cs.cpdp.versions;
+
+import java.util.List;
+
+import de.ugoe.cs.cpdp.IParameterizable;
+
+/**
+ * Interface for filters that work on {@link SoftwareVersion}s.
+ * @author Steffen Herbold
+ */
+public interface IVersionFilter extends IParameterizable {
+
+	/**
+	 * Applies the filter to a list of versions. Versions where the filter applies are
+	 * removed from the list.
+	 * @param versions list of versions
+	 * @return number of removed versions
+	 */
+	int apply(List<SoftwareVersion> versions);
+
+	/**
+	 * Applies the filter to a single version.
+	 * @param version the version
+	 * @return true if the filter applies to the version, false otherwise
+	 */
+	boolean apply(SoftwareVersion version);
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MaxInstanceNumberFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MaxInstanceNumberFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MaxInstanceNumberFilter.java	(revision 2)
@@ -0,0 +1,35 @@
+package de.ugoe.cs.cpdp.versions;
+
+/**
+ * Filter for large data sets. The filter applies to all data sets that have more instances
+ * than the allowed maximum.
+ * @author Steffen Herbold
+ */
+public class MaxInstanceNumberFilter extends AbstractVersionFilter {
+
+	/**
+	 * maximum number of instances allowed
+	 */
+	// NOTE(review): with the default of 0 the filter applies to every non-empty data set
+	// until setParameter is called - confirm that this is intended.
+	private int maxInstances = 0;
+
+	/**
+	 * Sets the maximum number of instances.
+	 * @param parameters maximum number of instances as string
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		maxInstances = Integer.parseInt(parameters);
+	}
+
+	/**
+	 * @see de.ugoe.cs.cpdp.versions.IVersionFilter#apply(de.ugoe.cs.cpdp.versions.SoftwareVersion)
+	 */
+	@Override
+	public boolean apply(SoftwareVersion version) {
+		final int numInstances = version.getInstances().numInstances();
+		return maxInstances < numInstances;
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MinInstanceNumberFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MinInstanceNumberFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/MinInstanceNumberFilter.java	(revision 2)
@@ -0,0 +1,33 @@
+package de.ugoe.cs.cpdp.versions;
+
+/**
+ * Filter for small data sets. The filter applies to all data sets that have fewer instances
+ * than the required minimum.
+ * @author Steffen Herbold
+ */
+public class MinInstanceNumberFilter extends AbstractVersionFilter {
+
+	/**
+	 * minimal number of instances required
+	 */
+	private int minInstances = 0;
+
+	/**
+	 * Sets the minimal number of instances.
+	 * @param parameters minimal number of instances as string
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		minInstances = Integer.parseInt(parameters);
+	}
+
+	/**
+	 * @see de.ugoe.cs.cpdp.versions.IVersionFilter#apply(de.ugoe.cs.cpdp.versions.SoftwareVersion)
+	 */
+	@Override
+	public boolean apply(SoftwareVersion version) {
+		final int numInstances = version.getInstances().numInstances();
+		return minInstances > numInstances;
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/SoftwareVersion.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/SoftwareVersion.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/SoftwareVersion.java	(revision 2)
@@ -0,0 +1,62 @@
+package de.ugoe.cs.cpdp.versions;
+
+import weka.core.Instances;
+
+/**
+ * Data class that bundles the project name, the version name, and the data of a software version.
+ * @author Steffen Herbold
+ */
+public class SoftwareVersion {
+
+	/**
+	 * data of the version
+	 */
+	private final Instances instances;
+
+	/**
+	 * name of the project
+	 */
+	private final String project;
+
+	/**
+	 * version of the project
+	 */
+	private final String version;
+
+	/**
+	 * Constructor. Creates a new version. The instances are stored by reference.
+	 * @param project name of the project
+	 * @param version name of the version
+	 * @param instances data of the version
+	 */
+	public SoftwareVersion(String project, String version, Instances instances) {
+		this.instances = instances;
+		this.version = version;
+		this.project = project;
+	}
+
+	/**
+	 * Returns the data of the version. Every call creates a new Instances object from the stored data.
+	 * @return new Instances object created from the stored data
+	 */
+	public Instances getInstances() {
+		final Instances copy = new Instances(instances);
+		return copy;
+	}
+
+	/**
+	 * Returns the project name.
+	 * @return project name
+	 */
+	public String getProject() {
+		return project;
+	}
+
+	/**
+	 * Returns the name of the version.
+	 * @return name of the version
+	 */
+	public String getVersion() {
+		return version;
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/UnbalancedFilter.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/UnbalancedFilter.java	(revision 2)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/versions/UnbalancedFilter.java	(revision 2)
@@ -0,0 +1,54 @@
+package de.ugoe.cs.cpdp.versions;
+
+import weka.core.Instances;
+
+/**
+ * Removes unbalanced data sets in terms of classification. All data sets where the share of
+ * instances with the first class value is outside of the quantile defined by setParameter
+ * (default=0.1) are removed.
+ * @author Steffen Herbold
+ */
+public class UnbalancedFilter extends AbstractVersionFilter {
+
+	/**
+	 * quantile where outside lying versions are removed
+	 */
+	private double quantil = 0.1;
+
+	/**
+	 * Sets the quantile.
+	 * @param parameters the quantile as string
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		quantil = Double.parseDouble(parameters);
+	}
+
+	/**
+	 * Checks if the share of instances with the first class value is at most the quantile or
+	 * at least 1-quantile. Data sets without instances are never considered as unbalanced.
+	 * @param version the version
+	 * @return true if the data of the version is unbalanced, false otherwise
+	 * @throws IllegalArgumentException if no nominal counts are available for the class attribute
+	 * @see de.ugoe.cs.cpdp.versions.IVersionFilter#apply(de.ugoe.cs.cpdp.versions.SoftwareVersion)
+	 */
+	@Override
+	public boolean apply(SoftwareVersion version) {
+		final Instances instances = version.getInstances();
+		final int numInstances = instances.numInstances();
+		if (numInstances == 0) {
+			// avoids 0/0; matches the previous result, where the NaN ratio failed both checks
+			return false;
+		}
+
+		final int[] counts = instances.attributeStats(instances.classIndex()).nominalCounts;
+		if (counts == null || counts.length == 0) {
+			throw new IllegalArgumentException("no nominal counts available for the class attribute");
+		}
+
+		// computed once instead of once per comparison
+		final double ratio = ((double) counts[0]) / numInstances;
+		return ratio >= (1 - quantil) || ratio <= quantil;
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/ExperimentConfigurationTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/ExperimentConfigurationTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/ExperimentConfigurationTest.java	(revision 2)
@@ -0,0 +1,54 @@
+package de.ugoe.cs.cpdp;
+
+import java.io.File;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Checks that the experiment configuration files can be loaded.
+ */
+public class ExperimentConfigurationTest {
+
+	/**
+	 * Validates all configuration files in the listed configuration folders.
+	 */
+	@Test
+	public void validateConfigurations() {
+		/*validateConfigurations("config_shared");
+		validateConfigurations("config_singleclassifier");
+		validateConfigurations("config_bagging");*/
+		validateConfigurations("exp-java/config");
+		validateConfigurations("exp-nasa/config");
+	}
+
+	/**
+	 * Tries to create an {@link ExperimentConfiguration} for every file in the folder and
+	 * fails if the folder cannot be listed or if any of the files cannot be loaded.
+	 * @param folder path of the folder with the configuration files
+	 */
+	public void validateConfigurations(String folder) {
+		File configFolder = new File(folder);
+		File[] configFiles = configFolder.listFiles();
+		// listFiles() returns null if the path is not a directory or cannot be read
+		assertNotNull("Configuration folder cannot be listed: " + configFolder.getAbsolutePath(), configFiles);
+
+		StringBuilder failedFiles = new StringBuilder();
+		for( File configFile : configFiles ) {
+			try {
+				if( configFile.isFile() ) {
+					new ExperimentConfiguration(configFile);
+				}
+			} catch (Exception e) {
+				System.err.println("Failure initializing the experiment configuration for configuration file " + configFile);
+				e.printStackTrace();
+				failedFiles.append(' ').append(configFile.getName());
+			}
+		}
+		if( failedFiles.length()>0 ) {
+			fail("Invalid configuration files in " + folder + ":" + failedFiles);
+		}
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/PetersFilterTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/PetersFilterTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/PetersFilterTest.java	(revision 2)
@@ -0,0 +1,72 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for the {@link PetersFilter}.
+ */
+public class PetersFilterTest {
+
+	/**
+	 * Simple crash test for setParameter.
+	 */
+	@SuppressWarnings("deprecation")
+	@Test
+	public void testSetParameter() {
+		PetersFilter filter = new PetersFilter();
+		filter.setParameter("somestring");
+	}
+
+	/**
+	 * Applies the filter to a small example and checks that the values 3.05 and 8.0 of the
+	 * first attribute are part of the selected training data.
+	 */
+	@SuppressWarnings("deprecation")
+	@Test
+	public void testApply() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+
+		Instances testdata = createData("test", attributes, 3.0, 6.6, 3.1);
+		Instances traindata = createData("train", attributes, 2.9, 2.8, 3.2, 3.05, 10.0, 9.0, 8.0, 1.0, 5.0, 6.6);
+
+		Instances selected = new PetersFilter().apply(testdata, traindata);
+
+		Set<Double> selectedSet = new HashSet<>();
+		for( int i=0 ; i<selected.numInstances() ; i++ ) {
+			selectedSet.add(selected.instance(i).toDoubleArray()[0]);
+		}
+
+		assertTrue(selectedSet.contains(3.05));
+		assertTrue(selectedSet.contains(8.0));
+	}
+
+	/**
+	 * Creates a data set with the second attribute as class. One instance with weight 1.0 and
+	 * class value 0.0 is added per given value.
+	 * @param name name of the data set
+	 * @param attributes attributes of the data set
+	 * @param values values of the first attribute
+	 * @return the data set
+	 */
+	private static Instances createData(String name, ArrayList<Attribute> attributes, double... values) {
+		Instances data = new Instances(name, attributes, 0);
+		data.setClassIndex(1);
+		for( double value : values ) {
+			data.add(new DenseInstance(1.0, new double[]{value, 0.0}));
+		}
+		return data;
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/TestAsTrainingTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/TestAsTrainingTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/TestAsTrainingTest.java	(revision 2)
@@ -0,0 +1,68 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.junit.Test;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for {@link TestAsTraining}.
+ */
+public class TestAsTrainingTest {
+
+	/**
+	 * Applies the filter to a set with one training data set and checks that afterwards the
+	 * set contains exactly one data set, which is not the same object as the test data but
+	 * has the same instance values.
+	 */
+	@Test
+	public void testApply() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+
+		Instances testdata = createData("test", attributes, 3.0, 6.6, 3.1);
+		Instances traindata = createData("train", attributes, 2.9, 2.8, 3.2, 3.05, 10.0, 9.0, 8.0, 1.0, 5.0);
+
+		SetUniqueList<Instances> traindataSet = SetUniqueList.setUniqueList(new LinkedList<Instances>());
+		traindataSet.add(traindata);
+
+		new TestAsTraining().apply(testdata, traindataSet);
+
+		assertEquals(1, traindataSet.size());
+
+		Instances result = traindataSet.get(0);
+		assertNotSame(testdata, result);
+		assertEquals(testdata.numInstances(), result.numInstances());
+		for( int i=0; i<testdata.numInstances(); i++ ) {
+			double[] expected = testdata.instance(i).toDoubleArray();
+			double[] actual = result.instance(i).toDoubleArray();
+			assertArrayEquals(expected, actual, 0.000000001);
+		}
+	}
+
+	/**
+	 * Creates a data set with the second attribute as class. One instance with weight 1.0 and
+	 * class value 0.0 is added per given value.
+	 * @param name name of the data set
+	 * @param attributes attributes of the data set
+	 * @param values values of the first attribute
+	 * @return the data set
+	 */
+	private static Instances createData(String name, ArrayList<Attribute> attributes, double... values) {
+		Instances data = new Instances(name, attributes, 0);
+		data.setClassIndex(1);
+		for( double value : values ) {
+			data.add(new DenseInstance(1.0, new double[]{value, 0.0}));
+		}
+		return data;
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/TurhanFilterTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/TurhanFilterTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/dataselection/TurhanFilterTest.java	(revision 2)
@@ -0,0 +1,64 @@
+package de.ugoe.cs.cpdp.dataselection;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for {@link TurhanFilter}.
+ */
+public class TurhanFilterTest {
+
+	/**
+	 * Applies the filter with the parameter "2" to a small example and checks that the
+	 * expected values of the first attribute are part of the selected training data.
+	 */
+	@Test
+	public void testApply() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+
+		Instances testdata = createData("test", attributes, 3.0, 6.6, 3.1);
+		Instances traindata = createData("train", attributes, 2.9, 2.8, 3.2, 3.05, 10.0, 9.0, 8.0, 1.0, 5.0);
+
+		TurhanFilter filter = new TurhanFilter();
+		filter.setParameter("2");
+		Instances selected = filter.apply(testdata, traindata);
+
+		Set<Double> selectedSet = new HashSet<>();
+		for( int i=0 ; i<selected.numInstances() ; i++ ) {
+			selectedSet.add(selected.instance(i).toDoubleArray()[0]);
+		}
+
+		for( double expected : new double[]{2.9, 3.05, 3.2, 5.0, 8.0} ) {
+			assertTrue(selectedSet.contains(expected));
+		}
+	}
+
+	/**
+	 * Creates a data set with the second attribute as class. One instance with weight 1.0 and
+	 * class value 0.0 is added per given value.
+	 * @param name name of the data set
+	 * @param attributes attributes of the data set
+	 * @param values values of the first attribute
+	 * @return the data set
+	 */
+	private static Instances createData(String name, ArrayList<Attribute> attributes, double... values) {
+		Instances data = new Instances(name, attributes, 0);
+		data.setClassIndex(1);
+		for( double value : values ) {
+			data.add(new DenseInstance(1.0, new double[]{value, 0.0}));
+		}
+		return data;
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/loader/FolderLoaderTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/loader/FolderLoaderTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/loader/FolderLoaderTest.java	(revision 2)
@@ -0,0 +1,30 @@
+package de.ugoe.cs.cpdp.loader;
+
+import static org.junit.Assert.*;
+
+import java.util.List;
+
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.versions.SoftwareVersion;
+
+/**
+ * Tests the loading of software versions with the {@link CSVFolderLoader}.
+ */
+public class FolderLoaderTest {
+
+	/**
+	 * Loads the versions from the location "data" and checks that 65 versions are found.
+	 */
+	@Test
+	public void testLoadVersions() {
+		final int expectedVersions = 65;
+
+		CSVFolderLoader folderLoader = new CSVFolderLoader();
+		folderLoader.setLocation("data");
+		List<SoftwareVersion> loaded = folderLoader.load();
+
+		assertEquals(expectedVersions, loaded.size());
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/AttributeRemoverTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/AttributeRemoverTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/AttributeRemoverTest.java	(revision 2)
@@ -0,0 +1,75 @@
+package de.ugoe.cs.cpdp.preprocessing;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.dataprocessing.AttributeRemoval;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for {@link AttributeRemoval}.
+ */
+public class AttributeRemoverTest {
+
+	/**
+	 * Removes the attribute "attr2" and checks that the values of "attr1" and "class" remain.
+	 */
+	@Test
+	public void testApply_1() {
+		Instances instances = createInstances();
+
+		AttributeRemoval processor = new AttributeRemoval();
+		processor.setParameter("attr2");
+		processor.apply(instances, SetUniqueList.setUniqueList(new LinkedList<Instances>()) );
+
+		assertEquals(2, instances.numAttributes());
+		assertArrayEquals(new double[]{1.5, 0.0}, instances.instance(0).toDoubleArray(), 0.0001);
+		assertArrayEquals(new double[]{1.4, 1.0}, instances.instance(1).toDoubleArray(), 0.0001);
+		assertArrayEquals(new double[]{1.6, 0.0}, instances.instance(2).toDoubleArray(), 0.0001);
+	}
+
+	/**
+	 * Removes the attributes "attr2" and "attr1" and checks that only the values of "class" remain.
+	 */
+	@Test
+	public void testApply_2() {
+		Instances instances = createInstances();
+
+		AttributeRemoval processor = new AttributeRemoval();
+		processor.setParameter("attr2 attr1");
+		processor.apply(instances, SetUniqueList.setUniqueList(new LinkedList<Instances>()) );
+
+		assertEquals(1, instances.numAttributes());
+		assertArrayEquals(new double[]{0.0}, instances.instance(0).toDoubleArray(), 0.0001);
+		assertArrayEquals(new double[]{1.0}, instances.instance(1).toDoubleArray(), 0.0001);
+		assertArrayEquals(new double[]{0.0}, instances.instance(2).toDoubleArray(), 0.0001);
+	}
+
+	/**
+	 * Creates the data used by the tests: the attributes "attr1", "class" (class attribute),
+	 * and "attr2" with three instances.
+	 * @return the data
+	 */
+	private static Instances createInstances() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+		attributes.add(new Attribute("attr2"));
+
+		Instances instances = new Instances("test", attributes, 0);
+		instances.setClassIndex(1);
+		instances.add(new DenseInstance(1.0, new double[]{1.5, 0.0,  3.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.4, 1.0,  6.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.6, 0.0, 15.0}));
+		return instances;
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/AverageStandardizationTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/AverageStandardizationTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/AverageStandardizationTest.java	(revision 2)
@@ -0,0 +1,69 @@
+package de.ugoe.cs.cpdp.preprocessing;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.dataprocessing.AverageStandardization;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for {@link AverageStandardization}.
+ */
+public class AverageStandardizationTest {
+
+	/**
+	 * Simple crash test for setParameter.
+	 */
+	@Test
+	public void testSetParameter() {
+		AverageStandardization processor = new AverageStandardization();
+		processor.setParameter("somestring");
+	}
+
+	/**
+	 * Applies the processor and checks the values of the data set in the set passed as second argument.
+	 */
+	@Test
+	public void testApply() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+		attributes.add(new Attribute("attr2"));
+
+		Instances instances = new Instances("test", attributes, 0);
+		instances.setClassIndex(1);
+		instances.add(new DenseInstance(1.0, new double[]{1.5, 0.0,  3.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.4, 1.0,  6.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.6, 0.0, 15.0}));
+
+		// the class index of this data set is not set
+		Instances instTest = new Instances("foo", attributes, 0);
+		instTest.add(new DenseInstance(1.0, new double[]{ 3.0, 0.0, 1.5}));
+		instTest.add(new DenseInstance(1.0, new double[]{ 6.0, 1.0, 1.4}));
+		instTest.add(new DenseInstance(1.0, new double[]{15.0, 0.0, 1.6}));
+
+		SetUniqueList<Instances> instSet = SetUniqueList.setUniqueList(new LinkedList<Instances>());
+		instSet.add(instTest);
+
+		new AverageStandardization().apply(instances, instSet);
+
+		double[][] expected = {
+			{0.5625, 0.0, 8.0},
+			{1.125 , 1.0, 7.466666666666},
+			{2.8125, 0.0, 8.533333333333}
+		};
+		Instances result = instSet.get(0);
+		for( int i=0 ; i<expected.length ; i++ ) {
+			assertArrayEquals(expected[i], result.instance(i).toDoubleArray(), 0.0001);
+		}
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/LogarithmTranformTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/LogarithmTranformTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/LogarithmTranformTest.java	(revision 2)
@@ -0,0 +1,60 @@
+package de.ugoe.cs.cpdp.preprocessing;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.dataprocessing.LogarithmTransform;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for {@link LogarithmTransform}.
+ */
+public class LogarithmTranformTest {
+
+	/**
+	 * Simple crash test for setParameter.
+	 */
+	@Test
+	public void testSetParameter() {
+		LogarithmTransform processor = new LogarithmTransform();
+		processor.setParameter("somestring");
+	}
+
+	/**
+	 * Applies the transformation and checks that the values of the non-class attributes are
+	 * replaced by log(value+1) while the class values are unchanged.
+	 */
+	@Test
+	public void testApply() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+		attributes.add(new Attribute("attr2"));
+
+		Instances instances = new Instances("test", attributes, 0);
+		instances.setClassIndex(1);
+		instances.add(new DenseInstance(1.0, new double[]{1.5, 0.0,  3.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.4, 1.0,  6.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.6, 0.0, 15.0}));
+
+		new LogarithmTransform().apply(instances, SetUniqueList.setUniqueList(new LinkedList<Instances>()) );
+
+		double[][] expected = {
+			{Math.log(1.5+1), 0.0, Math.log(3.0+1)},
+			{Math.log(1.4+1), 1.0, Math.log(6.0+1)},
+			{Math.log(1.6+1), 0.0, Math.log(15.0+1)}
+		};
+		for( int i=0 ; i<expected.length ; i++ ) {
+			assertArrayEquals(expected[i], instances.instance(i).toDoubleArray(), 0.0001);
+		}
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/MedianAsReferenceTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/MedianAsReferenceTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/MedianAsReferenceTest.java	(revision 2)
@@ -0,0 +1,60 @@
+package de.ugoe.cs.cpdp.preprocessing;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.dataprocessing.MedianAsReference;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for {@link MedianAsReference}.
+ */
+public class MedianAsReferenceTest {
+
+	/**
+	 * Simple crash test for setParameter.
+	 */
+	@Test
+	public void testSetParameter() {
+		MedianAsReference processor = new MedianAsReference();
+		processor.setParameter("somestring");
+	}
+
+	/**
+	 * Applies the processor with an empty set as second argument and checks the resulting
+	 * values of the data passed as first argument.
+	 */
+	@Test
+	public void testApply() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+		attributes.add(new Attribute("attr2"));
+
+		Instances instances = new Instances("test", attributes, 0);
+		instances.setClassIndex(1);
+		instances.add(new DenseInstance(1.0, new double[]{1.5, 0.0,  3.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.4, 1.0,  6.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.6, 0.0, 15.0}));
+
+		new MedianAsReference().apply(instances, SetUniqueList.setUniqueList(new LinkedList<Instances>()) );
+
+		double[][] expected = {
+			{0.0, 0.0, -3.0},
+			{-0.1,1.0,  0.0},
+			{0.1, 0.0,  9.0}
+		};
+		for( int i=0 ; i<expected.length ; i++ ) {
+			assertArrayEquals(expected[i], instances.instance(i).toDoubleArray(), 0.0001);
+		}
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/NormalizationPreprocessorTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/NormalizationPreprocessorTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/preprocessing/NormalizationPreprocessorTest.java	(revision 2)
@@ -0,0 +1,60 @@
+package de.ugoe.cs.cpdp.preprocessing;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.dataprocessing.Normalization;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+/**
+ * Tests for {@link Normalization}.
+ */
+public class NormalizationPreprocessorTest {
+
+	/**
+	 * Simple crash test for setParameter.
+	 */
+	@Test
+	public void testSetParameter() {
+		Normalization processor = new Normalization();
+		processor.setParameter("somestring");
+	}
+
+	/**
+	 * Applies the processor with an empty set as second argument and checks the resulting
+	 * values of the data passed as first argument.
+	 */
+	@Test
+	public void testApply() {
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("class"));
+		attributes.add(new Attribute("attr2"));
+
+		Instances instances = new Instances("test", attributes, 0);
+		instances.setClassIndex(1);
+		instances.add(new DenseInstance(1.0, new double[]{1.5, 0.0,  3.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.4, 1.0,  6.0}));
+		instances.add(new DenseInstance(1.0, new double[]{1.6, 0.0, 15.0}));
+
+		new Normalization().apply(instances, SetUniqueList.setUniqueList(new LinkedList<Instances>()) );
+
+		double[][] expected = {
+			{0.5, 0.0, 0.0},
+			{0.0, 1.0, 0.25},
+			{1.0, 0.0, 1.0}
+		};
+		for( int i=0 ; i<expected.length ; i++ ) {
+			assertArrayEquals(expected[i], instances.instance(i).toDoubleArray(), 0.0001);
+		}
+	}
+
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/versions/InstanceNumberFilterTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/versions/InstanceNumberFilterTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/versions/InstanceNumberFilterTest.java	(revision 2)
@@ -0,0 +1,44 @@
+package de.ugoe.cs.cpdp.versions;
+
+import static org.junit.Assert.*;
+
+import java.util.List;
+
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.loader.CSVFolderLoader;
+
+public class InstanceNumberFilterTest {
+
+	/** Without a parameter set, applying the filter is expected to keep every loaded version. */
+	@Test
+	public void testApply_1() {
+		CSVFolderLoader folderLoader = new CSVFolderLoader();
+		folderLoader.setLocation("data");
+		List<SoftwareVersion> loaded = folderLoader.load();
+		final int countBefore = loaded.size();
+
+		new MinInstanceNumberFilter().apply(loaded);
+		assertEquals(countBefore, loaded.size());
+	}
+
+	/** With parameter "100", only versions holding at least 100 instances are expected to remain. */
+	@Test
+	public void testApply_2() {
+		CSVFolderLoader folderLoader = new CSVFolderLoader();
+		folderLoader.setLocation("data");
+		List<SoftwareVersion> loaded = folderLoader.load();
+
+		// count, before filtering, the versions that reach the threshold
+		int survivors = 0;
+		for (SoftwareVersion candidate : loaded) {
+			survivors += candidate.getInstances().numInstances() >= 100 ? 1 : 0;
+		}
+
+		MinInstanceNumberFilter minFilter = new MinInstanceNumberFilter();
+		minFilter.setParameter("100");
+		minFilter.apply(loaded);
+
+		assertEquals(survivors, loaded.size());
+	}
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/versions/UnbalancedFilterTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/versions/UnbalancedFilterTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/versions/UnbalancedFilterTest.java	(revision 2)
@@ -0,0 +1,25 @@
+package de.ugoe.cs.cpdp.versions;
+
+import static org.junit.Assert.*;
+
+import java.util.List;
+
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.loader.CSVFolderLoader;
+
+public class UnbalancedFilterTest {
+
+	/** Applies UnbalancedFilter to everything loaded from "data"; 57 versions are expected to remain. */
+	@Test
+	public void testApply() {
+		CSVFolderLoader folderLoader = new CSVFolderLoader();
+		folderLoader.setLocation("data");
+		List<SoftwareVersion> remaining = folderLoader.load();
+
+		new UnbalancedFilter().apply(remaining);
+
+		final int expectedRemaining = 57; // NOTE(review): presumably tied to the bundled data set - confirm
+		assertEquals(expectedRemaining, remaining.size());
+	}
+}
Index: trunk/CrossPare/test/de/ugoe/cs/cpdp/weighting/BiasedWeightsTest.java
===================================================================
--- trunk/CrossPare/test/de/ugoe/cs/cpdp/weighting/BiasedWeightsTest.java	(revision 2)
+++ trunk/CrossPare/test/de/ugoe/cs/cpdp/weighting/BiasedWeightsTest.java	(revision 2)
@@ -0,0 +1,110 @@
+package de.ugoe.cs.cpdp.weighting;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Test;
+
+import de.ugoe.cs.cpdp.dataprocessing.BiasedWeights;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+
+public class BiasedWeightsTest {
+
+	/** Tolerance for comparing a single instance weight. */
+	private static final double WEIGHT_DELTA = 0.00001;
+
+	/** Tolerance for comparing the sum of all weights. */
+	private static final double SUM_DELTA = 0.0001;
+
+	/**
+	 * Builds the data set shared by both tests: eight instances of weight 1.0
+	 * over the attributes attr1, bug (nominal "0"/"1", used as class) and attr2.
+	 * The instances at index 1 and 4 carry the class value "1", the other six
+	 * carry "0".
+	 *
+	 * @return a newly created data set whose class index is 1
+	 */
+	private static Instances createInstances() {
+		List<String> classAttVals = new ArrayList<String>();
+		classAttVals.add("0");
+		classAttVals.add("1");
+
+		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+		attributes.add(new Attribute("attr1"));
+		attributes.add(new Attribute("bug", classAttVals));
+		attributes.add(new Attribute("attr2"));
+
+		Instances instances = new Instances("test", attributes, 0);
+		instances.setClassIndex(1);
+
+		double[][] values = {
+			{ 1.5, 0.0,  3.0},
+			{ 1.4, 1.0,  6.0},
+			{ 1.6, 0.0, 15.0},
+			{ 3.0, 0.0,  1.5},
+			{ 6.0, 1.0,  1.4},
+			{15.0, 0.0,  1.6},
+			{ 6.0, 0.0,  1.4},
+			{15.0, 0.0,  1.6}
+		};
+		for (double[] value : values) {
+			instances.add(new DenseInstance(1.0, value));
+		}
+		return instances;
+	}
+
+	/**
+	 * Checks the weight of every instance and that all weights together still
+	 * sum up to the number of instances.
+	 *
+	 * @param expectedWeights expected weight per instance, in data set order
+	 * @param instances data set the processor was applied to
+	 */
+	private static void assertWeights(double[] expectedWeights, Instances instances) {
+		assertEquals("number of instances", expectedWeights.length, instances.numInstances());
+		for (int i = 0; i < expectedWeights.length; i++) {
+			// the message names the offending instance when a weight is off
+			assertEquals("weight of instance " + i, expectedWeights[i], instances.instance(i).weight(), WEIGHT_DELTA);
+		}
+		assertEquals("sum of weights", instances.numInstances(), instances.sumOfWeights(), SUM_DELTA);
+	}
+
+	/**
+	 * Without a parameter, the two instances of class "1" are expected to end up
+	 * with weight 2.0 and the six instances of class "0" with weight 2/3.
+	 */
+	@Test
+	public void testApply_1() {
+		Instances instances = createInstances();
+
+		BiasedWeights processor = new BiasedWeights();
+		processor.apply(new Instances(instances), instances);
+
+		final double bug = 2.0d;
+		final double noBug = 0.6666666d;
+		assertWeights(new double[]{noBug, bug, noBug, noBug, bug, noBug, noBug, noBug}, instances);
+	}
+
+	/**
+	 * With parameter "0.7", the two instances of class "1" are expected to end
+	 * up with weight 2.8 and the six instances of class "0" with weight 0.4.
+	 */
+	@Test
+	public void testApply_2() {
+		Instances instances = createInstances();
+
+		BiasedWeights processor = new BiasedWeights();
+		processor.setParameter("0.7");
+		processor.apply(new Instances(instances), instances);
+
+		final double bug = 2.8;
+		final double noBug = 0.4;
+		assertWeights(new double[]{noBug, bug, noBug, noBug, bug, noBug, noBug, noBug}, instances);
+	}
+
+}
