Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/Runner.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/Runner.java	(revision 37)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/Runner.java	(revision 38)
@@ -26,5 +26,5 @@
 	public static void main(String[] args) {
 		new TextConsole(Level.FINE);
-		final int concurrentThreads = Runtime.getRuntime().availableProcessors();
+		final int concurrentThreads = Math.max(1, Runtime.getRuntime().availableProcessors()-2); // leave two cores free, but a fixed pool needs at least one thread
 		final ExecutorService threadPool = Executors.newFixedThreadPool(concurrentThreads);
 		for( String arg : args ) {
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Oversampling.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Oversampling.java	(revision 38)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Oversampling.java	(revision 38)
@@ -0,0 +1,93 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.supervised.instance.Resample;
+
+/**
+ * Implements oversampling, a strategy for
+ * handling bias in data. In case there are fewer positive (i.e.
+ * defect-prone) samples in the data than negative (i.e.
+ * non-defect-prone) samples, the defect-prone entities are over-sampled such that the
+ * number of defect-prone and non-defect-prone instances is the same afterwards.
+ * This means that some of the defect-prone entities will occur more than once
+ * within the data.
+ * 
+ * @author Steffen Herbold
+ */
+public class Oversampling implements IProcessesingStrategy,
+		ISetWiseProcessingStrategy {
+
+	/**
+	 * Does not have parameters. String is ignored.
+	 * 
+	 * @param parameters
+	 *            ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// intentionally empty: this strategy has nothing to configure
+	}
+
+	/**
+	 * Oversamples each training set of the list in place; see
+	 * {@link #apply(Instances, Instances)} for the per-set behavior.
+	 * 
+	 * @param testdata only passed through to the per-set method
+	 * @param traindataSet training sets; each one is modified in place
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		for (Instances traindata : traindataSet) {
+			apply(testdata, traindata);
+		}
+	}
+
+	/**
+	 * If class 1 is rarer than class 0 in {@code traindata}, replaces its content by all
+	 * class-0 instances plus a WEKA {@link Resample} of the class-1 instances sized to
+	 * about the class-0 count. Does nothing otherwise.
+	 * @param testdata ignored
+	 * @param traindata training data; modified in place
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		// counts[0] / counts[1]: number of instances with class value 0 / 1
+		final int[] counts = traindata.attributeStats(traindata.classIndex()).nominalCounts;
+		if (counts[1] > 0 && counts[1] < counts[0]) { // without positives there is nothing to sample from (and the ratio below would divide by zero)
+			Instances negatives = new Instances(traindata);
+			Instances positives = new Instances(traindata);
+			// both start as full copies: class 1 is removed from negatives, class 0 from positives
+			for (int i = traindata.size() - 1; i >= 0; i--) {
+				if (Double.compare(1.0, negatives.get(i).classValue()) == 0) {
+					negatives.remove(i);
+				}
+				if (Double.compare(0.0, positives.get(i).classValue()) == 0) {
+					positives.remove(i);
+				}
+			}
+			// sample (100*counts[0]/counts[1]) percent of the positives so both classes end up about equally large
+			Resample resample = new Resample();
+			// TODO: resample.setSampleSizePercent((100.0*counts[1])/100+0.01);
+			// Without the +0.01, one negative fewer is returned for tomcat,
+			// xerces-1.2 and jedit-4.0
+			resample.setSampleSizePercent((100.0 * counts[0]) / counts[1]);
+			try {
+				resample.setInputFormat(traindata);
+				positives = Filter.useFilter(positives, resample);
+			} catch (Exception e) {
+				throw new RuntimeException(e);
+			}
+			traindata.clear();
+			for (int i = 0; i < negatives.size(); i++) {
+				traindata.add(negatives.get(i));
+			}
+			for (int i = 0; i < positives.size(); i++) {
+				traindata.add(positives.get(i));
+			}
+		}
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Resampling.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Resampling.java	(revision 38)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Resampling.java	(revision 38)
@@ -0,0 +1,60 @@
+package de.ugoe.cs.cpdp.dataprocessing;
+
+import org.apache.commons.collections4.list.SetUniqueList;
+
+import weka.core.Instances;
+import weka.filters.Filter;
+import weka.filters.supervised.instance.Resample;
+
+/**
+ * Resamples the data with WEKA {@link Resample} to have a uniform distribution among all classes.   
+ * @author Steffen Herbold
+ */
+public class Resampling implements IProcessesingStrategy,
+		ISetWiseProcessingStrategy {
+
+	
+	/**
+	 * Does not have parameters. String is ignored.
+	 * @param parameters ignored
+	 */
+	@Override
+	public void setParameter(String parameters) {
+		// intentionally empty: there is nothing to configure
+	}
+
+	/**
+	 * Applies {@link #apply(Instances, Instances)} to each training set of the list in turn; every set is modified in place.
+	 * @see de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)
+	 */
+	@Override
+	public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
+		for( Instances traindata : traindataSet ) {
+			apply(testdata, traindata);
+		}
+	}
+
+	/**
+	 * Replaces the content of {@code traindata} by the output of a WEKA {@link Resample} filter (sample size 100%, bias to uniform class 1.0); {@code testdata} is not used.
+	 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances)
+	 */
+	@Override
+	public void apply(Instances testdata, Instances traindata) {
+		Resample resample = new Resample();
+		resample.setSampleSizePercent(100);
+		resample.setBiasToUniformClass(1.0);
+		
+		Instances traindataSample;
+		try {
+			resample.setInputFormat(traindata);
+			traindataSample = Filter.useFilter(traindata, resample);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		}
+		traindata.clear();
+		for( int i=0 ; i<traindataSample.size() ; i++ ) {
+			traindata.add(traindataSample.get(i));
+		}
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreNormalization.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreNormalization.java	(revision 37)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreNormalization.java	(revision 38)
@@ -44,4 +44,5 @@
 	
 	private void normalize(Instances instances) {
+		instances.toString(); // NOTE(review): the returned string is discarded; looks like a debugging leftover -- confirm and remove
 		final Attribute classAttribute = instances.classAttribute();
 		
@@ -56,11 +57,13 @@
 			}
 		}
-		
 		for( int i=0 ; i<instances.numAttributes(); i++) {
 			if( !instances.attribute(i).equals(classAttribute) ) {
-				for( int j=0 ; j<instances.numAttributes() ; j++ ) {
+				for( int j=0 ; j<instances.numInstances() ; j++ ) {
-					Instance inst = instances.get(i);
+					Instance inst = instances.get(j); // j runs over the instances, i over the attributes
-					double newValue = (inst.value(j)-means[j])/stddevs[j];
-					inst.setValue(j, newValue);
+					double newValue = (inst.value(i)-means[i])/stddevs[i];
+					if( Double.isNaN(newValue) ) { // a comparison with ==Double.NaN is always false
+						System.out.println("foooooo");
+					}
+					inst.setValue(i, newValue);
 				}
 			}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreTargetNormalization.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreTargetNormalization.java	(revision 37)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreTargetNormalization.java	(revision 38)
@@ -44,5 +44,5 @@
 		for( int i=0 ; i<testdata.numInstances() ; i++ ) {
 			Instance instance = testdata.instance(i);
-			for( int j=0 ; j<testdata.numInstances() ; j++ ) {
+			for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
 				if( testdata.attribute(j)!=classAttribute ) {
-					instance.setValue(j, instance.value(j)-meanTest[j]/stddevTest[j]);
+					instance.setValue(j, (instance.value(j)-meanTest[j])/stddevTest[j]); // z-score is (value-mean)/stddev; the division used to bind only to the mean
@@ -85,5 +85,5 @@
 		for( int i=0 ; i<testdata.numInstances() ; i++ ) {
 			Instance instance = testdata.instance(i);
-			for( int j=0 ; j<testdata.numInstances() ; j++ ) {
+			for( int j=0 ; j<testdata.numAttributes() ; j++ ) {
 				if( testdata.attribute(j)!=classAttribute ) {
-					instance.setValue(j, instance.value(j)-meanTest[j]/stddevTest[j]);
+					instance.setValue(j, (instance.value(j)-meanTest[j])/stddevTest[j]); // z-score is (value-mean)/stddev; the division used to bind only to the mean
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMContextSelection.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMContextSelection.java	(revision 37)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMContextSelection.java	(revision 38)
@@ -28,5 +28,4 @@
 public class SetWiseEMContextSelection implements ISetWiseDataselectionStrategy {
 	
-	private String parameters;
 	private String[] project_context_factors; // = new String[]{"TND", "TNC", "TNF", "TLOC"};
 	
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeFolderLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeFolderLoader.java	(revision 38)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeFolderLoader.java	(revision 38)
@@ -0,0 +1,14 @@
+package de.ugoe.cs.cpdp.loader;
+
+public class AUDIChangeFolderLoader extends AbstractFolderLoader {
+
+	/**
+	 * Supplies a new {@link AUDIChangeLoader} as the single-version loader of this folder loader.
+	 * 
+	 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader()
+	 */
+	@Override
+	protected SingleVersionLoader getSingleLoader() {
+		return new AUDIChangeLoader();
+	}
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeLoader.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeLoader.java	(revision 38)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeLoader.java	(revision 38)
@@ -0,0 +1,284 @@
+package de.ugoe.cs.cpdp.loader;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Map.Entry;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instances;
+import de.ugoe.cs.util.FileTools;
+
+/**
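+ * Loads change data from a ";"-separated metrics file ("...src.csv") and the bug file "...repro.csv" derived from its path.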
+ * @author sherbold
+ *
+ */
+class AUDIChangeLoader implements SingleVersionLoader {
+	
+	private static class EntityRevisionPair implements Comparable<EntityRevisionPair> {
+		private final String entity;
+		private final int revision;
+		// immutable key; static because it never uses the enclosing loader instance
+		public EntityRevisionPair(String entity, int revision) {
+			this.entity = entity;
+			this.revision = revision;
+		}
+		// equality is delegated to compareTo, which keeps equals consistent with the ordering
+		@Override
+		public boolean equals(Object other) {
+			if( !(other instanceof EntityRevisionPair) ) {
+				return false;
+			} else {
+				return compareTo((EntityRevisionPair) other)==0;
+			}
+		}
+		// equal pairs share entity and revision and therefore the same hash code
+		@Override
+		public int hashCode() {
+			return entity.hashCode()+revision;
+		}
+		// orders by entity name first, ties are broken by ascending revision
+		@Override
+		public int compareTo(EntityRevisionPair other) {
+			int strCmp = this.entity.compareTo(other.entity);
+			if( strCmp!=0 ) {
+				return strCmp;
+			}
+			return Integer.compare(revision, other.revision);
+		}
+		// rendered as entity@revision
+		@Override
+		public String toString() {
+			return entity+"@"+revision;
+		}
+	}
+	/** Builds instances from consecutive revisions of the same entity (metric deltas, absolute metric values, binary "bug" class); pairs without any metric increase are skipped. */
+	@Override
+	public Instances load(File file) {
+		final String[] lines;
+		String[] lineSplit;
+		String[] lineSplitBug;
+		// read the metrics file (the file passed in) line by line
+		try {
+			lines = FileTools.getLinesFromFile(file.getAbsolutePath());
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+		
+		// bug information is in another file: the last 14 characters of the path are replaced by "repro.csv"
+		String path = file.getAbsolutePath();
+		path = path.substring(0, path.length()-14) + "repro.csv";
+		final String[] linesBug;
+		try {
+			linesBug = FileTools.getLinesFromFile(path);
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+		// locate the revision and the bug-count column in the header line of the bug file
+		int revisionIndex=-1;
+		int bugIndex=-1;
+		lineSplitBug = linesBug[0].split(";");
+		for( int j=0; j<lineSplitBug.length ; j++ ) {
+			if( lineSplitBug[j].equals("svnrev") ) {
+				revisionIndex=j;
+			}
+			if( lineSplitBug[j].equals("num_bugs_trace") ) {
+				bugIndex=j;
+			}
+		}
+		if( revisionIndex<0 ) {
+			throw new RuntimeException("could not find SVN revisions");
+		}
+		if( bugIndex<0 ) {
+			throw new RuntimeException("could not find bug information");
+		}
+		// locate the first (lm_LOC) and the last (h_E) metric column in the header line of the metrics file
+		int metricsStartIndex=-1;
+		int metricsEndIndex=-1;
+		lineSplit = lines[0].split(";");
+		for( int j=0; j<lineSplit.length ; j++ ) {
+			if( lineSplit[j].equals("lm_LOC") ) {
+				metricsStartIndex=j;
+			}
+			if( lineSplit[j].equals("h_E") ) {
+				metricsEndIndex=j;
+			}
+		}
+		if( metricsStartIndex<0 ) {
+			throw new RuntimeException("could not find first metric, i.e., lm_LOC");
+		}
+		if( metricsEndIndex<0 ) {
+			throw new RuntimeException("could not find last metric, i.e., h_E");
+		}
+		int numMetrics = metricsEndIndex-metricsStartIndex+1;
+		
+		// map each (entity, revision) pair of the bug file to its line number; the map iterates sorted by entity, then revision
+		SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>();
+		for( int i=1; i<linesBug.length ; i++ ) {
+			lineSplitBug = linesBug[i].split(";");
+			entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer.parseInt(lineSplitBug[revisionIndex])), i);
+		}
+		// NOTE(review): the stored line number is used below for BOTH files -- assumes metrics and bug file are line-aligned; confirm
+		
+		// prepare weka instances: a "_delta" and an "_abs" attribute per metric plus the nominal class "bug" (values "0" and "1")
+		final ArrayList<Attribute> atts = new ArrayList<Attribute>();
+		lineSplit = lines[0].split(";"); 
+		for (int j = metricsStartIndex; j<=metricsEndIndex; j++) {
+			atts.add(new Attribute(lineSplit[j]+"_delta"));
+		}
+		for (int j = metricsStartIndex; j<=metricsEndIndex; j++) {
+			atts.add(new Attribute(lineSplit[j]+"_abs"));
+		}
+		final ArrayList<String> classAttVals = new ArrayList<String>();
+		classAttVals.add("0");
+		classAttVals.add("1");
+		final Attribute classAtt = new Attribute("bug", classAttVals);
+		atts.add(classAtt);
+
+		final Instances data = new Instances(file.getName(), atts, 0);
+		data.setClass(classAtt);
+		
+		// create data: walk the sorted pairs and compare each entry with the previous one
+		String lastFile = null;
+		double[] lastValues = null;
+		int lastNumBugs = 0;
+		for( Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet() ) {
+			try {
+				// first get values
+				lineSplit = lines[entry.getValue()].split(";");
+				lineSplitBug = linesBug[entry.getValue()].split(";");
+				int i=0;
+				double[] values = new double[numMetrics];
+				for(int j=metricsStartIndex ; j<=metricsEndIndex ; j++ ) {
+					values[i] = Double.parseDouble(lineSplit[j]);
+					i++;
+				}
+				int numBugs = Integer.parseInt(lineSplitBug[bugIndex]);
+				
+				// then check if an instance must be created: only if the previous entry belongs to the same entity
+				if( entry.getKey().entity.equals(lastFile)) {
+					// create new instance: deltas to the previous revision first, then the absolute values
+					double[] instanceValues = new double[2*numMetrics+1];
+					for( int j=0; j<numMetrics; j++ ) {
+						instanceValues[j] = values[j]-lastValues[j];
+						instanceValues[j+numMetrics]= values[j];
+					}
+					// check if any delta>0 (only an increase of a metric counts as a change)
+					boolean changeOccured = false;
+					for( int j=0; j<numMetrics; j++ ) {
+						if( instanceValues[j]>0 ) {
+							changeOccured = true;
+						}
+					}
+					if( changeOccured ) {
+						instanceValues[instanceValues.length-1] = numBugs<=lastNumBugs ? 0 : 1; // class 1 only if the bug count grew
+						data.add(new DenseInstance(1.0, instanceValues));
+					}
+				}
+				lastFile = entry.getKey().entity;
+				lastValues = values;
+				lastNumBugs = numBugs;
+			} catch(IllegalArgumentException e) { // includes the NumberFormatException of the parse calls above; the last* state is then left untouched
+				System.err.println("error in line " + entry.getValue() + ": " + e.getMessage());
+				System.err.println("metrics line: " + lines[entry.getValue()]);
+				System.err.println("bugs line: " + linesBug[entry.getValue()]);
+				System.err.println("line is ignored");
+			}
+		}
+		
+		return data;
+	}
+	
+	/**
+	 * Overload without {@code @Override}: uses the header columns between the
+	 * first three/four and the last two as numeric attributes and sets the class
+	 * to 0 only if the bug column of the same line in the bug file is "0".
+	 * 
+	 * @param file metrics file; the bug file path is derived from its path
+	 * @param dummy never read inside the method
+	 */
+	public Instances load(File file, String dummy) {
+		final String[] lines;
+		try {
+			lines = FileTools.getLinesFromFile(file.getAbsolutePath());
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+		
+		// bug information is in another file: the last 14 characters of the path are replaced by "repro.csv"
+		String path = file.getAbsolutePath();
+		path = path.substring(0, path.length()-14) + "repro.csv";
+		final String[] linesBug;
+		try {
+			linesBug = FileTools.getLinesFromFile(path);
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+		
+		// configure Instances
+		final ArrayList<Attribute> atts = new ArrayList<Attribute>();
+
+		String[] lineSplit = lines[0].split(";");
+		// ignore first three/four and last two columns (four if the fourth header column is "project_rev")
+		int offset;
+		if( lineSplit[3].equals("project_rev") ) {
+			offset = 4;
+		} else {
+			offset = 3;
+		}
+		for (int j = 0; j < lineSplit.length - (offset+2); j++) {
+			atts.add(new Attribute(lineSplit[j + offset]));
+		}
+		final ArrayList<String> classAttVals = new ArrayList<String>();
+		classAttVals.add("0");
+		classAttVals.add("1");
+		final Attribute classAtt = new Attribute("bug", classAttVals);
+		atts.add(classAtt);
+
+		final Instances data = new Instances(file.getName(), atts, 0);
+		data.setClass(classAtt);
+
+		// fetch data: line i of the metrics file is combined with line i of the bug file
+		for (int i = 1; i < lines.length; i++) {
+			boolean validInstance = true;
+			lineSplit = lines[i].split(";");
+			String[] lineSplitBug = linesBug[i].split(";");
+			double[] values = new double[data.numAttributes()];
+			for (int j = 0; validInstance && j < values.length-1; j++) {
+				if( lineSplit[j + offset].trim().isEmpty() ) {
+					validInstance = false;
+				} else {
+					values[j] = Double.parseDouble(lineSplit[j + offset].trim());
+				}
+			}
+			if( offset==3 ) { // without a project_rev column the bug flag is read from column 7 of the bug file, otherwise from column 8
+				values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1;
+			} else {
+				values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1;
+			}
+			
+			if( validInstance ) {
+				data.add(new DenseInstance(1.0, values));
+			} else {
+				System.out.println("instance " + i + " is invalid");
+			}
+		}
+		return data;
+	}
+
+	/**
+	 * Accepts only file names that end with "src.csv".
+	 * 
+	 * @param filename file name to check
+	 * @return true if and only if the name ends with "src.csv"
+	 */
+	@Override
+	public boolean filenameFilter(String filename) {
+		return filename.endsWith("src.csv");
+	}
+
+}
Index: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomClass.java
===================================================================
--- trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomClass.java	(revision 37)
+++ trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomClass.java	(revision 38)
@@ -2,7 +2,5 @@
 
 import java.util.Random;
-import java.util.logging.Level;
 
-import de.ugoe.cs.util.console.Console;
 import weka.classifiers.AbstractClassifier;
 import weka.classifiers.Classifier;
