package de.ugoe.cs.cpdp.dataprocessing; import org.apache.commons.collections4.list.SetUniqueList; import weka.core.Instance; import weka.core.Instances; /** * Sets the bias of the weights of the training data. By using a bias of 0.5 (default value) the total weight of the positive instances (i.e. * fault-prone) is equal to the total weight of the negative instances (i.e. non-fault-prone). Otherwise the weights between the two will be * distributed according to the bias, where &lt;0.5 means in favor of the negative instances and &gt;0.5 in favor of the positive instances. * @author Steffen Herbold */ public class BiasedWeights implements IProcessesingStrategy, ISetWiseProcessingStrategy { /** * bias used for the weighting; defaults to 0.5 */ private double bias = 0.5; /** * Sets the bias to be used for weighting. * @param parameters string with the bias; parsed with {@link Double#parseDouble(String)} */ @Override public void setParameter(String parameters) { bias = Double.parseDouble(parameters); } /** * Applies the biased weighting to the training data only; the test data is not re-weighted. * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances) */ @Override public void apply(Instances testdata, Instances traindata) { //setBiasedWeights(testdata); setBiasedWeights(traindata); } /** * Applies the biased weighting to every training data set in the given collection; the test data is not re-weighted. * @see de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList) */ @Override public void apply(Instances testdata, SetUniqueList traindataSet) { for( Instances traindata : traindataSet ) { setBiasedWeights(traindata); } } /** * Helper method that sets the weights for a given data set. * @param data data set whose weights are set */ private void setBiasedWeights(Instances data) { final int classIndex = data.classIndex(); final int[] counts = data.attributeStats(classIndex).nominalCounts; final double weightNegatives = ((1-bias)*data.numInstances()) / counts[0]; final double weightPositives = (bias*data.numInstances()) / counts[1]; for( int i=0 ; i