package de.ugoe.cs.cpdp.dataselection;

import java.util.ArrayList;

import org.apache.commons.collections4.list.SetUniqueList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.experiment.Stats;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Normalize;

/**
 * Abstract class that implements the foundation of setwise data selection strategies using
 * distributional characteristics. This class provides the means to transform the data sets into
 * their characteristic vectors.
 *
 * @author Steffen Herbold
 */
public abstract class AbstractCharacteristicSelection implements ISetWiseDataselectionStrategy {

    /**
     * Names of the distributional characteristics that are computed for every non-class attribute.
     * Defaults to mean and standard deviation.
     */
    private String[] characteristics = new String[]{"mean", "stddev"};

    /**
     * Sets the distributional characteristics. The names of the characteristics are separated by
     * blanks. A {@code null}, empty, or blank-only parameter string leaves the current
     * characteristics untouched.
     *
     * @param parameters blank-separated names of the characteristics
     */
    @Override
    public void setParameter(String parameters) {
        if (parameters == null) {
            // nothing configured: keep the current characteristics instead of failing with an NPE
            return;
        }
        final String trimmed = parameters.trim();
        if (!trimmed.isEmpty()) {
            // split on runs of whitespace so that repeated blanks do not yield empty names
            characteristics = trimmed.split("\\s+");
        }
    }

    /**
     * Transforms the data into the distributional characteristics. The first instance is the test
     * data, followed by the training data. For every attribute of the test data except its class
     * attribute, one numeric attribute per configured characteristic is created, named
     * {@code <attributeName>_<characteristic>}.
     *
     * @param testdata test data
     * @param traindataSet training data sets
     * @return distributional characteristics of the data
     * @throws RuntimeException if an unknown characteristic is configured or an attribute has no
     *         numeric statistics
     */
    protected Instances characteristicInstances(Instances testdata, SetUniqueList<Instances> traindataSet) {
        // setup weka Instances for clustering
        final ArrayList<Attribute> atts = new ArrayList<Attribute>();
        final Attribute classAtt = testdata.classAttribute();
        for (int i = 0; i < testdata.numAttributes(); i++) {
            final Attribute dataAtt = testdata.attribute(i);
            if (!dataAtt.equals(classAtt)) {
                for (String characteristic : characteristics) {
                    atts.add(new Attribute(dataAtt.name() + "_" + characteristic));
                }
            }
        }
        // NOTE(review): everything in this method after the attribute-naming condition was lost in
        // the damaged source and has been rebuilt from the Javadoc contract and the imports;
        // the relation name below is a reconstruction -- confirm against version control.
        final Instances data = new Instances("distributional_characteristics", atts, 0);

        // first instance: test data; afterwards one instance per training data set
        data.add(characteristicVector(testdata, classAtt, testdata, atts.size()));
        for (Instances traindata : traindataSet) {
            data.add(characteristicVector(testdata, classAtt, traindata, atts.size()));
        }
        return data;
    }

    /**
     * Computes the characteristic vector of a single data set.
     *
     * @param reference data set whose attribute layout determines which indices are used
     * @param classAtt class attribute of the reference data; its index is skipped
     * @param dataset data set for which the characteristics are computed; NOTE(review): assumed to
     *        share the attribute layout of {@code reference} -- confirm with callers
     * @param numValues length of the resulting vector
     * @return instance with weight 1.0 holding the characteristics of {@code dataset}
     */
    private Instance characteristicVector(Instances reference, Attribute classAtt, Instances dataset, int numValues) {
        final double[] values = new double[numValues];
        // running offset instead of an index derived from the attribute index, so that the class
        // attribute may be located anywhere and not only at the last position
        int offset = 0;
        for (int i = 0; i < reference.numAttributes(); i++) {
            if (reference.attribute(i).equals(classAtt)) {
                continue;
            }
            final Stats stats = dataset.attributeStats(i).numericStats;
            if (stats == null) {
                throw new RuntimeException("No numeric statistics available for attribute: "
                    + reference.attribute(i).name());
            }
            for (String characteristic : characteristics) {
                values[offset++] = characteristicValue(stats, characteristic);
            }
        }
        return new DenseInstance(1.0, values);
    }

    /**
     * Looks up a single distributional characteristic from the numeric statistics of an attribute.
     * NOTE(review): "mean" and "stddev" are the defaults visible in this class; "var", "min" and
     * "max" are offered additionally from the same statistics -- verify against the original list
     * of supported names.
     *
     * @param stats numeric statistics of the attribute
     * @param characteristic name of the characteristic
     * @return value of the characteristic
     * @throws RuntimeException if the characteristic is unknown
     */
    private static double characteristicValue(Stats stats, String characteristic) {
        if ("mean".equals(characteristic)) {
            return stats.mean;
        }
        if ("stddev".equals(characteristic)) {
            return stats.stdDev;
        }
        if ("var".equals(characteristic)) {
            return stats.stdDev * stats.stdDev;
        }
        if ("min".equals(characteristic)) {
            return stats.min;
        }
        if ("max".equals(characteristic)) {
            return stats.max;
        }
        throw new RuntimeException("Unknown distributional characteristic: " + characteristic);
    }

    /**
     * Returns the distributional characteristics produced by
     * {@link #characteristicInstances(Instances, SetUniqueList)} after passing them through Weka's
     * {@link Normalize} filter.
     * NOTE(review): the name and visibility of this method were lost in the damaged source and are
     * reconstructed -- confirm against the subclasses that call it.
     *
     * @param testdata test data
     * @param traindataSet training data sets
     * @return normalized distributional characteristics of the data
     * @throws RuntimeException if the normalization filter fails
     */
    protected Instances normalizedCharacteristicInstances(Instances testdata, SetUniqueList<Instances> traindataSet) {
        Instances data = characteristicInstances(testdata, traindataSet);
        try {
            final Normalize normalizer = new Normalize();
            normalizer.setInputFormat(data);
            data = Filter.useFilter(data, normalizer);
        } catch (Exception e) {
            throw new RuntimeException("Unexpected exception during normalization of distributional characteristics.", e);
        }
        return data;
    }
}