package de.ugoe.cs.cpdp;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
import de.ugoe.cs.cpdp.loader.IVersionLoader;
import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
import de.ugoe.cs.cpdp.training.ITrainingStrategy;
import de.ugoe.cs.cpdp.versions.IVersionFilter;
import de.ugoe.cs.util.StringTools;
import de.ugoe.cs.util.console.Console;
/**
* Class that contains all meta information about an experiment, i.e., its configuration. The configuration is loaded from an XML file.
*
* In the current implementation, the experiment configuration can only be created using an XML file. Programmatic creation of experiment configurations is currently not possibly.
* @author Steffen Herbold
*/
public class ExperimentConfiguration extends DefaultHandler {
/**
* handle of the file that contains the configuration
*/
private final File configFile;
/**
* name of the experiment (automatically set to the file name without the .xml ending)
*/
private String experimentName = "exp";
/**
* loads instances
*/
private List loaders;
/**
* path were the results of the experiments are stored
*/
private String resultsPath = "results";
/**
* data set filters applied to all data
*/
private List versionFilters;
/**
* data set filters that decide if a data set is used as test data
*/
private List testVersionFilters;
/**
* data set filters that decide if a data is used as candidate training data
*/
private List trainingVersionFilters;
/**
* setwise data processors that are applied before the setwise data selection
*/
private List setwisepreprocessors;
/**
* setwise data selection strategies
*/
private List setwiseselectors;
/**
* setwise data processors that are applied after the setwise data selection
*/
private List setwisepostprocessors;
/**
* setwise trainers, i.e., trainers that require the selected training data to be separate from each other
*/
private List setwiseTrainers;
/**
* data processors that are applied before the pointwise data selection
*/
private List preprocessors;
/**
* pointwise data selection strategies
*/
private List pointwiseselectors;
/**
* data processors that are applied before the pointwise data selection
*/
private List postprocessors;
/**
* normal trainers, i.e., trainers that require the selected training data in a single data set
*/
private List trainers;
/**
* evaluators used for the the experiment results
*/
private List evaluators;
/**
* Constructor. Creates a new configuration from a given file.
* @param filename name of the file from the configuration is loaded.
* @throws ExperimentConfigurationException thrown if there is an error creating the configuration
*/
public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
this(new File(filename));
}
/**
* Constructor. Creates a new configuration from a given file.
* @param filename handle of the file from the configuration is loaded.
* @throws ExperimentConfigurationException thrown if there is an error creating the configuration
*/
public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
loaders = new LinkedList<>();
versionFilters = new LinkedList<>();
testVersionFilters = new LinkedList<>();
trainingVersionFilters = new LinkedList<>();
setwisepreprocessors = new LinkedList<>();
setwiseselectors = new LinkedList<>();
setwisepostprocessors = new LinkedList<>();
setwiseTrainers = new LinkedList<>();
preprocessors = new LinkedList<>();
pointwiseselectors = new LinkedList<>();
postprocessors = new LinkedList<>();
trainers = new LinkedList<>();
evaluators = new LinkedList<>();
if (file == null) {
throw new IllegalArgumentException("file must not be null");
}
if (file.isDirectory()) {
throw new IllegalArgumentException("file must not be a directory");
}
configFile = file;
experimentName = file.getName().split("\\.")[0];
final SAXParserFactory spf = SAXParserFactory.newInstance();
spf.setValidating(true);
SAXParser saxParser = null;
InputSource inputSource = null;
try {
saxParser = spf.newSAXParser();
} catch (ParserConfigurationException | SAXException e) {
throw new ExperimentConfigurationException(e);
}
InputStreamReader reader = null;
try {
reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
inputSource = new InputSource(reader);
} catch (UnsupportedEncodingException | FileNotFoundException e) {
throw new ExperimentConfigurationException("Could not open configuration file.", e);
}
if (inputSource != null) {
inputSource.setSystemId("file://" + file.getAbsolutePath());
try {
saxParser.parse(inputSource, this);
} catch (SAXException | IOException e) {
throw new ExperimentConfigurationException("Error parsing configuration.", e);
}
}
if( reader!=null ) {
try {
reader.close();
} catch (IOException e) {
throw new ExperimentConfigurationException("Error closing reader.", e);
}
}
}
/**
* returns the name of the experiment
* @return name of the experiment
*/
public String getExperimentName() {
return experimentName;
}
/**
* returns the loaders for instances
* @return data loaders
*/
public List getLoaders() {
return loaders;
}
/**
* returns the results path
* @return results path
*/
public String getResultsPath() {
return resultsPath;
}
/**
* returns the data set filters of the experiment
* @return data set filters of the experiment
*/
public List getVersionFilters() {
return versionFilters;
}
/**
* returns the test set filters of the experiment
* @return test set filters of the experiment
*/
public List getTestVersionFilters() {
return testVersionFilters;
}
/**
* returns the candidate training version filters of the experiment
* @return candidate training version filters of the experiment
*/
public List getTrainingVersionFilters() {
return trainingVersionFilters;
}
/**
* returns the setwise processors applied before the setwise data selection
* @return setwise processors applied before the setwise data selection
*/
public List getSetWisePreprocessors() {
return setwisepreprocessors;
}
/**
* returns the setwise data selection strategies
* @return setwise data selection strategies
*/
public List getSetWiseSelectors() {
return setwiseselectors;
}
/**
* returns the setwise processors applied after the setwise data selection
* @return setwise processors applied after the setwise data selection
*/
public List getSetWisePostprocessors() {
return setwisepostprocessors;
}
/**
* returns the setwise training algorithms
* @return setwise training algorithms
*/
public List getSetWiseTrainers() {
return setwiseTrainers;
}
/**
* returns the processors applied before the pointwise data selection
* @return processors applied before the pointwise data selection
*/
public List getPreProcessors() {
return preprocessors;
}
/**
* returns the pointwise data selection strategies
* @return pointwise data selection strategies
*/
public List getPointWiseSelectors() {
return pointwiseselectors;
}
/**
* returns the processors applied after the pointwise data selection
* @return processors applied after the pointwise data selection
*/
public List getPostProcessors() {
return postprocessors;
}
/**
* returns the normal training algorithm
* @return normal training algorithms
*/
public List getTrainers() {
return trainers;
}
/**
* returns the evaluation strategies
* @return evaluation strategies
*/
public List getEvaluators() {
return evaluators;
}
/* (non-Javadoc)
* @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
try {
if( qName.equals("config") ) {
// ingore
}
else if( qName.equals("loader") ) {
final IVersionLoader loader = (IVersionLoader) Class.forName("de.ugoe.cs.cpdp.loader." + attributes.getValue("name")).newInstance();
loader.setLocation(attributes.getValue("datalocation"));
loaders.add(loader);
// TODO location as relative
}
else if( qName.equals("resultspath") ) {
resultsPath = attributes.getValue("path");
}
else if( qName.equals("versionfilter") ) {
final IVersionFilter filter = (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name")).newInstance();
filter.setParameter(attributes.getValue("param"));
versionFilters.add(filter);
}
else if( qName.equals("testVersionfilter") ) {
final IVersionFilter filter = (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name")).newInstance();
filter.setParameter(attributes.getValue("param"));
testVersionFilters.add(filter);
}
else if( qName.equals("trainVersionfilter") ) {
final IVersionFilter filter = (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name")).newInstance();
filter.setParameter(attributes.getValue("param"));
trainingVersionFilters.add(filter);
}
else if( qName.equals("setwisepreprocessor") ) {
final ISetWiseProcessingStrategy processor = (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name")).newInstance();
processor.setParameter(attributes.getValue("param"));
setwisepreprocessors.add(processor);
}
else if( qName.equals("setwiseselector") ) {
final ISetWiseDataselectionStrategy selection = (ISetWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name")).newInstance();
selection.setParameter(attributes.getValue("param"));
setwiseselectors.add(selection);
}
else if( qName.equals("setwisepostprocessor") ) {
final ISetWiseProcessingStrategy processor = (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name")).newInstance();
processor.setParameter(attributes.getValue("param"));
setwisepostprocessors.add(processor);
}
else if( qName.equals("setwisetrainer") ) {
final ISetWiseTrainingStrategy trainer = (ISetWiseTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." + attributes.getValue("name")).newInstance();
trainer.setParameter(attributes.getValue("param"));
setwiseTrainers.add(trainer);
}
else if( qName.equals("preprocessor") ) {
final IProcessesingStrategy processor = (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name")).newInstance();
processor.setParameter( attributes.getValue("param"));
preprocessors.add(processor);
}
else if( qName.equals("pointwiseselector") ) {
final IPointWiseDataselectionStrategy selection = (IPointWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name")).newInstance();
selection.setParameter( attributes.getValue("param"));
pointwiseselectors.add(selection);
}
else if( qName.equals("postprocessor") ) {
final IProcessesingStrategy processor = (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name")).newInstance();
processor.setParameter( attributes.getValue("param"));
postprocessors.add(processor);
}
else if( qName.equals("trainer") ) {
final ITrainingStrategy trainer = (ITrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." + attributes.getValue("name")).newInstance();
trainer.setParameter(attributes.getValue("param"));
trainers.add(trainer);
}
else if( qName.equals("eval") ) {
final IEvaluationStrategy evaluator = (IEvaluationStrategy) Class.forName("de.ugoe.cs.cpdp.eval." + attributes.getValue("name")).newInstance();
evaluators.add(evaluator);
}
else if( qName.equals("partialconfig") ) {
String path = attributes.getValue("path");
try {
boolean relative = true;
if( attributes.getValue("relative")!=null ) {
relative = Boolean.parseBoolean(attributes.getValue("relative"));
}
if( relative ) {
path = configFile.getParentFile().getPath() + "/" + path;
}
addConfigurations(new ExperimentConfiguration(path));
} catch (ExperimentConfigurationException e) {
throw new SAXException("Could not load partial configuration: " + path, e);
}
} else {
Console.traceln(Level.WARNING, "element in config-file " + configFile.getName() + " ignored: " + qName);
}
}
catch (NoClassDefFoundError | ClassNotFoundException | IllegalAccessException | InstantiationException | ClassCastException e) {
throw new SAXException("Could not initialize class correctly", (Exception) e);
}
}
/**
* Adds the information of another experiment configuration to this configuration. This mechanism allows the usage of partial configuration files. The name of the other configuration is lost.
*
* If the current data path is the empty string (""), it is override by the datapath of the other configuration. Otherwise, the current data path is kept.
* @param other experiment whose information is added
*/
private void addConfigurations(ExperimentConfiguration other) {
if( "results".equals(resultsPath) ) {
resultsPath = other.resultsPath;
}
loaders.addAll(other.loaders);
versionFilters.addAll(other.versionFilters);
testVersionFilters.addAll(other.testVersionFilters);
trainingVersionFilters.addAll(other.trainingVersionFilters);
setwisepreprocessors.addAll(other.setwisepreprocessors);
setwiseselectors.addAll(other.setwiseselectors);
setwisepostprocessors.addAll(other.setwisepostprocessors);
setwiseTrainers.addAll(other.setwiseTrainers);
preprocessors.addAll(other.preprocessors);
pointwiseselectors.addAll(other.pointwiseselectors);
postprocessors.addAll(other.postprocessors);
trainers.addAll(other.trainers);
evaluators.addAll(other.evaluators);
}
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
final StringBuilder builder = new StringBuilder();
builder.append("Experiment name: " + experimentName + StringTools.ENDLINE);
builder.append("Loaders: " + loaders + StringTools.ENDLINE);
builder.append("Results path: " + resultsPath + StringTools.ENDLINE);
builder.append("Version filters: " + versionFilters.toString() + StringTools.ENDLINE);
builder.append("Test version filters: " + testVersionFilters.toString() + StringTools.ENDLINE);
builder.append("Training version filters: " + trainingVersionFilters.toString() + StringTools.ENDLINE);
builder.append("Setwise preprocessors: " + setwisepreprocessors.toString() + StringTools.ENDLINE);
builder.append("Setwise selectors: " + setwiseselectors.toString() + StringTools.ENDLINE);
builder.append("Setwise postprocessors: " + setwisepostprocessors.toString() + StringTools.ENDLINE);
builder.append("Setwise trainers: " + setwiseTrainers.toString() + StringTools.ENDLINE);
builder.append("Pointwise preprocessors: " + preprocessors.toString() + StringTools.ENDLINE);
builder.append("Pointwise selectors: " + pointwiseselectors.toString() + StringTools.ENDLINE);
builder.append("Pointwise postprocessors: " + postprocessors.toString() + StringTools.ENDLINE);
builder.append("Pointwise trainers: " + trainers.toString() + StringTools.ENDLINE);
builder.append("Evaluators: " + evaluators.toString() + StringTools.ENDLINE);
return builder.toString();
}
}