// Copyright 2015 Georg-August-Universität Göttingen, Germany
//
//   Licensed under the Apache License, Version 2.0 (the "License");
//   you may not use this file except in compliance with the License.
//   You may obtain a copy of the License at
//
//       http://www.apache.org/licenses/LICENSE-2.0
//
//   Unless required by applicable law or agreed to in writing, software
//   distributed under the License is distributed on an "AS IS" BASIS,
//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//   See the License for the specific language governing permissions and
//   limitations under the License.

package de.ugoe.cs.cpdp;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
import de.ugoe.cs.cpdp.eval.IResultStorage;
import de.ugoe.cs.cpdp.loader.IVersionLoader;
import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
import de.ugoe.cs.cpdp.training.ITrainingStrategy;
import de.ugoe.cs.cpdp.versions.IVersionFilter;
import de.ugoe.cs.util.StringTools;
import de.ugoe.cs.util.console.Console;

/**
 * Class that contains all meta information about an experiment, i.e., its configuration. The
 * configuration is loaded from an XML file.
 * <p>
 * In the current implementation, the experiment configuration can only be created using an XML
 * file. Programmatic creation of experiment configurations is currently not possible.
 * <p>
 * The instance acts as its own SAX handler: the constructor parses the file and
 * {@link #startElement(String, String, String, Attributes)} fills the fields element by element.
 *
 * @author Steffen Herbold
 */
public class ExperimentConfiguration extends DefaultHandler {

    /** package prefix (including trailing dot) of the version loaders */
    private static final String LOADER_PACKAGE = "de.ugoe.cs.cpdp.loader.";

    /** package prefix (including trailing dot) of the version filters */
    private static final String VERSIONS_PACKAGE = "de.ugoe.cs.cpdp.versions.";

    /** package prefix (including trailing dot) of the data processors */
    private static final String DATAPROCESSING_PACKAGE = "de.ugoe.cs.cpdp.dataprocessing.";

    /** package prefix (including trailing dot) of the data selection strategies */
    private static final String DATASELECTION_PACKAGE = "de.ugoe.cs.cpdp.dataselection.";

    /** package prefix (including trailing dot) of the trainers */
    private static final String TRAINING_PACKAGE = "de.ugoe.cs.cpdp.training.";

    /** package prefix (including trailing dot) of the evaluators and result storages */
    private static final String EVAL_PACKAGE = "de.ugoe.cs.cpdp.eval.";

    /** message used when a configured class cannot be instantiated */
    private static final String INIT_ERROR = "Could not initialize class correctly";

    /**
     * handle of the file that contains the configuration
     */
    private final File configFile;

    /**
     * name of the experiment (set by the constructor to the part of the file name before the
     * first dot)
     */
    private String experimentName = "exp";

    /**
     * loads instances
     */
    private final List<IVersionLoader> loaders = new LinkedList<>();

    /**
     * path where the results of the experiments are stored
     */
    private String resultsPath = "results";

    /**
     * data set filters applied to all data
     */
    private final List<IVersionFilter> versionFilters = new LinkedList<>();

    /**
     * data set filters that decide if a data set is used as test data
     */
    private final List<IVersionFilter> testVersionFilters = new LinkedList<>();

    /**
     * data set filters that decide if a data set is used as candidate training data
     */
    private final List<IVersionFilter> trainingVersionFilters = new LinkedList<>();

    /**
     * setwise data processors that are applied before the setwise data selection
     */
    private final List<ISetWiseProcessingStrategy> setwisepreprocessors = new LinkedList<>();

    /**
     * setwise data selection strategies
     */
    private final List<ISetWiseDataselectionStrategy> setwiseselectors = new LinkedList<>();

    /**
     * setwise data processors that are applied after the setwise data selection
     */
    private final List<ISetWiseProcessingStrategy> setwisepostprocessors = new LinkedList<>();

    /**
     * setwise trainers, i.e., trainers that require the selected training data to be separate from
     * each other
     */
    private final List<ISetWiseTrainingStrategy> setwiseTrainers = new LinkedList<>();

    /**
     * setwise testdata aware trainers, i.e., trainers that require the selected training data to be
     * separate from each other and the current testdata
     */
    private final List<ISetWiseTestdataAwareTrainingStrategy> setwiseTestdataAwareTrainers =
        new LinkedList<>();

    /**
     * data processors that are applied before the pointwise data selection
     */
    private final List<IProcessesingStrategy> preprocessors = new LinkedList<>();

    /**
     * pointwise data selection strategies
     */
    private final List<IPointWiseDataselectionStrategy> pointwiseselectors = new LinkedList<>();

    /**
     * data processors that are applied after the pointwise data selection
     */
    private final List<IProcessesingStrategy> postprocessors = new LinkedList<>();

    /**
     * normal trainers, i.e., trainers that require the selected training data in a single data set
     */
    private final List<ITrainingStrategy> trainers = new LinkedList<>();

    /**
     * test aware trainers (configured through the {@code testawaretrainer} element)
     */
    private final List<ITestAwareTrainingStrategy> testAwareTrainers = new LinkedList<>();

    /**
     * evaluators used for the experiment results
     */
    private final List<IEvaluationStrategy> evaluators = new LinkedList<>();

    /**
     * result storages used for experiments
     */
    private final List<IResultStorage> resultStorages = new LinkedList<>();

    /**
     * indicates, if the classifier should be saved; {@code null} means that the configuration
     * did not contain a {@code saveClassifier} element
     */
    private Boolean saveClassifier = null;

    /**
     * indicates, which execution strategy to choose (e.g. CrossProjectExperiment,
     * ClassifierCreationExecution). Default is CrossProjectExperiment.
     */
    private String executionStrategy = "CrossProjectExperiment";

    /**
     * Constructor. Creates a new configuration from a given file.
     *
     * @param filename
     *            name of the file from which the configuration is loaded.
     * @throws ExperimentConfigurationException
     *             thrown if there is an error creating the configuration
     */
    public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
        this(new File(filename));
    }

    /**
     * Constructor. Creates a new configuration from a given file. The file is parsed immediately
     * with a validating SAX parser, using this instance as the handler.
     *
     * @param file
     *            handle of the file from which the configuration is loaded.
     * @throws ExperimentConfigurationException
     *             thrown if there is an error creating the configuration
     * @throws IllegalArgumentException
     *             thrown if file is null or a directory
     */
    public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
        if (file == null) {
            throw new IllegalArgumentException("file must not be null");
        }
        if (file.isDirectory()) {
            throw new IllegalArgumentException("file must not be a directory");
        }
        configFile = file;
        experimentName = file.getName().split("\\.")[0];

        final SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setValidating(true);

        final SAXParser saxParser;
        try {
            saxParser = spf.newSAXParser();
        }
        catch (ParserConfigurationException | SAXException e) {
            throw new ExperimentConfigurationException(e);
        }

        // try-with-resources guarantees that stream and reader are released even if parsing fails
        try (FileInputStream stream = new FileInputStream(file);
             InputStreamReader reader = new InputStreamReader(stream, "UTF-8")) {
            final InputSource inputSource = new InputSource(reader);
            inputSource.setSystemId("file://" + file.getAbsolutePath());
            try {
                saxParser.parse(inputSource, this);
            }
            catch (SAXException | IOException e) {
                throw new ExperimentConfigurationException("Error parsing configuration.", e);
            }
        }
        catch (UnsupportedEncodingException | FileNotFoundException e) {
            throw new ExperimentConfigurationException("Could not open configuration file.", e);
        }
        catch (IOException e) {
            // parse errors are wrapped above, so only closing the resources can end up here
            throw new ExperimentConfigurationException("Error closing reader.", e);
        }
    }

    /**
     * returns the name of the experiment
     *
     * @return name of the experiment
     */
    public String getExperimentName() {
        return experimentName;
    }

    /**
     * returns the loaders for instances
     *
     * @return data loaders
     */
    public List<IVersionLoader> getLoaders() {
        return loaders;
    }

    /**
     * returns the results path
     *
     * @return results path
     */
    public String getResultsPath() {
        return resultsPath;
    }

    /**
     * returns the data set filters of the experiment
     *
     * @return data set filters of the experiment
     */
    public List<IVersionFilter> getVersionFilters() {
        return versionFilters;
    }

    /**
     * returns the test set filters of the experiment
     *
     * @return test set filters of the experiment
     */
    public List<IVersionFilter> getTestVersionFilters() {
        return testVersionFilters;
    }

    /**
     * returns the candidate training version filters of the experiment
     *
     * @return candidate training version filters of the experiment
     */
    public List<IVersionFilter> getTrainingVersionFilters() {
        return trainingVersionFilters;
    }

    /**
     * returns the setwise processors applied before the setwise data selection
     *
     * @return setwise processors applied before the setwise data selection
     */
    public List<ISetWiseProcessingStrategy> getSetWisePreprocessors() {
        return setwisepreprocessors;
    }

    /**
     * returns the setwise data selection strategies
     *
     * @return setwise data selection strategies
     */
    public List<ISetWiseDataselectionStrategy> getSetWiseSelectors() {
        return setwiseselectors;
    }

    /**
     * returns the setwise processors applied after the setwise data selection
     *
     * @return setwise processors applied after the setwise data selection
     */
    public List<ISetWiseProcessingStrategy> getSetWisePostprocessors() {
        return setwisepostprocessors;
    }

    /**
     * returns the setwise training algorithms
     *
     * @return setwise training algorithms
     */
    public List<ISetWiseTrainingStrategy> getSetWiseTrainers() {
        return setwiseTrainers;
    }

    /**
     * returns the setwise testdata aware training algorithms
     *
     * @return setwise testdata aware training algorithms
     */
    public List<ISetWiseTestdataAwareTrainingStrategy> getSetWiseTestdataAwareTrainers() {
        return setwiseTestdataAwareTrainers;
    }

    /**
     * returns the processors applied before the pointwise data selection
     *
     * @return processors applied before the pointwise data selection
     */
    public List<IProcessesingStrategy> getPreProcessors() {
        return preprocessors;
    }

    /**
     * returns the pointwise data selection strategies
     *
     * @return pointwise data selection strategies
     */
    public List<IPointWiseDataselectionStrategy> getPointWiseSelectors() {
        return pointwiseselectors;
    }

    /**
     * returns the processors applied after the pointwise data selection
     *
     * @return processors applied after the pointwise data selection
     */
    public List<IProcessesingStrategy> getPostProcessors() {
        return postprocessors;
    }

    /**
     * returns the normal training algorithms
     *
     * @return normal training algorithms
     */
    public List<ITrainingStrategy> getTrainers() {
        return trainers;
    }

    /**
     * returns the test aware training algorithms
     *
     * @return test aware training algorithms
     */
    public List<ITestAwareTrainingStrategy> getTestAwareTrainers() {
        return testAwareTrainers;
    }

    /**
     * returns the evaluation strategies
     *
     * @return evaluation strategies
     */
    public List<IEvaluationStrategy> getEvaluators() {
        return evaluators;
    }

    /**
     * returns the result storages
     *
     * @return result storages
     */
    public List<IResultStorage> getResultStorages() {
        return resultStorages;
    }

    /**
     * returns boolean, if classifier should be saved
     *
     * @return true if the configuration contained a saveClassifier element; false otherwise
     */
    public boolean getSaveClassifier() {
        // saveClassifier stays null if the element is absent; never unbox it directly
        return Boolean.TRUE.equals(saveClassifier);
    }

    /**
     * returns the execution strategy
     *
     * @return String execution strategy
     */
    public String getExecutionStrategy() {
        return executionStrategy;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String,
     * java.lang.String, org.xml.sax.Attributes)
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException
    {
        try {
            switch (qName) {
                case "config":
                    // root element, nothing to configure
                    break;
                case "loader": {
                    final IVersionLoader loader =
                        createInstance(LOADER_PACKAGE, attributes, IVersionLoader.class);
                    loader.setLocation(attributes.getValue("datalocation"));
                    loaders.add(loader);
                    // TODO location as relative
                    break;
                }
                case "resultspath":
                    resultsPath = attributes.getValue("path");
                    break;
                case "versionfilter":
                    versionFilters.add(createVersionFilter(attributes));
                    break;
                case "testVersionfilter":
                    testVersionFilters.add(createVersionFilter(attributes));
                    break;
                case "trainVersionfilter":
                    trainingVersionFilters.add(createVersionFilter(attributes));
                    break;
                case "setwisepreprocessor":
                    setwisepreprocessors.add(createSetWiseProcessor(attributes));
                    break;
                case "setwiseselector": {
                    final ISetWiseDataselectionStrategy selection =
                        createInstance(DATASELECTION_PACKAGE, attributes,
                                       ISetWiseDataselectionStrategy.class);
                    selection.setParameter(attributes.getValue("param"));
                    setwiseselectors.add(selection);
                    break;
                }
                case "setwisepostprocessor":
                    setwisepostprocessors.add(createSetWiseProcessor(attributes));
                    break;
                case "setwisetrainer": {
                    final ISetWiseTrainingStrategy trainer =
                        createInstance(TRAINING_PACKAGE, attributes,
                                       ISetWiseTrainingStrategy.class);
                    trainer.setParameter(attributes.getValue("param"));
                    setwiseTrainers.add(trainer);
                    break;
                }
                case "setwisetestdataawaretrainer": {
                    final ISetWiseTestdataAwareTrainingStrategy trainer =
                        createInstance(TRAINING_PACKAGE, attributes,
                                       ISetWiseTestdataAwareTrainingStrategy.class);
                    trainer.setParameter(attributes.getValue("param"));
                    trainer.setMethod(attributes.getValue("method"));
                    trainer.setThreshold(attributes.getValue("threshold"));
                    setwiseTestdataAwareTrainers.add(trainer);
                    break;
                }
                case "preprocessor":
                    preprocessors.add(createProcessor(attributes));
                    break;
                case "pointwiseselector": {
                    final IPointWiseDataselectionStrategy selection =
                        createInstance(DATASELECTION_PACKAGE, attributes,
                                       IPointWiseDataselectionStrategy.class);
                    selection.setParameter(attributes.getValue("param"));
                    pointwiseselectors.add(selection);
                    break;
                }
                case "postprocessor":
                    postprocessors.add(createProcessor(attributes));
                    break;
                case "trainer": {
                    final ITrainingStrategy trainer =
                        createInstance(TRAINING_PACKAGE, attributes, ITrainingStrategy.class);
                    trainer.setParameter(attributes.getValue("param"));
                    trainers.add(trainer);
                    break;
                }
                case "testawaretrainer": {
                    final ITestAwareTrainingStrategy trainer =
                        createInstance(TRAINING_PACKAGE, attributes,
                                       ITestAwareTrainingStrategy.class);
                    trainer.setParameter(attributes.getValue("param"));
                    testAwareTrainers.add(trainer);
                    break;
                }
                case "eval":
                    evaluators
                        .add(createInstance(EVAL_PACKAGE, attributes, IEvaluationStrategy.class));
                    break;
                case "storage":
                    resultStorages
                        .add(createInstance(EVAL_PACKAGE, attributes, IResultStorage.class));
                    break;
                case "saveClassifier":
                    saveClassifier = true;
                    break;
                case "executionStrategy":
                    executionStrategy = attributes.getValue("name");
                    break;
                case "partialconfig":
                    loadPartialConfiguration(attributes);
                    break;
                default:
                    Console.traceln(Level.WARNING, "element in config-file " +
                        configFile.getName() + " ignored: " + qName);
                    break;
            }
        }
        catch (ClassNotFoundException | IllegalAccessException | InstantiationException
                | ClassCastException e)
        {
            throw new SAXException(INIT_ERROR, e);
        }
        catch (NoClassDefFoundError e) {
            // SAXException only accepts an Exception as cause; an Error must be wrapped, a plain
            // cast to Exception would itself fail with a ClassCastException
            throw new SAXException(INIT_ERROR, new ReflectiveOperationException(e));
        }
    }

    /**
     * Instantiates the class named by the {@code name} attribute of an element using its no-arg
     * constructor.
     *
     * @param packagePrefix
     *            package of the class, including the trailing dot
     * @param attributes
     *            attributes of the current element; the value of {@code name} is the simple class
     *            name
     * @param type
     *            type the created instance must have
     * @return the new instance
     * @throws ClassNotFoundException
     *             thrown if the class does not exist
     * @throws InstantiationException
     *             thrown if the class cannot be instantiated
     * @throws IllegalAccessException
     *             thrown if the class or its no-arg constructor is not accessible
     * @throws ClassCastException
     *             thrown if the class is not of the requested type
     */
    private static <T> T createInstance(String packagePrefix,
                                        Attributes attributes,
                                        Class<T> type)
        throws ClassNotFoundException, InstantiationException, IllegalAccessException
    {
        final String className = packagePrefix + attributes.getValue("name");
        return type.cast(Class.forName(className).newInstance());
    }

    /**
     * Creates a version filter from the {@code name} and {@code param} attributes of an element.
     */
    private static IVersionFilter createVersionFilter(Attributes attributes)
        throws ClassNotFoundException, InstantiationException, IllegalAccessException
    {
        final IVersionFilter filter =
            createInstance(VERSIONS_PACKAGE, attributes, IVersionFilter.class);
        filter.setParameter(attributes.getValue("param"));
        return filter;
    }

    /**
     * Creates a setwise data processor from the {@code name} and {@code param} attributes of an
     * element.
     */
    private static ISetWiseProcessingStrategy createSetWiseProcessor(Attributes attributes)
        throws ClassNotFoundException, InstantiationException, IllegalAccessException
    {
        final ISetWiseProcessingStrategy processor =
            createInstance(DATAPROCESSING_PACKAGE, attributes, ISetWiseProcessingStrategy.class);
        processor.setParameter(attributes.getValue("param"));
        return processor;
    }

    /**
     * Creates a pointwise data processor from the {@code name} and {@code param} attributes of an
     * element.
     */
    private static IProcessesingStrategy createProcessor(Attributes attributes)
        throws ClassNotFoundException, InstantiationException, IllegalAccessException
    {
        final IProcessesingStrategy processor =
            createInstance(DATAPROCESSING_PACKAGE, attributes, IProcessesingStrategy.class);
        processor.setParameter(attributes.getValue("param"));
        return processor;
    }

    /**
     * Handles a {@code partialconfig} element: loads the referenced configuration file and merges
     * it into this configuration. Unless the {@code relative} attribute is present and not
     * "true", the path is resolved against the directory of the current configuration file.
     *
     * @param attributes
     *            attributes of the partialconfig element
     * @throws SAXException
     *             thrown if the path attribute is missing or the partial configuration cannot be
     *             loaded
     */
    private void loadPartialConfiguration(Attributes attributes) throws SAXException {
        final String path = attributes.getValue("path");
        if (path == null) {
            throw new SAXException("partialconfig element without path attribute in " +
                configFile.getName());
        }
        boolean relative = true;
        final String relativeValue = attributes.getValue("relative");
        if (relativeValue != null) {
            relative = Boolean.parseBoolean(relativeValue);
        }
        // File(File, String) tolerates a null parent, which getParentFile() returns if the
        // configuration file was given as a bare file name
        final File partialFile =
            relative ? new File(configFile.getParentFile(), path) : new File(path);
        try {
            addConfigurations(new ExperimentConfiguration(partialFile));
        }
        catch (ExperimentConfigurationException e) {
            throw new SAXException("Could not load partial configuration: " +
                partialFile.getPath(), e);
        }
    }

    /**
     * Adds the information of another experiment configuration to this configuration. This
     * mechanism allows the usage of partial configuration files. The name of the other
     * configuration is lost.
     * <p>
     * If the current results path still has the default value ("results"), it is overridden by
     * the results path of the other configuration. Otherwise, the current results path is kept.
     *
     * @param other
     *            experiment whose information is added
     * @throws ExperimentConfigurationException
     *             thrown if the execution strategies of both configurations differ
     */
    private void addConfigurations(ExperimentConfiguration other)
        throws ExperimentConfigurationException
    {
        // check first, so that nothing is merged if the configurations are incompatible
        if (!executionStrategy.equals(other.executionStrategy)) {
            throw new ExperimentConfigurationException("Executionstrategies must be the same, if config files should be added.");
        }

        if ("results".equals(resultsPath)) {
            resultsPath = other.resultsPath;
        }
        loaders.addAll(other.loaders);
        versionFilters.addAll(other.versionFilters);
        testVersionFilters.addAll(other.testVersionFilters);
        trainingVersionFilters.addAll(other.trainingVersionFilters);
        setwisepreprocessors.addAll(other.setwisepreprocessors);
        setwiseselectors.addAll(other.setwiseselectors);
        setwisepostprocessors.addAll(other.setwisepostprocessors);
        setwiseTrainers.addAll(other.setwiseTrainers);
        setwiseTestdataAwareTrainers.addAll(other.setwiseTestdataAwareTrainers);
        preprocessors.addAll(other.preprocessors);
        pointwiseselectors.addAll(other.pointwiseselectors);
        postprocessors.addAll(other.postprocessors);
        trainers.addAll(other.trainers);
        testAwareTrainers.addAll(other.testAwareTrainers);
        evaluators.addAll(other.evaluators);
        resultStorages.addAll(other.resultStorages);

        /*
         * Only if saveClassifier is not set in the main config and the other configs saveClassifier
         * is true, it must be set. Boolean.TRUE.equals avoids unboxing a null value.
         */
        if (saveClassifier == null && Boolean.TRUE.equals(other.saveClassifier)) {
            saveClassifier = Boolean.TRUE;
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        appendLine(builder, "Experiment name: ", experimentName);
        appendLine(builder, "Loaders: ", loaders);
        appendLine(builder, "Results path: ", resultsPath);
        appendLine(builder, "Version filters: ", versionFilters);
        appendLine(builder, "Test version filters: ", testVersionFilters);
        appendLine(builder, "Training version filters: ", trainingVersionFilters);
        appendLine(builder, "Setwise preprocessors: ", setwisepreprocessors);
        appendLine(builder, "Setwise selectors: ", setwiseselectors);
        appendLine(builder, "Setwise postprocessors: ", setwisepostprocessors);
        appendLine(builder, "Setwise trainers: ", setwiseTrainers);
        appendLine(builder, "Setwise Testdata Aware trainers: ", setwiseTestdataAwareTrainers);
        appendLine(builder, "Pointwise preprocessors: ", preprocessors);
        appendLine(builder, "Pointwise selectors: ", pointwiseselectors);
        appendLine(builder, "Pointwise postprocessors: ", postprocessors);
        appendLine(builder, "Pointwise trainers: ", trainers);
        appendLine(builder, "Test aware trainers: ", testAwareTrainers);
        appendLine(builder, "Evaluators: ", evaluators);
        appendLine(builder, "Result storages: ", resultStorages);
        appendLine(builder, "Save Classifier?: ", saveClassifier);
        appendLine(builder, "Execution Strategy: ", executionStrategy);
        return builder.toString();
    }

    /**
     * Appends a label, the string representation of a value, and a line ending to a builder.
     */
    private static void appendLine(StringBuilder builder, String label, Object value) {
        builder.append(label).append(value).append(StringTools.ENDLINE);
    }
}