source: trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java @ 66

Last change on this file since 66 was 66, checked in by sherbold, 9 years ago
  • fixed one bug in the experiment configuration for test aware classifiers and added infrastructure for usage of this new interface with Weka classifiers.
  • Property svn:mime-type set to text/plain
File size: 26.1 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp;
16
17import java.io.File;
18import java.io.FileInputStream;
19import java.io.FileNotFoundException;
20import java.io.IOException;
21import java.io.InputStreamReader;
22import java.io.UnsupportedEncodingException;
23import java.util.LinkedList;
24import java.util.List;
25import java.util.logging.Level;
26
27import javax.xml.parsers.ParserConfigurationException;
28import javax.xml.parsers.SAXParser;
29import javax.xml.parsers.SAXParserFactory;
30
31import org.xml.sax.Attributes;
32import org.xml.sax.InputSource;
33import org.xml.sax.SAXException;
34import org.xml.sax.helpers.DefaultHandler;
35
36import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
37import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
38import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
39import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
40import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
41import de.ugoe.cs.cpdp.loader.IVersionLoader;
42import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
43import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
44import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
45import de.ugoe.cs.cpdp.training.ITrainingStrategy;
46import de.ugoe.cs.cpdp.versions.IVersionFilter;
47import de.ugoe.cs.util.StringTools;
48import de.ugoe.cs.util.console.Console;
49
50/**
51 * Class that contains all meta information about an experiment, i.e., its configuration. The
52 * configuration is loaded from an XML file. <br>
53 * <br>
54 * In the current implementation, the experiment configuration can only be created using an XML
55 * file. Programmatic creation of experiment configurations is currently not possibly.
56 *
57 * @author Steffen Herbold
58 */
59public class ExperimentConfiguration extends DefaultHandler {
60
61    /**
62     * handle of the file that contains the configuration
63     */
64    private final File configFile;
65
66    /**
67     * name of the experiment (automatically set to the file name without the .xml ending)
68     */
69    private String experimentName = "exp";
70
71    /**
72     * loads instances
73     */
74    private List<IVersionLoader> loaders;
75
76    /**
77     * path were the results of the experiments are stored
78     */
79    private String resultsPath = "results";
80
81    /**
82     * data set filters applied to all data
83     */
84    private List<IVersionFilter> versionFilters;
85
86    /**
87     * data set filters that decide if a data set is used as test data
88     */
89    private List<IVersionFilter> testVersionFilters;
90
91    /**
92     * data set filters that decide if a data is used as candidate training data
93     */
94    private List<IVersionFilter> trainingVersionFilters;
95
96    /**
97     * setwise data processors that are applied before the setwise data selection
98     */
99    private List<ISetWiseProcessingStrategy> setwisepreprocessors;
100
101    /**
102     * setwise data selection strategies
103     */
104    private List<ISetWiseDataselectionStrategy> setwiseselectors;
105
106    /**
107     * setwise data processors that are applied after the setwise data selection
108     */
109    private List<ISetWiseProcessingStrategy> setwisepostprocessors;
110
111    /**
112     * setwise trainers, i.e., trainers that require the selected training data to be separate from
113     * each other
114     */
115    private List<ISetWiseTrainingStrategy> setwiseTrainers;
116
117    /**
118     * setwise testdata aware trainers, i.e., trainers that require the selected training data to be separate from
119     * each other and the current testdata
120     */
121    private List<ISetWiseTestdataAwareTrainingStrategy> setwiseTestdataAwareTrainers;
122   
123    /**
124     * data processors that are applied before the pointwise data selection
125     */
126    private List<IProcessesingStrategy> preprocessors;
127
128    /**
129     * pointwise data selection strategies
130     */
131    private List<IPointWiseDataselectionStrategy> pointwiseselectors;
132
133    /**
134     * data processors that are applied before the pointwise data selection
135     */
136    private List<IProcessesingStrategy> postprocessors;
137
138    /**
139     * normal trainers, i.e., trainers that require the selected training data in a single data set
140     */
141    private List<ITrainingStrategy> trainers;
142   
143    /**
144     * normal trainers, i.e., trainers that require the selected training data in a single data set
145     */
146    private List<ITestAwareTrainingStrategy> testAwareTrainers;
147
148    /**
149     * evaluators used for the the experiment results
150     */
151    private List<IEvaluationStrategy> evaluators;
152
153    /**
154     * indicates, if the classifier should be saved
155     */
156    private Boolean saveClassifier = null;
157
158    /**
159     * indicates, which execution strategy to choose (e.g. CrossProjectExperiment,
160     * ClassifierCreationExecution). Default is CrossProjectExperiment.
161     */
162    private String executionStrategy = "CrossProjectExperiment";
163
164    /**
165     * Constructor. Creates a new configuration from a given file.
166     *
167     * @param filename
168     *            name of the file from the configuration is loaded.
169     * @throws ExperimentConfigurationException
170     *             thrown if there is an error creating the configuration
171     */
172    public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
173        this(new File(filename));
174    }
175
176    /**
177     * Constructor. Creates a new configuration from a given file.
178     *
179     * @param filename
180     *            handle of the file from the configuration is loaded.
181     * @throws ExperimentConfigurationException
182     *             thrown if there is an error creating the configuration
183     */
184    public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
185        loaders = new LinkedList<>();
186        versionFilters = new LinkedList<>();
187        testVersionFilters = new LinkedList<>();
188        trainingVersionFilters = new LinkedList<>();
189        setwisepreprocessors = new LinkedList<>();
190        setwiseselectors = new LinkedList<>();
191        setwisepostprocessors = new LinkedList<>();
192        setwiseTrainers = new LinkedList<>();
193        setwiseTestdataAwareTrainers = new LinkedList<>();
194        preprocessors = new LinkedList<>();
195        pointwiseselectors = new LinkedList<>();
196        postprocessors = new LinkedList<>();
197        trainers = new LinkedList<>();
198        testAwareTrainers = new LinkedList<>();
199        evaluators = new LinkedList<>();
200
201        if (file == null) {
202            throw new IllegalArgumentException("file must not be null");
203        }
204        if (file.isDirectory()) {
205            throw new IllegalArgumentException("file must not be a directory");
206        }
207        configFile = file;
208
209        experimentName = file.getName().split("\\.")[0];
210
211        final SAXParserFactory spf = SAXParserFactory.newInstance();
212        spf.setValidating(true);
213
214        SAXParser saxParser = null;
215        InputSource inputSource = null;
216        try {
217            saxParser = spf.newSAXParser();
218        }
219        catch (ParserConfigurationException | SAXException e) {
220            throw new ExperimentConfigurationException(e);
221        }
222
223        InputStreamReader reader = null;
224        try {
225            reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
226            inputSource = new InputSource(reader);
227        }
228        catch (UnsupportedEncodingException | FileNotFoundException e) {
229            throw new ExperimentConfigurationException("Could not open configuration file.", e);
230        }
231
232        if (inputSource != null) {
233            inputSource.setSystemId("file://" + file.getAbsolutePath());
234            try {
235                saxParser.parse(inputSource, this);
236            }
237            catch (SAXException | IOException e) {
238                throw new ExperimentConfigurationException("Error parsing configuration.", e);
239            }
240        }
241        if (reader != null) {
242            try {
243                reader.close();
244            }
245            catch (IOException e) {
246                throw new ExperimentConfigurationException("Error closing reader.", e);
247            }
248        }
249    }
250
251    /**
252     * returns the name of the experiment
253     *
254     * @return name of the experiment
255     */
256    public String getExperimentName() {
257        return experimentName;
258    }
259
260    /**
261     * returns the loaders for instances
262     *
263     * @return data loaders
264     */
265    public List<IVersionLoader> getLoaders() {
266        return loaders;
267    }
268
269    /**
270     * returns the results path
271     *
272     * @return results path
273     */
274    public String getResultsPath() {
275        return resultsPath;
276    }
277
278    /**
279     * returns the data set filters of the experiment
280     *
281     * @return data set filters of the experiment
282     */
283    public List<IVersionFilter> getVersionFilters() {
284        return versionFilters;
285    }
286
287    /**
288     * returns the test set filters of the experiment
289     *
290     * @return test set filters of the experiment
291     */
292    public List<IVersionFilter> getTestVersionFilters() {
293        return testVersionFilters;
294    }
295
296    /**
297     * returns the candidate training version filters of the experiment
298     *
299     * @return candidate training version filters of the experiment
300     */
301    public List<IVersionFilter> getTrainingVersionFilters() {
302        return trainingVersionFilters;
303    }
304
305    /**
306     * returns the setwise processors applied before the setwise data selection
307     *
308     * @return setwise processors applied before the setwise data selection
309     */
310    public List<ISetWiseProcessingStrategy> getSetWisePreprocessors() {
311        return setwisepreprocessors;
312    }
313
314    /**
315     * returns the setwise data selection strategies
316     *
317     * @return setwise data selection strategies
318     */
319    public List<ISetWiseDataselectionStrategy> getSetWiseSelectors() {
320        return setwiseselectors;
321    }
322
323    /**
324     * returns the setwise processors applied after the setwise data selection
325     *
326     * @return setwise processors applied after the setwise data selection
327     */
328    public List<ISetWiseProcessingStrategy> getSetWisePostprocessors() {
329        return setwisepostprocessors;
330    }
331
332    /**
333     * returns the setwise training algorithms
334     *
335     * @return setwise training algorithms
336     */
337    public List<ISetWiseTrainingStrategy> getSetWiseTrainers() {
338        return setwiseTrainers;
339    }
340
341    /**
342     * returns the setwise training algorithms
343     *
344     * @return setwise training algorithms
345     */
346    public List<ISetWiseTestdataAwareTrainingStrategy> getSetWiseTestdataAwareTrainers() {
347        return setwiseTestdataAwareTrainers;
348    }
349   
350    /**
351     * returns the processors applied before the pointwise data selection
352     *
353     * @return processors applied before the pointwise data selection
354     */
355    public List<IProcessesingStrategy> getPreProcessors() {
356        return preprocessors;
357    }
358
359    /**
360     * returns the pointwise data selection strategies
361     *
362     * @return pointwise data selection strategies
363     */
364    public List<IPointWiseDataselectionStrategy> getPointWiseSelectors() {
365        return pointwiseselectors;
366    }
367
368    /**
369     * returns the processors applied after the pointwise data selection
370     *
371     * @return processors applied after the pointwise data selection
372     */
373    public List<IProcessesingStrategy> getPostProcessors() {
374        return postprocessors;
375    }
376
377    /**
378     * returns the normal training algorithm
379     *
380     * @return normal training algorithms
381     */
382    public List<ITrainingStrategy> getTrainers() {
383        return trainers;
384    }
385   
386    /**
387     * returns the test aware training algorithms
388     *
389     * @return normal training algorithms
390     */
391    public List<ITestAwareTrainingStrategy> getTestAwareTrainers() {
392        return testAwareTrainers;
393    }
394
395    /**
396     * returns the evaluation strategies
397     *
398     * @return evaluation strategies
399     */
400    public List<IEvaluationStrategy> getEvaluators() {
401        return evaluators;
402    }
403
404    /**
405     * returns boolean, if classifier should be saved
406     *
407     * @return boolean
408     */
409    public boolean getSaveClassifier() {
410        return saveClassifier;
411    }
412
413    /**
414     * returns the execution strategy
415     *
416     * @return String execution strategy
417     */
418    public String getExecutionStrategy() {
419        return executionStrategy;
420    }
421
422    /*
423     * (non-Javadoc)
424     *
425     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String,
426     * java.lang.String, org.xml.sax.Attributes)
427     */
428    @Override
429    public void startElement(String uri, String localName, String qName, Attributes attributes)
430        throws SAXException
431    {
432        try {
433            if (qName.equals("config")) {
434                // ingore
435            }
436            else if (qName.equals("loader")) {
437                final IVersionLoader loader =
438                    (IVersionLoader) Class.forName("de.ugoe.cs.cpdp.loader." +
439                                                       attributes.getValue("name")).newInstance();
440                loader.setLocation(attributes.getValue("datalocation"));
441                loaders.add(loader);
442
443                // TODO location as relative
444            }
445            else if (qName.equals("resultspath")) {
446                resultsPath = attributes.getValue("path");
447            }
448            else if (qName.equals("versionfilter")) {
449                final IVersionFilter filter =
450                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
451                                                       attributes.getValue("name")).newInstance();
452                filter.setParameter(attributes.getValue("param"));
453                versionFilters.add(filter);
454            }
455            else if (qName.equals("testVersionfilter")) {
456                final IVersionFilter filter =
457                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
458                                                       attributes.getValue("name")).newInstance();
459                filter.setParameter(attributes.getValue("param"));
460                testVersionFilters.add(filter);
461            }
462            else if (qName.equals("trainVersionfilter")) {
463                final IVersionFilter filter =
464                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
465                                                       attributes.getValue("name")).newInstance();
466                filter.setParameter(attributes.getValue("param"));
467                trainingVersionFilters.add(filter);
468            }
469            else if (qName.equals("setwisepreprocessor")) {
470                final ISetWiseProcessingStrategy processor =
471                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
472                                                                   attributes.getValue("name"))
473                        .newInstance();
474                processor.setParameter(attributes.getValue("param"));
475                setwisepreprocessors.add(processor);
476            }
477            else if (qName.equals("setwiseselector")) {
478                final ISetWiseDataselectionStrategy selection =
479                    (ISetWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." +
480                                                                      attributes.getValue("name"))
481                        .newInstance();
482                selection.setParameter(attributes.getValue("param"));
483                setwiseselectors.add(selection);
484            }
485            else if (qName.equals("setwisepostprocessor")) {
486                final ISetWiseProcessingStrategy processor =
487                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
488                                                                   attributes.getValue("name"))
489                        .newInstance();
490                processor.setParameter(attributes.getValue("param"));
491                setwisepostprocessors.add(processor);
492            }
493            else if (qName.equals("setwisetrainer")) {
494                final ISetWiseTrainingStrategy trainer =
495                    (ISetWiseTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
496                                                                 attributes.getValue("name"))
497                        .newInstance();
498                trainer.setParameter(attributes.getValue("param"));
499                setwiseTrainers.add(trainer);
500            }
501            else if (qName.equals("setwisetestdataawaretrainer")) {
502                final ISetWiseTestdataAwareTrainingStrategy trainer =
503                    (ISetWiseTestdataAwareTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
504                                                                 attributes.getValue("name"))
505                        .newInstance();
506                trainer.setParameter(attributes.getValue("param"));
507                trainer.setMethod(attributes.getValue("method"));
508                trainer.setThreshold(attributes.getValue("threshold"));
509                setwiseTestdataAwareTrainers.add(trainer);
510            }
511            else if (qName.equals("preprocessor")) {
512                final IProcessesingStrategy processor =
513                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
514                                                              attributes.getValue("name"))
515                        .newInstance();
516                processor.setParameter(attributes.getValue("param"));
517                preprocessors.add(processor);
518            }
519            else if (qName.equals("pointwiseselector")) {
520                final IPointWiseDataselectionStrategy selection =
521                    (IPointWiseDataselectionStrategy) Class
522                        .forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name"))
523                        .newInstance();
524                selection.setParameter(attributes.getValue("param"));
525                pointwiseselectors.add(selection);
526            }
527            else if (qName.equals("postprocessor")) {
528                final IProcessesingStrategy processor =
529                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
530                                                              attributes.getValue("name"))
531                        .newInstance();
532                processor.setParameter(attributes.getValue("param"));
533                postprocessors.add(processor);
534            }
535            else if (qName.equals("trainer")) {
536                final ITrainingStrategy trainer =
537                    (ITrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
538                                                          attributes.getValue("name"))
539                        .newInstance();
540                trainer.setParameter(attributes.getValue("param"));
541                trainers.add(trainer);
542            }
543            else if (qName.equals("testawaretrainer")) {
544                final ITestAwareTrainingStrategy trainer =
545                    (ITestAwareTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
546                                                          attributes.getValue("name"))
547                        .newInstance();
548                trainer.setParameter(attributes.getValue("param"));
549                testAwareTrainers.add(trainer);
550            }
551            else if (qName.equals("eval")) {
552                final IEvaluationStrategy evaluator =
553                    (IEvaluationStrategy) Class.forName("de.ugoe.cs.cpdp.eval." +
554                                                            attributes.getValue("name"))
555                        .newInstance();
556                evaluators.add(evaluator);
557            }
558            else if (qName.equals("saveClassifier")) {
559                saveClassifier = true;
560            }
561            else if (qName.equals("executionStrategy")) {
562                executionStrategy = attributes.getValue("name");
563            }
564            else if (qName.equals("partialconfig")) {
565                String path = attributes.getValue("path");
566                try {
567                    boolean relative = true;
568                    if (attributes.getValue("relative") != null) {
569                        relative = Boolean.parseBoolean(attributes.getValue("relative"));
570                    }
571
572                    if (relative) {
573                        path = configFile.getParentFile().getPath() + "/" + path;
574                    }
575                    addConfigurations(new ExperimentConfiguration(path));
576                }
577                catch (ExperimentConfigurationException e) {
578                    throw new SAXException("Could not load partial configuration: " + path, e);
579                }
580            }
581            else {
582                Console.traceln(Level.WARNING, "element in config-file " + configFile.getName() +
583                    " ignored: " + qName);
584            }
585        }
586        catch (NoClassDefFoundError | ClassNotFoundException | IllegalAccessException
587                | InstantiationException | ClassCastException e)
588        {
589            throw new SAXException("Could not initialize class correctly", (Exception) e);
590        }
591    }
592
593    /**
594     * Adds the information of another experiment configuration to this configuration. This
595     * mechanism allows the usage of partial configuration files. The name of the other
596     * configuration is lost. <br>
597     * <br>
598     * If the current data path is the empty string (&quot;&quot;), it is override by the datapath
599     * of the other configuration. Otherwise, the current data path is kept.
600     *
601     * @param other
602     *            experiment whose information is added
603     * @throws ExperimentConfigurationException
604     */
605    private void addConfigurations(ExperimentConfiguration other)
606        throws ExperimentConfigurationException
607    {
608        if ("results".equals(resultsPath)) {
609            resultsPath = other.resultsPath;
610        }
611        loaders.addAll(other.loaders);
612        versionFilters.addAll(other.versionFilters);
613        testVersionFilters.addAll(other.testVersionFilters);
614        trainingVersionFilters.addAll(other.trainingVersionFilters);
615        setwisepreprocessors.addAll(other.setwisepreprocessors);
616        setwiseselectors.addAll(other.setwiseselectors);
617        setwisepostprocessors.addAll(other.setwisepostprocessors);
618        setwiseTrainers.addAll(other.setwiseTrainers);
619        setwiseTestdataAwareTrainers.addAll(other.setwiseTestdataAwareTrainers);
620        preprocessors.addAll(other.preprocessors);
621        pointwiseselectors.addAll(other.pointwiseselectors);
622        postprocessors.addAll(other.postprocessors);
623        trainers.addAll(other.trainers);
624        evaluators.addAll(other.evaluators);
625
626        if (!executionStrategy.equals(other.executionStrategy)) {
627            throw new ExperimentConfigurationException(
628                                                       "Executionstrategies must be the same, if config files should be added.");
629        }
630
631        /*
632         * Only if saveClassifier is not set in the main config and the other configs saveClassifier
633         * is true, it must be set.
634         */
635        if (saveClassifier == null && other.saveClassifier == true) {
636            saveClassifier = other.saveClassifier;
637        }
638
639    }
640
641    /*
642     * (non-Javadoc)
643     *
644     * @see java.lang.Object#toString()
645     */
646    @Override
647    public String toString() {
648        final StringBuilder builder = new StringBuilder();
649        builder.append("Experiment name: " + experimentName + StringTools.ENDLINE);
650        builder.append("Loaders: " + loaders + StringTools.ENDLINE);
651        builder.append("Results path: " + resultsPath + StringTools.ENDLINE);
652        builder.append("Version filters: " + versionFilters.toString() + StringTools.ENDLINE);
653        builder.append("Test version filters: " + testVersionFilters.toString() +
654            StringTools.ENDLINE);
655        builder.append("Training version filters: " + trainingVersionFilters.toString() +
656            StringTools.ENDLINE);
657        builder.append("Setwise preprocessors: " + setwisepreprocessors.toString() +
658            StringTools.ENDLINE);
659        builder.append("Setwise selectors: " + setwiseselectors.toString() + StringTools.ENDLINE);
660        builder.append("Setwise postprocessors: " + setwisepostprocessors.toString() +
661            StringTools.ENDLINE);
662        builder.append("Setwise trainers: " + setwiseTrainers.toString() + StringTools.ENDLINE);
663        builder.append("Setwise Testdata Aware trainers: " + setwiseTestdataAwareTrainers.toString() + StringTools.ENDLINE);
664        builder
665            .append("Pointwise preprocessors: " + preprocessors.toString() + StringTools.ENDLINE);
666        builder.append("Pointwise selectors: " + pointwiseselectors.toString() +
667            StringTools.ENDLINE);
668        builder.append("Pointwise postprocessors: " + postprocessors.toString() +
669            StringTools.ENDLINE);
670        builder.append("Pointwise trainers: " + trainers.toString() + StringTools.ENDLINE);
671        builder.append("Evaluators: " + evaluators.toString() + StringTools.ENDLINE);
672        builder.append("Save Classifier?: " + saveClassifier + StringTools.ENDLINE);
673        builder.append("Execution Strategy: " + executionStrategy + StringTools.ENDLINE);
674
675        return builder.toString();
676    }
677}
Note: See TracBrowser for help on using the repository browser.