source: trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java @ 65

Last change on this file since 65 was 65, checked in by sherbold, 8 years ago
  • added new interface ITestAwareTraining strategy to the framework to support trainers with knowledge of the test data. The implementation of such trainers must take care to not accidentally take the classification of the test data into account.
  • Property svn:mime-type set to text/plain
File size: 26.1 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp;
16
17import java.io.File;
18import java.io.FileInputStream;
19import java.io.FileNotFoundException;
20import java.io.IOException;
21import java.io.InputStreamReader;
22import java.io.UnsupportedEncodingException;
23import java.util.LinkedList;
24import java.util.List;
25import java.util.logging.Level;
26
27import javax.xml.parsers.ParserConfigurationException;
28import javax.xml.parsers.SAXParser;
29import javax.xml.parsers.SAXParserFactory;
30
31import org.xml.sax.Attributes;
32import org.xml.sax.InputSource;
33import org.xml.sax.SAXException;
34import org.xml.sax.helpers.DefaultHandler;
35
36import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
37import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
38import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
39import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
40import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
41import de.ugoe.cs.cpdp.loader.IVersionLoader;
42import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
43import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
44import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
45import de.ugoe.cs.cpdp.training.ITrainingStrategy;
46import de.ugoe.cs.cpdp.versions.IVersionFilter;
47import de.ugoe.cs.util.StringTools;
48import de.ugoe.cs.util.console.Console;
49
50/**
51 * Class that contains all meta information about an experiment, i.e., its configuration. The
52 * configuration is loaded from an XML file. <br>
53 * <br>
54 * In the current implementation, the experiment configuration can only be created using an XML
55 * file. Programmatic creation of experiment configurations is currently not possibly.
56 *
57 * @author Steffen Herbold
58 */
59public class ExperimentConfiguration extends DefaultHandler {
60
61    /**
62     * handle of the file that contains the configuration
63     */
64    private final File configFile;
65
66    /**
67     * name of the experiment (automatically set to the file name without the .xml ending)
68     */
69    private String experimentName = "exp";
70
71    /**
72     * loads instances
73     */
74    private List<IVersionLoader> loaders;
75
76    /**
77     * path were the results of the experiments are stored
78     */
79    private String resultsPath = "results";
80
81    /**
82     * data set filters applied to all data
83     */
84    private List<IVersionFilter> versionFilters;
85
86    /**
87     * data set filters that decide if a data set is used as test data
88     */
89    private List<IVersionFilter> testVersionFilters;
90
91    /**
92     * data set filters that decide if a data is used as candidate training data
93     */
94    private List<IVersionFilter> trainingVersionFilters;
95
96    /**
97     * setwise data processors that are applied before the setwise data selection
98     */
99    private List<ISetWiseProcessingStrategy> setwisepreprocessors;
100
101    /**
102     * setwise data selection strategies
103     */
104    private List<ISetWiseDataselectionStrategy> setwiseselectors;
105
106    /**
107     * setwise data processors that are applied after the setwise data selection
108     */
109    private List<ISetWiseProcessingStrategy> setwisepostprocessors;
110
111    /**
112     * setwise trainers, i.e., trainers that require the selected training data to be separate from
113     * each other
114     */
115    private List<ISetWiseTrainingStrategy> setwiseTrainers;
116
117    /**
118     * setwise testdata aware trainers, i.e., trainers that require the selected training data to be separate from
119     * each other and the current testdata
120     */
121    private List<ISetWiseTestdataAwareTrainingStrategy> setwiseTestdataAwareTrainers;
122   
123    /**
124     * data processors that are applied before the pointwise data selection
125     */
126    private List<IProcessesingStrategy> preprocessors;
127
128    /**
129     * pointwise data selection strategies
130     */
131    private List<IPointWiseDataselectionStrategy> pointwiseselectors;
132
133    /**
134     * data processors that are applied before the pointwise data selection
135     */
136    private List<IProcessesingStrategy> postprocessors;
137
138    /**
139     * normal trainers, i.e., trainers that require the selected training data in a single data set
140     */
141    private List<ITrainingStrategy> trainers;
142   
143    /**
144     * normal trainers, i.e., trainers that require the selected training data in a single data set
145     */
146    private List<ITestAwareTrainingStrategy> testAwareTrainers;
147
148    /**
149     * evaluators used for the the experiment results
150     */
151    private List<IEvaluationStrategy> evaluators;
152
153    /**
154     * indicates, if the classifier should be saved
155     */
156    private Boolean saveClassifier = null;
157
158    /**
159     * indicates, which execution strategy to choose (e.g. CrossProjectExperiment,
160     * ClassifierCreationExecution). Default is CrossProjectExperiment.
161     */
162    private String executionStrategy = "CrossProjectExperiment";
163
164    /**
165     * Constructor. Creates a new configuration from a given file.
166     *
167     * @param filename
168     *            name of the file from the configuration is loaded.
169     * @throws ExperimentConfigurationException
170     *             thrown if there is an error creating the configuration
171     */
172    public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
173        this(new File(filename));
174    }
175
176    /**
177     * Constructor. Creates a new configuration from a given file.
178     *
179     * @param filename
180     *            handle of the file from the configuration is loaded.
181     * @throws ExperimentConfigurationException
182     *             thrown if there is an error creating the configuration
183     */
184    public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
185        loaders = new LinkedList<>();
186        versionFilters = new LinkedList<>();
187        testVersionFilters = new LinkedList<>();
188        trainingVersionFilters = new LinkedList<>();
189        setwisepreprocessors = new LinkedList<>();
190        setwiseselectors = new LinkedList<>();
191        setwisepostprocessors = new LinkedList<>();
192        setwiseTrainers = new LinkedList<>();
193        setwiseTestdataAwareTrainers = new LinkedList<>();
194        preprocessors = new LinkedList<>();
195        pointwiseselectors = new LinkedList<>();
196        postprocessors = new LinkedList<>();
197        trainers = new LinkedList<>();
198        evaluators = new LinkedList<>();
199
200        if (file == null) {
201            throw new IllegalArgumentException("file must not be null");
202        }
203        if (file.isDirectory()) {
204            throw new IllegalArgumentException("file must not be a directory");
205        }
206        configFile = file;
207
208        experimentName = file.getName().split("\\.")[0];
209
210        final SAXParserFactory spf = SAXParserFactory.newInstance();
211        spf.setValidating(true);
212
213        SAXParser saxParser = null;
214        InputSource inputSource = null;
215        try {
216            saxParser = spf.newSAXParser();
217        }
218        catch (ParserConfigurationException | SAXException e) {
219            throw new ExperimentConfigurationException(e);
220        }
221
222        InputStreamReader reader = null;
223        try {
224            reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
225            inputSource = new InputSource(reader);
226        }
227        catch (UnsupportedEncodingException | FileNotFoundException e) {
228            throw new ExperimentConfigurationException("Could not open configuration file.", e);
229        }
230
231        if (inputSource != null) {
232            inputSource.setSystemId("file://" + file.getAbsolutePath());
233            try {
234                saxParser.parse(inputSource, this);
235            }
236            catch (SAXException | IOException e) {
237                throw new ExperimentConfigurationException("Error parsing configuration.", e);
238            }
239        }
240        if (reader != null) {
241            try {
242                reader.close();
243            }
244            catch (IOException e) {
245                throw new ExperimentConfigurationException("Error closing reader.", e);
246            }
247        }
248    }
249
250    /**
251     * returns the name of the experiment
252     *
253     * @return name of the experiment
254     */
255    public String getExperimentName() {
256        return experimentName;
257    }
258
259    /**
260     * returns the loaders for instances
261     *
262     * @return data loaders
263     */
264    public List<IVersionLoader> getLoaders() {
265        return loaders;
266    }
267
268    /**
269     * returns the results path
270     *
271     * @return results path
272     */
273    public String getResultsPath() {
274        return resultsPath;
275    }
276
277    /**
278     * returns the data set filters of the experiment
279     *
280     * @return data set filters of the experiment
281     */
282    public List<IVersionFilter> getVersionFilters() {
283        return versionFilters;
284    }
285
286    /**
287     * returns the test set filters of the experiment
288     *
289     * @return test set filters of the experiment
290     */
291    public List<IVersionFilter> getTestVersionFilters() {
292        return testVersionFilters;
293    }
294
295    /**
296     * returns the candidate training version filters of the experiment
297     *
298     * @return candidate training version filters of the experiment
299     */
300    public List<IVersionFilter> getTrainingVersionFilters() {
301        return trainingVersionFilters;
302    }
303
304    /**
305     * returns the setwise processors applied before the setwise data selection
306     *
307     * @return setwise processors applied before the setwise data selection
308     */
309    public List<ISetWiseProcessingStrategy> getSetWisePreprocessors() {
310        return setwisepreprocessors;
311    }
312
313    /**
314     * returns the setwise data selection strategies
315     *
316     * @return setwise data selection strategies
317     */
318    public List<ISetWiseDataselectionStrategy> getSetWiseSelectors() {
319        return setwiseselectors;
320    }
321
322    /**
323     * returns the setwise processors applied after the setwise data selection
324     *
325     * @return setwise processors applied after the setwise data selection
326     */
327    public List<ISetWiseProcessingStrategy> getSetWisePostprocessors() {
328        return setwisepostprocessors;
329    }
330
331    /**
332     * returns the setwise training algorithms
333     *
334     * @return setwise training algorithms
335     */
336    public List<ISetWiseTrainingStrategy> getSetWiseTrainers() {
337        return setwiseTrainers;
338    }
339
340    /**
341     * returns the setwise training algorithms
342     *
343     * @return setwise training algorithms
344     */
345    public List<ISetWiseTestdataAwareTrainingStrategy> getSetWiseTestdataAwareTrainers() {
346        return setwiseTestdataAwareTrainers;
347    }
348   
349    /**
350     * returns the processors applied before the pointwise data selection
351     *
352     * @return processors applied before the pointwise data selection
353     */
354    public List<IProcessesingStrategy> getPreProcessors() {
355        return preprocessors;
356    }
357
358    /**
359     * returns the pointwise data selection strategies
360     *
361     * @return pointwise data selection strategies
362     */
363    public List<IPointWiseDataselectionStrategy> getPointWiseSelectors() {
364        return pointwiseselectors;
365    }
366
367    /**
368     * returns the processors applied after the pointwise data selection
369     *
370     * @return processors applied after the pointwise data selection
371     */
372    public List<IProcessesingStrategy> getPostProcessors() {
373        return postprocessors;
374    }
375
376    /**
377     * returns the normal training algorithm
378     *
379     * @return normal training algorithms
380     */
381    public List<ITrainingStrategy> getTrainers() {
382        return trainers;
383    }
384   
385    /**
386     * returns the test aware training algorithms
387     *
388     * @return normal training algorithms
389     */
390    public List<ITestAwareTrainingStrategy> getTestAwareTrainers() {
391        return testAwareTrainers;
392    }
393
394    /**
395     * returns the evaluation strategies
396     *
397     * @return evaluation strategies
398     */
399    public List<IEvaluationStrategy> getEvaluators() {
400        return evaluators;
401    }
402
403    /**
404     * returns boolean, if classifier should be saved
405     *
406     * @return boolean
407     */
408    public boolean getSaveClassifier() {
409        return saveClassifier;
410    }
411
412    /**
413     * returns the execution strategy
414     *
415     * @return String execution strategy
416     */
417    public String getExecutionStrategy() {
418        return executionStrategy;
419    }
420
421    /*
422     * (non-Javadoc)
423     *
424     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String,
425     * java.lang.String, org.xml.sax.Attributes)
426     */
427    @Override
428    public void startElement(String uri, String localName, String qName, Attributes attributes)
429        throws SAXException
430    {
431        try {
432            if (qName.equals("config")) {
433                // ingore
434            }
435            else if (qName.equals("loader")) {
436                final IVersionLoader loader =
437                    (IVersionLoader) Class.forName("de.ugoe.cs.cpdp.loader." +
438                                                       attributes.getValue("name")).newInstance();
439                loader.setLocation(attributes.getValue("datalocation"));
440                loaders.add(loader);
441
442                // TODO location as relative
443            }
444            else if (qName.equals("resultspath")) {
445                resultsPath = attributes.getValue("path");
446            }
447            else if (qName.equals("versionfilter")) {
448                final IVersionFilter filter =
449                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
450                                                       attributes.getValue("name")).newInstance();
451                filter.setParameter(attributes.getValue("param"));
452                versionFilters.add(filter);
453            }
454            else if (qName.equals("testVersionfilter")) {
455                final IVersionFilter filter =
456                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
457                                                       attributes.getValue("name")).newInstance();
458                filter.setParameter(attributes.getValue("param"));
459                testVersionFilters.add(filter);
460            }
461            else if (qName.equals("trainVersionfilter")) {
462                final IVersionFilter filter =
463                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
464                                                       attributes.getValue("name")).newInstance();
465                filter.setParameter(attributes.getValue("param"));
466                trainingVersionFilters.add(filter);
467            }
468            else if (qName.equals("setwisepreprocessor")) {
469                final ISetWiseProcessingStrategy processor =
470                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
471                                                                   attributes.getValue("name"))
472                        .newInstance();
473                processor.setParameter(attributes.getValue("param"));
474                setwisepreprocessors.add(processor);
475            }
476            else if (qName.equals("setwiseselector")) {
477                final ISetWiseDataselectionStrategy selection =
478                    (ISetWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." +
479                                                                      attributes.getValue("name"))
480                        .newInstance();
481                selection.setParameter(attributes.getValue("param"));
482                setwiseselectors.add(selection);
483            }
484            else if (qName.equals("setwisepostprocessor")) {
485                final ISetWiseProcessingStrategy processor =
486                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
487                                                                   attributes.getValue("name"))
488                        .newInstance();
489                processor.setParameter(attributes.getValue("param"));
490                setwisepostprocessors.add(processor);
491            }
492            else if (qName.equals("setwisetrainer")) {
493                final ISetWiseTrainingStrategy trainer =
494                    (ISetWiseTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
495                                                                 attributes.getValue("name"))
496                        .newInstance();
497                trainer.setParameter(attributes.getValue("param"));
498                setwiseTrainers.add(trainer);
499            }
500            else if (qName.equals("setwisetestdataawaretrainer")) {
501                final ISetWiseTestdataAwareTrainingStrategy trainer =
502                    (ISetWiseTestdataAwareTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
503                                                                 attributes.getValue("name"))
504                        .newInstance();
505                trainer.setParameter(attributes.getValue("param"));
506                trainer.setMethod(attributes.getValue("method"));
507                trainer.setThreshold(attributes.getValue("threshold"));
508                setwiseTestdataAwareTrainers.add(trainer);
509            }
510            else if (qName.equals("preprocessor")) {
511                final IProcessesingStrategy processor =
512                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
513                                                              attributes.getValue("name"))
514                        .newInstance();
515                processor.setParameter(attributes.getValue("param"));
516                preprocessors.add(processor);
517            }
518            else if (qName.equals("pointwiseselector")) {
519                final IPointWiseDataselectionStrategy selection =
520                    (IPointWiseDataselectionStrategy) Class
521                        .forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name"))
522                        .newInstance();
523                selection.setParameter(attributes.getValue("param"));
524                pointwiseselectors.add(selection);
525            }
526            else if (qName.equals("postprocessor")) {
527                final IProcessesingStrategy processor =
528                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
529                                                              attributes.getValue("name"))
530                        .newInstance();
531                processor.setParameter(attributes.getValue("param"));
532                postprocessors.add(processor);
533            }
534            else if (qName.equals("trainer")) {
535                final ITrainingStrategy trainer =
536                    (ITrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
537                                                          attributes.getValue("name"))
538                        .newInstance();
539                trainer.setParameter(attributes.getValue("param"));
540                trainers.add(trainer);
541            }
542            else if (qName.equals("testawaretrainer")) {
543                final ITestAwareTrainingStrategy trainer =
544                    (ITestAwareTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
545                                                          attributes.getValue("name"))
546                        .newInstance();
547                trainer.setParameter(attributes.getValue("param"));
548                testAwareTrainers.add(trainer);
549            }
550            else if (qName.equals("eval")) {
551                final IEvaluationStrategy evaluator =
552                    (IEvaluationStrategy) Class.forName("de.ugoe.cs.cpdp.eval." +
553                                                            attributes.getValue("name"))
554                        .newInstance();
555                evaluators.add(evaluator);
556            }
557            else if (qName.equals("saveClassifier")) {
558                saveClassifier = true;
559            }
560            else if (qName.equals("executionStrategy")) {
561                executionStrategy = attributes.getValue("name");
562            }
563            else if (qName.equals("partialconfig")) {
564                String path = attributes.getValue("path");
565                try {
566                    boolean relative = true;
567                    if (attributes.getValue("relative") != null) {
568                        relative = Boolean.parseBoolean(attributes.getValue("relative"));
569                    }
570
571                    if (relative) {
572                        path = configFile.getParentFile().getPath() + "/" + path;
573                    }
574                    addConfigurations(new ExperimentConfiguration(path));
575                }
576                catch (ExperimentConfigurationException e) {
577                    throw new SAXException("Could not load partial configuration: " + path, e);
578                }
579            }
580            else {
581                Console.traceln(Level.WARNING, "element in config-file " + configFile.getName() +
582                    " ignored: " + qName);
583            }
584        }
585        catch (NoClassDefFoundError | ClassNotFoundException | IllegalAccessException
586                | InstantiationException | ClassCastException e)
587        {
588            throw new SAXException("Could not initialize class correctly", (Exception) e);
589        }
590    }
591
592    /**
593     * Adds the information of another experiment configuration to this configuration. This
594     * mechanism allows the usage of partial configuration files. The name of the other
595     * configuration is lost. <br>
596     * <br>
597     * If the current data path is the empty string (&quot;&quot;), it is override by the datapath
598     * of the other configuration. Otherwise, the current data path is kept.
599     *
600     * @param other
601     *            experiment whose information is added
602     * @throws ExperimentConfigurationException
603     */
604    private void addConfigurations(ExperimentConfiguration other)
605        throws ExperimentConfigurationException
606    {
607        if ("results".equals(resultsPath)) {
608            resultsPath = other.resultsPath;
609        }
610        loaders.addAll(other.loaders);
611        versionFilters.addAll(other.versionFilters);
612        testVersionFilters.addAll(other.testVersionFilters);
613        trainingVersionFilters.addAll(other.trainingVersionFilters);
614        setwisepreprocessors.addAll(other.setwisepreprocessors);
615        setwiseselectors.addAll(other.setwiseselectors);
616        setwisepostprocessors.addAll(other.setwisepostprocessors);
617        setwiseTrainers.addAll(other.setwiseTrainers);
618        setwiseTestdataAwareTrainers.addAll(other.setwiseTestdataAwareTrainers);
619        preprocessors.addAll(other.preprocessors);
620        pointwiseselectors.addAll(other.pointwiseselectors);
621        postprocessors.addAll(other.postprocessors);
622        trainers.addAll(other.trainers);
623        evaluators.addAll(other.evaluators);
624
625        if (!executionStrategy.equals(other.executionStrategy)) {
626            throw new ExperimentConfigurationException(
627                                                       "Executionstrategies must be the same, if config files should be added.");
628        }
629
630        /*
631         * Only if saveClassifier is not set in the main config and the other configs saveClassifier
632         * is true, it must be set.
633         */
634        if (saveClassifier == null && other.saveClassifier == true) {
635            saveClassifier = other.saveClassifier;
636        }
637
638    }
639
640    /*
641     * (non-Javadoc)
642     *
643     * @see java.lang.Object#toString()
644     */
645    @Override
646    public String toString() {
647        final StringBuilder builder = new StringBuilder();
648        builder.append("Experiment name: " + experimentName + StringTools.ENDLINE);
649        builder.append("Loaders: " + loaders + StringTools.ENDLINE);
650        builder.append("Results path: " + resultsPath + StringTools.ENDLINE);
651        builder.append("Version filters: " + versionFilters.toString() + StringTools.ENDLINE);
652        builder.append("Test version filters: " + testVersionFilters.toString() +
653            StringTools.ENDLINE);
654        builder.append("Training version filters: " + trainingVersionFilters.toString() +
655            StringTools.ENDLINE);
656        builder.append("Setwise preprocessors: " + setwisepreprocessors.toString() +
657            StringTools.ENDLINE);
658        builder.append("Setwise selectors: " + setwiseselectors.toString() + StringTools.ENDLINE);
659        builder.append("Setwise postprocessors: " + setwisepostprocessors.toString() +
660            StringTools.ENDLINE);
661        builder.append("Setwise trainers: " + setwiseTrainers.toString() + StringTools.ENDLINE);
662        builder.append("Setwise Testdata Aware trainers: " + setwiseTestdataAwareTrainers.toString() + StringTools.ENDLINE);
663        builder
664            .append("Pointwise preprocessors: " + preprocessors.toString() + StringTools.ENDLINE);
665        builder.append("Pointwise selectors: " + pointwiseselectors.toString() +
666            StringTools.ENDLINE);
667        builder.append("Pointwise postprocessors: " + postprocessors.toString() +
668            StringTools.ENDLINE);
669        builder.append("Pointwise trainers: " + trainers.toString() + StringTools.ENDLINE);
670        builder.append("Evaluators: " + evaluators.toString() + StringTools.ENDLINE);
671        builder.append("Save Classifier?: " + saveClassifier + StringTools.ENDLINE);
672        builder.append("Execution Strategy: " + executionStrategy + StringTools.ENDLINE);
673
674        return builder.toString();
675    }
676}
Note: See TracBrowser for help on using the repository browser.