source: trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java @ 69

Last change on this file since 69 was 68, checked in by sherbold, 8 years ago
  • added the concept of result storages to the framework and implemented a very simple first prototype of a MySQLResultStorage (that currently only works with a locally running database)
  • Property svn:mime-type set to text/plain
File size: 25.9 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp;
16
17import java.io.File;
18import java.io.FileInputStream;
19import java.io.FileNotFoundException;
20import java.io.IOException;
21import java.io.InputStreamReader;
22import java.io.UnsupportedEncodingException;
23import java.util.LinkedList;
24import java.util.List;
25import java.util.logging.Level;
26
27import javax.xml.parsers.ParserConfigurationException;
28import javax.xml.parsers.SAXParser;
29import javax.xml.parsers.SAXParserFactory;
30
31import org.xml.sax.Attributes;
32import org.xml.sax.InputSource;
33import org.xml.sax.SAXException;
34import org.xml.sax.helpers.DefaultHandler;
35
36import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
37import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
38import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
39import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
40import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
41import de.ugoe.cs.cpdp.eval.IResultStorage;
42import de.ugoe.cs.cpdp.loader.IVersionLoader;
43import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
44import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
45import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
46import de.ugoe.cs.cpdp.training.ITrainingStrategy;
47import de.ugoe.cs.cpdp.versions.IVersionFilter;
48import de.ugoe.cs.util.StringTools;
49import de.ugoe.cs.util.console.Console;
50
51/**
52 * Class that contains all meta information about an experiment, i.e., its configuration. The
53 * configuration is loaded from an XML file. <br>
54 * <br>
55 * In the current implementation, the experiment configuration can only be created using an XML
56 * file. Programmatic creation of experiment configurations is currently not possibly.
57 *
58 * @author Steffen Herbold
59 */
60public class ExperimentConfiguration extends DefaultHandler {
61
62    /**
63     * handle of the file that contains the configuration
64     */
65    private final File configFile;
66
67    /**
68     * name of the experiment (automatically set to the file name without the .xml ending)
69     */
70    private String experimentName = "exp";
71
72    /**
73     * loads instances
74     */
75    private List<IVersionLoader> loaders;
76
77    /**
78     * path were the results of the experiments are stored
79     */
80    private String resultsPath = "results";
81
82    /**
83     * data set filters applied to all data
84     */
85    private List<IVersionFilter> versionFilters;
86
87    /**
88     * data set filters that decide if a data set is used as test data
89     */
90    private List<IVersionFilter> testVersionFilters;
91
92    /**
93     * data set filters that decide if a data is used as candidate training data
94     */
95    private List<IVersionFilter> trainingVersionFilters;
96
97    /**
98     * setwise data processors that are applied before the setwise data selection
99     */
100    private List<ISetWiseProcessingStrategy> setwisepreprocessors;
101
102    /**
103     * setwise data selection strategies
104     */
105    private List<ISetWiseDataselectionStrategy> setwiseselectors;
106
107    /**
108     * setwise data processors that are applied after the setwise data selection
109     */
110    private List<ISetWiseProcessingStrategy> setwisepostprocessors;
111
112    /**
113     * setwise trainers, i.e., trainers that require the selected training data to be separate from
114     * each other
115     */
116    private List<ISetWiseTrainingStrategy> setwiseTrainers;
117
118    /**
119     * setwise testdata aware trainers, i.e., trainers that require the selected training data to be
120     * separate from each other and the current testdata
121     */
122    private List<ISetWiseTestdataAwareTrainingStrategy> setwiseTestdataAwareTrainers;
123
124    /**
125     * data processors that are applied before the pointwise data selection
126     */
127    private List<IProcessesingStrategy> preprocessors;
128
129    /**
130     * pointwise data selection strategies
131     */
132    private List<IPointWiseDataselectionStrategy> pointwiseselectors;
133
134    /**
135     * data processors that are applied before the pointwise data selection
136     */
137    private List<IProcessesingStrategy> postprocessors;
138
139    /**
140     * normal trainers, i.e., trainers that require the selected training data in a single data set
141     */
142    private List<ITrainingStrategy> trainers;
143
144    /**
145     * normal trainers, i.e., trainers that require the selected training data in a single data set
146     */
147    private List<ITestAwareTrainingStrategy> testAwareTrainers;
148
149    /**
150     * evaluators used for the the experiment results
151     */
152    private List<IEvaluationStrategy> evaluators;
153
154    /**
155     * result storages used for experiments
156     */
157    private List<IResultStorage> resultStorages;
158
159    /**
160     * indicates, if the classifier should be saved
161     */
162    private Boolean saveClassifier = null;
163
164    /**
165     * indicates, which execution strategy to choose (e.g. CrossProjectExperiment,
166     * ClassifierCreationExecution). Default is CrossProjectExperiment.
167     */
168    private String executionStrategy = "CrossProjectExperiment";
169
170    /**
171     * Constructor. Creates a new configuration from a given file.
172     *
173     * @param filename
174     *            name of the file from the configuration is loaded.
175     * @throws ExperimentConfigurationException
176     *             thrown if there is an error creating the configuration
177     */
178    public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
179        this(new File(filename));
180    }
181
182    /**
183     * Constructor. Creates a new configuration from a given file.
184     *
185     * @param filename
186     *            handle of the file from the configuration is loaded.
187     * @throws ExperimentConfigurationException
188     *             thrown if there is an error creating the configuration
189     */
190    public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
191        loaders = new LinkedList<>();
192        versionFilters = new LinkedList<>();
193        testVersionFilters = new LinkedList<>();
194        trainingVersionFilters = new LinkedList<>();
195        setwisepreprocessors = new LinkedList<>();
196        setwiseselectors = new LinkedList<>();
197        setwisepostprocessors = new LinkedList<>();
198        setwiseTrainers = new LinkedList<>();
199        setwiseTestdataAwareTrainers = new LinkedList<>();
200        preprocessors = new LinkedList<>();
201        pointwiseselectors = new LinkedList<>();
202        postprocessors = new LinkedList<>();
203        trainers = new LinkedList<>();
204        testAwareTrainers = new LinkedList<>();
205        evaluators = new LinkedList<>();
206        resultStorages = new LinkedList<>();
207
208        if (file == null) {
209            throw new IllegalArgumentException("file must not be null");
210        }
211        if (file.isDirectory()) {
212            throw new IllegalArgumentException("file must not be a directory");
213        }
214        configFile = file;
215
216        experimentName = file.getName().split("\\.")[0];
217
218        final SAXParserFactory spf = SAXParserFactory.newInstance();
219        spf.setValidating(true);
220
221        SAXParser saxParser = null;
222        InputSource inputSource = null;
223        try {
224            saxParser = spf.newSAXParser();
225        }
226        catch (ParserConfigurationException | SAXException e) {
227            throw new ExperimentConfigurationException(e);
228        }
229
230        InputStreamReader reader = null;
231        try {
232            reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
233            inputSource = new InputSource(reader);
234        }
235        catch (UnsupportedEncodingException | FileNotFoundException e) {
236            throw new ExperimentConfigurationException("Could not open configuration file.", e);
237        }
238
239        if (inputSource != null) {
240            inputSource.setSystemId("file://" + file.getAbsolutePath());
241            try {
242                saxParser.parse(inputSource, this);
243            }
244            catch (SAXException | IOException e) {
245                throw new ExperimentConfigurationException("Error parsing configuration.", e);
246            }
247        }
248        if (reader != null) {
249            try {
250                reader.close();
251            }
252            catch (IOException e) {
253                throw new ExperimentConfigurationException("Error closing reader.", e);
254            }
255        }
256    }
257
258    /**
259     * returns the name of the experiment
260     *
261     * @return name of the experiment
262     */
263    public String getExperimentName() {
264        return experimentName;
265    }
266
267    /**
268     * returns the loaders for instances
269     *
270     * @return data loaders
271     */
272    public List<IVersionLoader> getLoaders() {
273        return loaders;
274    }
275
276    /**
277     * returns the results path
278     *
279     * @return results path
280     */
281    public String getResultsPath() {
282        return resultsPath;
283    }
284
285    /**
286     * returns the data set filters of the experiment
287     *
288     * @return data set filters of the experiment
289     */
290    public List<IVersionFilter> getVersionFilters() {
291        return versionFilters;
292    }
293
294    /**
295     * returns the test set filters of the experiment
296     *
297     * @return test set filters of the experiment
298     */
299    public List<IVersionFilter> getTestVersionFilters() {
300        return testVersionFilters;
301    }
302
303    /**
304     * returns the candidate training version filters of the experiment
305     *
306     * @return candidate training version filters of the experiment
307     */
308    public List<IVersionFilter> getTrainingVersionFilters() {
309        return trainingVersionFilters;
310    }
311
312    /**
313     * returns the setwise processors applied before the setwise data selection
314     *
315     * @return setwise processors applied before the setwise data selection
316     */
317    public List<ISetWiseProcessingStrategy> getSetWisePreprocessors() {
318        return setwisepreprocessors;
319    }
320
321    /**
322     * returns the setwise data selection strategies
323     *
324     * @return setwise data selection strategies
325     */
326    public List<ISetWiseDataselectionStrategy> getSetWiseSelectors() {
327        return setwiseselectors;
328    }
329
330    /**
331     * returns the setwise processors applied after the setwise data selection
332     *
333     * @return setwise processors applied after the setwise data selection
334     */
335    public List<ISetWiseProcessingStrategy> getSetWisePostprocessors() {
336        return setwisepostprocessors;
337    }
338
339    /**
340     * returns the setwise training algorithms
341     *
342     * @return setwise training algorithms
343     */
344    public List<ISetWiseTrainingStrategy> getSetWiseTrainers() {
345        return setwiseTrainers;
346    }
347
348    /**
349     * returns the setwise training algorithms
350     *
351     * @return setwise training algorithms
352     */
353    public List<ISetWiseTestdataAwareTrainingStrategy> getSetWiseTestdataAwareTrainers() {
354        return setwiseTestdataAwareTrainers;
355    }
356
357    /**
358     * returns the processors applied before the pointwise data selection
359     *
360     * @return processors applied before the pointwise data selection
361     */
362    public List<IProcessesingStrategy> getPreProcessors() {
363        return preprocessors;
364    }
365
366    /**
367     * returns the pointwise data selection strategies
368     *
369     * @return pointwise data selection strategies
370     */
371    public List<IPointWiseDataselectionStrategy> getPointWiseSelectors() {
372        return pointwiseselectors;
373    }
374
375    /**
376     * returns the processors applied after the pointwise data selection
377     *
378     * @return processors applied after the pointwise data selection
379     */
380    public List<IProcessesingStrategy> getPostProcessors() {
381        return postprocessors;
382    }
383
384    /**
385     * returns the normal training algorithm
386     *
387     * @return normal training algorithms
388     */
389    public List<ITrainingStrategy> getTrainers() {
390        return trainers;
391    }
392
393    /**
394     * returns the test aware training algorithms
395     *
396     * @return normal training algorithms
397     */
398    public List<ITestAwareTrainingStrategy> getTestAwareTrainers() {
399        return testAwareTrainers;
400    }
401
402    /**
403     * returns the evaluation strategies
404     *
405     * @return evaluation strategies
406     */
407    public List<IEvaluationStrategy> getEvaluators() {
408        return evaluators;
409    }
410
411    public List<IResultStorage> getResultStorages() {
412        return resultStorages;
413    }
414
415    /**
416     * returns boolean, if classifier should be saved
417     *
418     * @return boolean
419     */
420    public boolean getSaveClassifier() {
421        return saveClassifier;
422    }
423
424    /**
425     * returns the execution strategy
426     *
427     * @return String execution strategy
428     */
429    public String getExecutionStrategy() {
430        return executionStrategy;
431    }
432
433    /*
434     * (non-Javadoc)
435     *
436     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String,
437     * java.lang.String, org.xml.sax.Attributes)
438     */
439    @Override
440    public void startElement(String uri, String localName, String qName, Attributes attributes)
441        throws SAXException
442    {
443        try {
444            if (qName.equals("config")) {
445                // ingore
446            }
447            else if (qName.equals("loader")) {
448                final IVersionLoader loader = (IVersionLoader) Class
449                    .forName("de.ugoe.cs.cpdp.loader." + attributes.getValue("name")).newInstance();
450                loader.setLocation(attributes.getValue("datalocation"));
451                loaders.add(loader);
452
453                // TODO location as relative
454            }
455            else if (qName.equals("resultspath")) {
456                resultsPath = attributes.getValue("path");
457            }
458            else if (qName.equals("versionfilter")) {
459                final IVersionFilter filter = (IVersionFilter) Class
460                    .forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name"))
461                    .newInstance();
462                filter.setParameter(attributes.getValue("param"));
463                versionFilters.add(filter);
464            }
465            else if (qName.equals("testVersionfilter")) {
466                final IVersionFilter filter = (IVersionFilter) Class
467                    .forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name"))
468                    .newInstance();
469                filter.setParameter(attributes.getValue("param"));
470                testVersionFilters.add(filter);
471            }
472            else if (qName.equals("trainVersionfilter")) {
473                final IVersionFilter filter = (IVersionFilter) Class
474                    .forName("de.ugoe.cs.cpdp.versions." + attributes.getValue("name"))
475                    .newInstance();
476                filter.setParameter(attributes.getValue("param"));
477                trainingVersionFilters.add(filter);
478            }
479            else if (qName.equals("setwisepreprocessor")) {
480                final ISetWiseProcessingStrategy processor = (ISetWiseProcessingStrategy) Class
481                    .forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name"))
482                    .newInstance();
483                processor.setParameter(attributes.getValue("param"));
484                setwisepreprocessors.add(processor);
485            }
486            else if (qName.equals("setwiseselector")) {
487                final ISetWiseDataselectionStrategy selection =
488                    (ISetWiseDataselectionStrategy) Class
489                        .forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name"))
490                        .newInstance();
491                selection.setParameter(attributes.getValue("param"));
492                setwiseselectors.add(selection);
493            }
494            else if (qName.equals("setwisepostprocessor")) {
495                final ISetWiseProcessingStrategy processor = (ISetWiseProcessingStrategy) Class
496                    .forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name"))
497                    .newInstance();
498                processor.setParameter(attributes.getValue("param"));
499                setwisepostprocessors.add(processor);
500            }
501            else if (qName.equals("setwisetrainer")) {
502                final ISetWiseTrainingStrategy trainer = (ISetWiseTrainingStrategy) Class
503                    .forName("de.ugoe.cs.cpdp.training." + attributes.getValue("name"))
504                    .newInstance();
505                trainer.setParameter(attributes.getValue("param"));
506                setwiseTrainers.add(trainer);
507            }
508            else if (qName.equals("setwisetestdataawaretrainer")) {
509                final ISetWiseTestdataAwareTrainingStrategy trainer =
510                    (ISetWiseTestdataAwareTrainingStrategy) Class
511                        .forName("de.ugoe.cs.cpdp.training." + attributes.getValue("name"))
512                        .newInstance();
513                trainer.setParameter(attributes.getValue("param"));
514                trainer.setMethod(attributes.getValue("method"));
515                trainer.setThreshold(attributes.getValue("threshold"));
516                setwiseTestdataAwareTrainers.add(trainer);
517            }
518            else if (qName.equals("preprocessor")) {
519                final IProcessesingStrategy processor = (IProcessesingStrategy) Class
520                    .forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name"))
521                    .newInstance();
522                processor.setParameter(attributes.getValue("param"));
523                preprocessors.add(processor);
524            }
525            else if (qName.equals("pointwiseselector")) {
526                final IPointWiseDataselectionStrategy selection =
527                    (IPointWiseDataselectionStrategy) Class
528                        .forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name"))
529                        .newInstance();
530                selection.setParameter(attributes.getValue("param"));
531                pointwiseselectors.add(selection);
532            }
533            else if (qName.equals("postprocessor")) {
534                final IProcessesingStrategy processor = (IProcessesingStrategy) Class
535                    .forName("de.ugoe.cs.cpdp.dataprocessing." + attributes.getValue("name"))
536                    .newInstance();
537                processor.setParameter(attributes.getValue("param"));
538                postprocessors.add(processor);
539            }
540            else if (qName.equals("trainer")) {
541                final ITrainingStrategy trainer = (ITrainingStrategy) Class
542                    .forName("de.ugoe.cs.cpdp.training." + attributes.getValue("name"))
543                    .newInstance();
544                trainer.setParameter(attributes.getValue("param"));
545                trainers.add(trainer);
546            }
547            else if (qName.equals("testawaretrainer")) {
548                final ITestAwareTrainingStrategy trainer = (ITestAwareTrainingStrategy) Class
549                    .forName("de.ugoe.cs.cpdp.training." + attributes.getValue("name"))
550                    .newInstance();
551                trainer.setParameter(attributes.getValue("param"));
552                testAwareTrainers.add(trainer);
553            }
554            else if (qName.equals("eval")) {
555                final IEvaluationStrategy evaluator = (IEvaluationStrategy) Class
556                    .forName("de.ugoe.cs.cpdp.eval." + attributes.getValue("name")).newInstance();
557                evaluators.add(evaluator);
558            }
559            else if (qName.equals("storage")) {
560                final IResultStorage resultStorage = (IResultStorage) Class
561                    .forName("de.ugoe.cs.cpdp.eval." + attributes.getValue("name")).newInstance();
562                resultStorages.add(resultStorage);
563            }
564            else if (qName.equals("saveClassifier")) {
565                saveClassifier = true;
566            }
567            else if (qName.equals("executionStrategy")) {
568                executionStrategy = attributes.getValue("name");
569            }
570            else if (qName.equals("partialconfig")) {
571                String path = attributes.getValue("path");
572                try {
573                    boolean relative = true;
574                    if (attributes.getValue("relative") != null) {
575                        relative = Boolean.parseBoolean(attributes.getValue("relative"));
576                    }
577
578                    if (relative) {
579                        path = configFile.getParentFile().getPath() + "/" + path;
580                    }
581                    addConfigurations(new ExperimentConfiguration(path));
582                }
583                catch (ExperimentConfigurationException e) {
584                    throw new SAXException("Could not load partial configuration: " + path, e);
585                }
586            }
587            else {
588                Console.traceln(Level.WARNING, "element in config-file " + configFile.getName() +
589                    " ignored: " + qName);
590            }
591        }
592        catch (NoClassDefFoundError | ClassNotFoundException | IllegalAccessException
593                | InstantiationException | ClassCastException e)
594        {
595            throw new SAXException("Could not initialize class correctly", (Exception) e);
596        }
597    }
598
599    /**
600     * Adds the information of another experiment configuration to this configuration. This
601     * mechanism allows the usage of partial configuration files. The name of the other
602     * configuration is lost. <br>
603     * <br>
604     * If the current data path is the empty string (&quot;&quot;), it is override by the datapath
605     * of the other configuration. Otherwise, the current data path is kept.
606     *
607     * @param other
608     *            experiment whose information is added
609     * @throws ExperimentConfigurationException
610     */
611    private void addConfigurations(ExperimentConfiguration other)
612        throws ExperimentConfigurationException
613    {
614        if ("results".equals(resultsPath)) {
615            resultsPath = other.resultsPath;
616        }
617        loaders.addAll(other.loaders);
618        versionFilters.addAll(other.versionFilters);
619        testVersionFilters.addAll(other.testVersionFilters);
620        trainingVersionFilters.addAll(other.trainingVersionFilters);
621        setwisepreprocessors.addAll(other.setwisepreprocessors);
622        setwiseselectors.addAll(other.setwiseselectors);
623        setwisepostprocessors.addAll(other.setwisepostprocessors);
624        setwiseTrainers.addAll(other.setwiseTrainers);
625        setwiseTestdataAwareTrainers.addAll(other.setwiseTestdataAwareTrainers);
626        preprocessors.addAll(other.preprocessors);
627        pointwiseselectors.addAll(other.pointwiseselectors);
628        postprocessors.addAll(other.postprocessors);
629        trainers.addAll(other.trainers);
630        evaluators.addAll(other.evaluators);
631
632        if (!executionStrategy.equals(other.executionStrategy)) {
633            throw new ExperimentConfigurationException("Executionstrategies must be the same, if config files should be added.");
634        }
635
636        /*
637         * Only if saveClassifier is not set in the main config and the other configs saveClassifier
638         * is true, it must be set.
639         */
640        if (saveClassifier == null && other.saveClassifier == true) {
641            saveClassifier = other.saveClassifier;
642        }
643
644    }
645
646    /*
647     * (non-Javadoc)
648     *
649     * @see java.lang.Object#toString()
650     */
651    @Override
652    public String toString() {
653        final StringBuilder builder = new StringBuilder();
654        builder.append("Experiment name: " + experimentName + StringTools.ENDLINE);
655        builder.append("Loaders: " + loaders + StringTools.ENDLINE);
656        builder.append("Results path: " + resultsPath + StringTools.ENDLINE);
657        builder.append("Version filters: " + versionFilters.toString() + StringTools.ENDLINE);
658        builder
659            .append("Test version filters: " + testVersionFilters.toString() + StringTools.ENDLINE);
660        builder.append("Training version filters: " + trainingVersionFilters.toString() +
661            StringTools.ENDLINE);
662        builder.append("Setwise preprocessors: " + setwisepreprocessors.toString() +
663            StringTools.ENDLINE);
664        builder.append("Setwise selectors: " + setwiseselectors.toString() + StringTools.ENDLINE);
665        builder.append("Setwise postprocessors: " + setwisepostprocessors.toString() +
666            StringTools.ENDLINE);
667        builder.append("Setwise trainers: " + setwiseTrainers.toString() + StringTools.ENDLINE);
668        builder.append("Setwise Testdata Aware trainers: " +
669            setwiseTestdataAwareTrainers.toString() + StringTools.ENDLINE);
670        builder
671            .append("Pointwise preprocessors: " + preprocessors.toString() + StringTools.ENDLINE);
672        builder
673            .append("Pointwise selectors: " + pointwiseselectors.toString() + StringTools.ENDLINE);
674        builder
675            .append("Pointwise postprocessors: " + postprocessors.toString() + StringTools.ENDLINE);
676        builder.append("Pointwise trainers: " + trainers.toString() + StringTools.ENDLINE);
677        builder.append("Evaluators: " + evaluators.toString() + StringTools.ENDLINE);
678        builder.append("Save Classifier?: " + saveClassifier + StringTools.ENDLINE);
679        builder.append("Execution Strategy: " + executionStrategy + StringTools.ENDLINE);
680
681        return builder.toString();
682    }
683}
Note: See TracBrowser for help on using the repository browser.