source: trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java @ 45

Last change on this file since 45 was 45, checked in by atrautsch, 9 years ago

metric matching configurable

  • Property svn:mime-type set to text/plain
File size: 25.2 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp;
16
17import java.io.File;
18import java.io.FileInputStream;
19import java.io.FileNotFoundException;
20import java.io.IOException;
21import java.io.InputStreamReader;
22import java.io.UnsupportedEncodingException;
23import java.util.LinkedList;
24import java.util.List;
25import java.util.logging.Level;
26
27import javax.xml.parsers.ParserConfigurationException;
28import javax.xml.parsers.SAXParser;
29import javax.xml.parsers.SAXParserFactory;
30
31import org.xml.sax.Attributes;
32import org.xml.sax.InputSource;
33import org.xml.sax.SAXException;
34import org.xml.sax.helpers.DefaultHandler;
35
36import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
37import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
38import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
39import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
40import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
41import de.ugoe.cs.cpdp.loader.IVersionLoader;
42import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
43import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
44import de.ugoe.cs.cpdp.training.ITrainingStrategy;
45import de.ugoe.cs.cpdp.versions.IVersionFilter;
46import de.ugoe.cs.util.StringTools;
47import de.ugoe.cs.util.console.Console;
48
49/**
50 * Class that contains all meta information about an experiment, i.e., its configuration. The
51 * configuration is loaded from an XML file. <br>
52 * <br>
53 * In the current implementation, the experiment configuration can only be created using an XML
54 * file. Programmatic creation of experiment configurations is currently not possibly.
55 *
56 * @author Steffen Herbold
57 */
58public class ExperimentConfiguration extends DefaultHandler {
59
60    /**
61     * handle of the file that contains the configuration
62     */
63    private final File configFile;
64
65    /**
66     * name of the experiment (automatically set to the file name without the .xml ending)
67     */
68    private String experimentName = "exp";
69
70    /**
71     * loads instances
72     */
73    private List<IVersionLoader> loaders;
74
75    /**
76     * path were the results of the experiments are stored
77     */
78    private String resultsPath = "results";
79
80    /**
81     * data set filters applied to all data
82     */
83    private List<IVersionFilter> versionFilters;
84
85    /**
86     * data set filters that decide if a data set is used as test data
87     */
88    private List<IVersionFilter> testVersionFilters;
89
90    /**
91     * data set filters that decide if a data is used as candidate training data
92     */
93    private List<IVersionFilter> trainingVersionFilters;
94
95    /**
96     * setwise data processors that are applied before the setwise data selection
97     */
98    private List<ISetWiseProcessingStrategy> setwisepreprocessors;
99
100    /**
101     * setwise data selection strategies
102     */
103    private List<ISetWiseDataselectionStrategy> setwiseselectors;
104
105    /**
106     * setwise data processors that are applied after the setwise data selection
107     */
108    private List<ISetWiseProcessingStrategy> setwisepostprocessors;
109
110    /**
111     * setwise trainers, i.e., trainers that require the selected training data to be separate from
112     * each other
113     */
114    private List<ISetWiseTrainingStrategy> setwiseTrainers;
115
116    /**
117     * setwise testdata aware trainers, i.e., trainers that require the selected training data to be separate from
118     * each other and the current testdata
119     */
120    private List<ISetWiseTestdataAwareTrainingStrategy> setwiseTestdataAwareTrainers;
121   
122    /**
123     * data processors that are applied before the pointwise data selection
124     */
125    private List<IProcessesingStrategy> preprocessors;
126
127    /**
128     * pointwise data selection strategies
129     */
130    private List<IPointWiseDataselectionStrategy> pointwiseselectors;
131
132    /**
133     * data processors that are applied before the pointwise data selection
134     */
135    private List<IProcessesingStrategy> postprocessors;
136
137    /**
138     * normal trainers, i.e., trainers that require the selected training data in a single data set
139     */
140    private List<ITrainingStrategy> trainers;
141
142    /**
143     * evaluators used for the the experiment results
144     */
145    private List<IEvaluationStrategy> evaluators;
146
147    /**
148     * indicates, if the classifier should be saved
149     */
150    private Boolean saveClassifier = null;
151
152    /**
153     * indicates, which execution strategy to choose (e.g. CrossProjectExperiment,
154     * ClassifierCreationExecution). Default is CrossProjectExperiment.
155     */
156    private String executionStrategy = "CrossProjectExperiment";
157
158    /**
159     * Constructor. Creates a new configuration from a given file.
160     *
161     * @param filename
162     *            name of the file from the configuration is loaded.
163     * @throws ExperimentConfigurationException
164     *             thrown if there is an error creating the configuration
165     */
166    public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
167        this(new File(filename));
168    }
169
170    /**
171     * Constructor. Creates a new configuration from a given file.
172     *
173     * @param filename
174     *            handle of the file from the configuration is loaded.
175     * @throws ExperimentConfigurationException
176     *             thrown if there is an error creating the configuration
177     */
178    public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
179        loaders = new LinkedList<>();
180        versionFilters = new LinkedList<>();
181        testVersionFilters = new LinkedList<>();
182        trainingVersionFilters = new LinkedList<>();
183        setwisepreprocessors = new LinkedList<>();
184        setwiseselectors = new LinkedList<>();
185        setwisepostprocessors = new LinkedList<>();
186        setwiseTrainers = new LinkedList<>();
187        setwiseTestdataAwareTrainers = new LinkedList<>();
188        preprocessors = new LinkedList<>();
189        pointwiseselectors = new LinkedList<>();
190        postprocessors = new LinkedList<>();
191        trainers = new LinkedList<>();
192        evaluators = new LinkedList<>();
193
194        if (file == null) {
195            throw new IllegalArgumentException("file must not be null");
196        }
197        if (file.isDirectory()) {
198            throw new IllegalArgumentException("file must not be a directory");
199        }
200        configFile = file;
201
202        experimentName = file.getName().split("\\.")[0];
203
204        final SAXParserFactory spf = SAXParserFactory.newInstance();
205        spf.setValidating(true);
206
207        SAXParser saxParser = null;
208        InputSource inputSource = null;
209        try {
210            saxParser = spf.newSAXParser();
211        }
212        catch (ParserConfigurationException | SAXException e) {
213            throw new ExperimentConfigurationException(e);
214        }
215
216        InputStreamReader reader = null;
217        try {
218            reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
219            inputSource = new InputSource(reader);
220        }
221        catch (UnsupportedEncodingException | FileNotFoundException e) {
222            throw new ExperimentConfigurationException("Could not open configuration file.", e);
223        }
224
225        if (inputSource != null) {
226            inputSource.setSystemId("file://" + file.getAbsolutePath());
227            try {
228                saxParser.parse(inputSource, this);
229            }
230            catch (SAXException | IOException e) {
231                throw new ExperimentConfigurationException("Error parsing configuration.", e);
232            }
233        }
234        if (reader != null) {
235            try {
236                reader.close();
237            }
238            catch (IOException e) {
239                throw new ExperimentConfigurationException("Error closing reader.", e);
240            }
241        }
242    }
243
244    /**
245     * returns the name of the experiment
246     *
247     * @return name of the experiment
248     */
249    public String getExperimentName() {
250        return experimentName;
251    }
252
253    /**
254     * returns the loaders for instances
255     *
256     * @return data loaders
257     */
258    public List<IVersionLoader> getLoaders() {
259        return loaders;
260    }
261
262    /**
263     * returns the results path
264     *
265     * @return results path
266     */
267    public String getResultsPath() {
268        return resultsPath;
269    }
270
271    /**
272     * returns the data set filters of the experiment
273     *
274     * @return data set filters of the experiment
275     */
276    public List<IVersionFilter> getVersionFilters() {
277        return versionFilters;
278    }
279
280    /**
281     * returns the test set filters of the experiment
282     *
283     * @return test set filters of the experiment
284     */
285    public List<IVersionFilter> getTestVersionFilters() {
286        return testVersionFilters;
287    }
288
289    /**
290     * returns the candidate training version filters of the experiment
291     *
292     * @return candidate training version filters of the experiment
293     */
294    public List<IVersionFilter> getTrainingVersionFilters() {
295        return trainingVersionFilters;
296    }
297
298    /**
299     * returns the setwise processors applied before the setwise data selection
300     *
301     * @return setwise processors applied before the setwise data selection
302     */
303    public List<ISetWiseProcessingStrategy> getSetWisePreprocessors() {
304        return setwisepreprocessors;
305    }
306
307    /**
308     * returns the setwise data selection strategies
309     *
310     * @return setwise data selection strategies
311     */
312    public List<ISetWiseDataselectionStrategy> getSetWiseSelectors() {
313        return setwiseselectors;
314    }
315
316    /**
317     * returns the setwise processors applied after the setwise data selection
318     *
319     * @return setwise processors applied after the setwise data selection
320     */
321    public List<ISetWiseProcessingStrategy> getSetWisePostprocessors() {
322        return setwisepostprocessors;
323    }
324
325    /**
326     * returns the setwise training algorithms
327     *
328     * @return setwise training algorithms
329     */
330    public List<ISetWiseTrainingStrategy> getSetWiseTrainers() {
331        return setwiseTrainers;
332    }
333
334    /**
335     * returns the setwise training algorithms
336     *
337     * @return setwise training algorithms
338     */
339    public List<ISetWiseTestdataAwareTrainingStrategy> getSetWiseTestdataAwareTrainers() {
340        return setwiseTestdataAwareTrainers;
341    }
342   
343    /**
344     * returns the processors applied before the pointwise data selection
345     *
346     * @return processors applied before the pointwise data selection
347     */
348    public List<IProcessesingStrategy> getPreProcessors() {
349        return preprocessors;
350    }
351
352    /**
353     * returns the pointwise data selection strategies
354     *
355     * @return pointwise data selection strategies
356     */
357    public List<IPointWiseDataselectionStrategy> getPointWiseSelectors() {
358        return pointwiseselectors;
359    }
360
361    /**
362     * returns the processors applied after the pointwise data selection
363     *
364     * @return processors applied after the pointwise data selection
365     */
366    public List<IProcessesingStrategy> getPostProcessors() {
367        return postprocessors;
368    }
369
370    /**
371     * returns the normal training algorithm
372     *
373     * @return normal training algorithms
374     */
375    public List<ITrainingStrategy> getTrainers() {
376        return trainers;
377    }
378
379    /**
380     * returns the evaluation strategies
381     *
382     * @return evaluation strategies
383     */
384    public List<IEvaluationStrategy> getEvaluators() {
385        return evaluators;
386    }
387
388    /**
389     * returns boolean, if classifier should be saved
390     *
391     * @return boolean
392     */
393    public boolean getSaveClassifier() {
394        return saveClassifier;
395    }
396
397    /**
398     * returns the execution strategy
399     *
400     * @return String execution strategy
401     */
402    public String getExecutionStrategy() {
403        return executionStrategy;
404    }
405
406    /*
407     * (non-Javadoc)
408     *
409     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String,
410     * java.lang.String, org.xml.sax.Attributes)
411     */
412    @Override
413    public void startElement(String uri, String localName, String qName, Attributes attributes)
414        throws SAXException
415    {
416        try {
417            if (qName.equals("config")) {
418                // ingore
419            }
420            else if (qName.equals("loader")) {
421                final IVersionLoader loader =
422                    (IVersionLoader) Class.forName("de.ugoe.cs.cpdp.loader." +
423                                                       attributes.getValue("name")).newInstance();
424                loader.setLocation(attributes.getValue("datalocation"));
425                loaders.add(loader);
426
427                // TODO location as relative
428            }
429            else if (qName.equals("resultspath")) {
430                resultsPath = attributes.getValue("path");
431            }
432            else if (qName.equals("versionfilter")) {
433                final IVersionFilter filter =
434                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
435                                                       attributes.getValue("name")).newInstance();
436                filter.setParameter(attributes.getValue("param"));
437                versionFilters.add(filter);
438            }
439            else if (qName.equals("testVersionfilter")) {
440                final IVersionFilter filter =
441                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
442                                                       attributes.getValue("name")).newInstance();
443                filter.setParameter(attributes.getValue("param"));
444                testVersionFilters.add(filter);
445            }
446            else if (qName.equals("trainVersionfilter")) {
447                final IVersionFilter filter =
448                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
449                                                       attributes.getValue("name")).newInstance();
450                filter.setParameter(attributes.getValue("param"));
451                trainingVersionFilters.add(filter);
452            }
453            else if (qName.equals("setwisepreprocessor")) {
454                final ISetWiseProcessingStrategy processor =
455                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
456                                                                   attributes.getValue("name"))
457                        .newInstance();
458                processor.setParameter(attributes.getValue("param"));
459                setwisepreprocessors.add(processor);
460            }
461            else if (qName.equals("setwiseselector")) {
462                final ISetWiseDataselectionStrategy selection =
463                    (ISetWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." +
464                                                                      attributes.getValue("name"))
465                        .newInstance();
466                selection.setParameter(attributes.getValue("param"));
467                setwiseselectors.add(selection);
468            }
469            else if (qName.equals("setwisepostprocessor")) {
470                final ISetWiseProcessingStrategy processor =
471                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
472                                                                   attributes.getValue("name"))
473                        .newInstance();
474                processor.setParameter(attributes.getValue("param"));
475                setwisepostprocessors.add(processor);
476            }
477            else if (qName.equals("setwisetrainer")) {
478                final ISetWiseTrainingStrategy trainer =
479                    (ISetWiseTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
480                                                                 attributes.getValue("name"))
481                        .newInstance();
482                trainer.setParameter(attributes.getValue("param"));
483                setwiseTrainers.add(trainer);
484            }
485            else if (qName.equals("setwisetestdataawaretrainer")) {
486                final ISetWiseTestdataAwareTrainingStrategy trainer =
487                    (ISetWiseTestdataAwareTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
488                                                                 attributes.getValue("name"))
489                        .newInstance();
490                trainer.setParameter(attributes.getValue("param"));
491                trainer.setMethod(attributes.getValue("method"));
492                trainer.setThreshold(attributes.getValue("threshold"));
493                setwiseTestdataAwareTrainers.add(trainer);
494            }
495            else if (qName.equals("preprocessor")) {
496                final IProcessesingStrategy processor =
497                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
498                                                              attributes.getValue("name"))
499                        .newInstance();
500                processor.setParameter(attributes.getValue("param"));
501                preprocessors.add(processor);
502            }
503            else if (qName.equals("pointwiseselector")) {
504                final IPointWiseDataselectionStrategy selection =
505                    (IPointWiseDataselectionStrategy) Class
506                        .forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name"))
507                        .newInstance();
508                selection.setParameter(attributes.getValue("param"));
509                pointwiseselectors.add(selection);
510            }
511            else if (qName.equals("postprocessor")) {
512                final IProcessesingStrategy processor =
513                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
514                                                              attributes.getValue("name"))
515                        .newInstance();
516                processor.setParameter(attributes.getValue("param"));
517                postprocessors.add(processor);
518            }
519            else if (qName.equals("trainer")) {
520                final ITrainingStrategy trainer =
521                    (ITrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
522                                                          attributes.getValue("name"))
523                        .newInstance();
524                trainer.setParameter(attributes.getValue("param"));
525                trainers.add(trainer);
526            }
527            else if (qName.equals("eval")) {
528                final IEvaluationStrategy evaluator =
529                    (IEvaluationStrategy) Class.forName("de.ugoe.cs.cpdp.eval." +
530                                                            attributes.getValue("name"))
531                        .newInstance();
532                evaluators.add(evaluator);
533            }
534            else if (qName.equals("saveClassifier")) {
535                saveClassifier = true;
536            }
537            else if (qName.equals("executionStrategy")) {
538                executionStrategy = attributes.getValue("name");
539            }
540            else if (qName.equals("partialconfig")) {
541                String path = attributes.getValue("path");
542                try {
543                    boolean relative = true;
544                    if (attributes.getValue("relative") != null) {
545                        relative = Boolean.parseBoolean(attributes.getValue("relative"));
546                    }
547
548                    if (relative) {
549                        path = configFile.getParentFile().getPath() + "/" + path;
550                    }
551                    addConfigurations(new ExperimentConfiguration(path));
552                }
553                catch (ExperimentConfigurationException e) {
554                    throw new SAXException("Could not load partial configuration: " + path, e);
555                }
556            }
557            else {
558                Console.traceln(Level.WARNING, "element in config-file " + configFile.getName() +
559                    " ignored: " + qName);
560            }
561        }
562        catch (NoClassDefFoundError | ClassNotFoundException | IllegalAccessException
563                | InstantiationException | ClassCastException e)
564        {
565            throw new SAXException("Could not initialize class correctly", (Exception) e);
566        }
567    }
568
569    /**
570     * Adds the information of another experiment configuration to this configuration. This
571     * mechanism allows the usage of partial configuration files. The name of the other
572     * configuration is lost. <br>
573     * <br>
574     * If the current data path is the empty string (&quot;&quot;), it is override by the datapath
575     * of the other configuration. Otherwise, the current data path is kept.
576     *
577     * @param other
578     *            experiment whose information is added
579     * @throws ExperimentConfigurationException
580     */
581    private void addConfigurations(ExperimentConfiguration other)
582        throws ExperimentConfigurationException
583    {
584        if ("results".equals(resultsPath)) {
585            resultsPath = other.resultsPath;
586        }
587        loaders.addAll(other.loaders);
588        versionFilters.addAll(other.versionFilters);
589        testVersionFilters.addAll(other.testVersionFilters);
590        trainingVersionFilters.addAll(other.trainingVersionFilters);
591        setwisepreprocessors.addAll(other.setwisepreprocessors);
592        setwiseselectors.addAll(other.setwiseselectors);
593        setwisepostprocessors.addAll(other.setwisepostprocessors);
594        setwiseTrainers.addAll(other.setwiseTrainers);
595        setwiseTestdataAwareTrainers.addAll(other.setwiseTestdataAwareTrainers);
596        preprocessors.addAll(other.preprocessors);
597        pointwiseselectors.addAll(other.pointwiseselectors);
598        postprocessors.addAll(other.postprocessors);
599        trainers.addAll(other.trainers);
600        evaluators.addAll(other.evaluators);
601
602        if (!executionStrategy.equals(other.executionStrategy)) {
603            throw new ExperimentConfigurationException(
604                                                       "Executionstrategies must be the same, if config files should be added.");
605        }
606
607        /*
608         * Only if saveClassifier is not set in the main config and the other configs saveClassifier
609         * is true, it must be set.
610         */
611        if (saveClassifier == null && other.saveClassifier == true) {
612            saveClassifier = other.saveClassifier;
613        }
614
615    }
616
617    /*
618     * (non-Javadoc)
619     *
620     * @see java.lang.Object#toString()
621     */
622    @Override
623    public String toString() {
624        final StringBuilder builder = new StringBuilder();
625        builder.append("Experiment name: " + experimentName + StringTools.ENDLINE);
626        builder.append("Loaders: " + loaders + StringTools.ENDLINE);
627        builder.append("Results path: " + resultsPath + StringTools.ENDLINE);
628        builder.append("Version filters: " + versionFilters.toString() + StringTools.ENDLINE);
629        builder.append("Test version filters: " + testVersionFilters.toString() +
630            StringTools.ENDLINE);
631        builder.append("Training version filters: " + trainingVersionFilters.toString() +
632            StringTools.ENDLINE);
633        builder.append("Setwise preprocessors: " + setwisepreprocessors.toString() +
634            StringTools.ENDLINE);
635        builder.append("Setwise selectors: " + setwiseselectors.toString() + StringTools.ENDLINE);
636        builder.append("Setwise postprocessors: " + setwisepostprocessors.toString() +
637            StringTools.ENDLINE);
638        builder.append("Setwise trainers: " + setwiseTrainers.toString() + StringTools.ENDLINE);
639        builder.append("Setwise Testdata Aware trainers: " + setwiseTestdataAwareTrainers.toString() + StringTools.ENDLINE);
640        builder
641            .append("Pointwise preprocessors: " + preprocessors.toString() + StringTools.ENDLINE);
642        builder.append("Pointwise selectors: " + pointwiseselectors.toString() +
643            StringTools.ENDLINE);
644        builder.append("Pointwise postprocessors: " + postprocessors.toString() +
645            StringTools.ENDLINE);
646        builder.append("Pointwise trainers: " + trainers.toString() + StringTools.ENDLINE);
647        builder.append("Evaluators: " + evaluators.toString() + StringTools.ENDLINE);
648        builder.append("Save Classifier?: " + saveClassifier + StringTools.ENDLINE);
649        builder.append("Execution Strategy: " + executionStrategy + StringTools.ENDLINE);
650
651        return builder.toString();
652    }
653}
Note: See TracBrowser for help on using the repository browser.