source: trunk/CrossPare/src/de/ugoe/cs/cpdp/ExperimentConfiguration.java @ 42

Last change on this file since 42 was 41, checked in by sherbold, 9 years ago
  • formatted code and added copyrights
  • Property svn:mime-type set to text/plain
File size: 23.6 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp;
16
17import java.io.File;
18import java.io.FileInputStream;
19import java.io.FileNotFoundException;
20import java.io.IOException;
21import java.io.InputStreamReader;
22import java.io.UnsupportedEncodingException;
23import java.util.LinkedList;
24import java.util.List;
25import java.util.logging.Level;
26
27import javax.xml.parsers.ParserConfigurationException;
28import javax.xml.parsers.SAXParser;
29import javax.xml.parsers.SAXParserFactory;
30
31import org.xml.sax.Attributes;
32import org.xml.sax.InputSource;
33import org.xml.sax.SAXException;
34import org.xml.sax.helpers.DefaultHandler;
35
36import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
37import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
38import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
39import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
40import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
41import de.ugoe.cs.cpdp.loader.IVersionLoader;
42import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
43import de.ugoe.cs.cpdp.training.ITrainingStrategy;
44import de.ugoe.cs.cpdp.versions.IVersionFilter;
45import de.ugoe.cs.util.StringTools;
46import de.ugoe.cs.util.console.Console;
47
48/**
49 * Class that contains all meta information about an experiment, i.e., its configuration. The
50 * configuration is loaded from an XML file. <br>
51 * <br>
52 * In the current implementation, the experiment configuration can only be created using an XML
53 * file. Programmatic creation of experiment configurations is currently not possibly.
54 *
55 * @author Steffen Herbold
56 */
57public class ExperimentConfiguration extends DefaultHandler {
58
59    /**
60     * handle of the file that contains the configuration
61     */
62    private final File configFile;
63
64    /**
65     * name of the experiment (automatically set to the file name without the .xml ending)
66     */
67    private String experimentName = "exp";
68
69    /**
70     * loads instances
71     */
72    private List<IVersionLoader> loaders;
73
74    /**
75     * path were the results of the experiments are stored
76     */
77    private String resultsPath = "results";
78
79    /**
80     * data set filters applied to all data
81     */
82    private List<IVersionFilter> versionFilters;
83
84    /**
85     * data set filters that decide if a data set is used as test data
86     */
87    private List<IVersionFilter> testVersionFilters;
88
89    /**
90     * data set filters that decide if a data is used as candidate training data
91     */
92    private List<IVersionFilter> trainingVersionFilters;
93
94    /**
95     * setwise data processors that are applied before the setwise data selection
96     */
97    private List<ISetWiseProcessingStrategy> setwisepreprocessors;
98
99    /**
100     * setwise data selection strategies
101     */
102    private List<ISetWiseDataselectionStrategy> setwiseselectors;
103
104    /**
105     * setwise data processors that are applied after the setwise data selection
106     */
107    private List<ISetWiseProcessingStrategy> setwisepostprocessors;
108
109    /**
110     * setwise trainers, i.e., trainers that require the selected training data to be separate from
111     * each other
112     */
113    private List<ISetWiseTrainingStrategy> setwiseTrainers;
114
115    /**
116     * data processors that are applied before the pointwise data selection
117     */
118    private List<IProcessesingStrategy> preprocessors;
119
120    /**
121     * pointwise data selection strategies
122     */
123    private List<IPointWiseDataselectionStrategy> pointwiseselectors;
124
125    /**
126     * data processors that are applied before the pointwise data selection
127     */
128    private List<IProcessesingStrategy> postprocessors;
129
130    /**
131     * normal trainers, i.e., trainers that require the selected training data in a single data set
132     */
133    private List<ITrainingStrategy> trainers;
134
135    /**
136     * evaluators used for the the experiment results
137     */
138    private List<IEvaluationStrategy> evaluators;
139
140    /**
141     * indicates, if the classifier should be saved
142     */
143    private Boolean saveClassifier = null;
144
145    /**
146     * indicates, which execution strategy to choose (e.g. CrossProjectExperiment,
147     * ClassifierCreationExecution). Default is CrossProjectExperiment.
148     */
149    private String executionStrategy = "CrossProjectExperiment";
150
151    /**
152     * Constructor. Creates a new configuration from a given file.
153     *
154     * @param filename
155     *            name of the file from the configuration is loaded.
156     * @throws ExperimentConfigurationException
157     *             thrown if there is an error creating the configuration
158     */
159    public ExperimentConfiguration(String filename) throws ExperimentConfigurationException {
160        this(new File(filename));
161    }
162
163    /**
164     * Constructor. Creates a new configuration from a given file.
165     *
166     * @param filename
167     *            handle of the file from the configuration is loaded.
168     * @throws ExperimentConfigurationException
169     *             thrown if there is an error creating the configuration
170     */
171    public ExperimentConfiguration(File file) throws ExperimentConfigurationException {
172        loaders = new LinkedList<>();
173        versionFilters = new LinkedList<>();
174        testVersionFilters = new LinkedList<>();
175        trainingVersionFilters = new LinkedList<>();
176        setwisepreprocessors = new LinkedList<>();
177        setwiseselectors = new LinkedList<>();
178        setwisepostprocessors = new LinkedList<>();
179        setwiseTrainers = new LinkedList<>();
180        preprocessors = new LinkedList<>();
181        pointwiseselectors = new LinkedList<>();
182        postprocessors = new LinkedList<>();
183        trainers = new LinkedList<>();
184        evaluators = new LinkedList<>();
185
186        if (file == null) {
187            throw new IllegalArgumentException("file must not be null");
188        }
189        if (file.isDirectory()) {
190            throw new IllegalArgumentException("file must not be a directory");
191        }
192        configFile = file;
193
194        experimentName = file.getName().split("\\.")[0];
195
196        final SAXParserFactory spf = SAXParserFactory.newInstance();
197        spf.setValidating(true);
198
199        SAXParser saxParser = null;
200        InputSource inputSource = null;
201        try {
202            saxParser = spf.newSAXParser();
203        }
204        catch (ParserConfigurationException | SAXException e) {
205            throw new ExperimentConfigurationException(e);
206        }
207
208        InputStreamReader reader = null;
209        try {
210            reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
211            inputSource = new InputSource(reader);
212        }
213        catch (UnsupportedEncodingException | FileNotFoundException e) {
214            throw new ExperimentConfigurationException("Could not open configuration file.", e);
215        }
216
217        if (inputSource != null) {
218            inputSource.setSystemId("file://" + file.getAbsolutePath());
219            try {
220                saxParser.parse(inputSource, this);
221            }
222            catch (SAXException | IOException e) {
223                throw new ExperimentConfigurationException("Error parsing configuration.", e);
224            }
225        }
226        if (reader != null) {
227            try {
228                reader.close();
229            }
230            catch (IOException e) {
231                throw new ExperimentConfigurationException("Error closing reader.", e);
232            }
233        }
234    }
235
236    /**
237     * returns the name of the experiment
238     *
239     * @return name of the experiment
240     */
241    public String getExperimentName() {
242        return experimentName;
243    }
244
245    /**
246     * returns the loaders for instances
247     *
248     * @return data loaders
249     */
250    public List<IVersionLoader> getLoaders() {
251        return loaders;
252    }
253
254    /**
255     * returns the results path
256     *
257     * @return results path
258     */
259    public String getResultsPath() {
260        return resultsPath;
261    }
262
263    /**
264     * returns the data set filters of the experiment
265     *
266     * @return data set filters of the experiment
267     */
268    public List<IVersionFilter> getVersionFilters() {
269        return versionFilters;
270    }
271
272    /**
273     * returns the test set filters of the experiment
274     *
275     * @return test set filters of the experiment
276     */
277    public List<IVersionFilter> getTestVersionFilters() {
278        return testVersionFilters;
279    }
280
281    /**
282     * returns the candidate training version filters of the experiment
283     *
284     * @return candidate training version filters of the experiment
285     */
286    public List<IVersionFilter> getTrainingVersionFilters() {
287        return trainingVersionFilters;
288    }
289
290    /**
291     * returns the setwise processors applied before the setwise data selection
292     *
293     * @return setwise processors applied before the setwise data selection
294     */
295    public List<ISetWiseProcessingStrategy> getSetWisePreprocessors() {
296        return setwisepreprocessors;
297    }
298
299    /**
300     * returns the setwise data selection strategies
301     *
302     * @return setwise data selection strategies
303     */
304    public List<ISetWiseDataselectionStrategy> getSetWiseSelectors() {
305        return setwiseselectors;
306    }
307
308    /**
309     * returns the setwise processors applied after the setwise data selection
310     *
311     * @return setwise processors applied after the setwise data selection
312     */
313    public List<ISetWiseProcessingStrategy> getSetWisePostprocessors() {
314        return setwisepostprocessors;
315    }
316
317    /**
318     * returns the setwise training algorithms
319     *
320     * @return setwise training algorithms
321     */
322    public List<ISetWiseTrainingStrategy> getSetWiseTrainers() {
323        return setwiseTrainers;
324    }
325
326    /**
327     * returns the processors applied before the pointwise data selection
328     *
329     * @return processors applied before the pointwise data selection
330     */
331    public List<IProcessesingStrategy> getPreProcessors() {
332        return preprocessors;
333    }
334
335    /**
336     * returns the pointwise data selection strategies
337     *
338     * @return pointwise data selection strategies
339     */
340    public List<IPointWiseDataselectionStrategy> getPointWiseSelectors() {
341        return pointwiseselectors;
342    }
343
344    /**
345     * returns the processors applied after the pointwise data selection
346     *
347     * @return processors applied after the pointwise data selection
348     */
349    public List<IProcessesingStrategy> getPostProcessors() {
350        return postprocessors;
351    }
352
353    /**
354     * returns the normal training algorithm
355     *
356     * @return normal training algorithms
357     */
358    public List<ITrainingStrategy> getTrainers() {
359        return trainers;
360    }
361
362    /**
363     * returns the evaluation strategies
364     *
365     * @return evaluation strategies
366     */
367    public List<IEvaluationStrategy> getEvaluators() {
368        return evaluators;
369    }
370
371    /**
372     * returns boolean, if classifier should be saved
373     *
374     * @return boolean
375     */
376    public boolean getSaveClassifier() {
377        return saveClassifier;
378    }
379
380    /**
381     * returns the execution strategy
382     *
383     * @return String execution strategy
384     */
385    public String getExecutionStrategy() {
386        return executionStrategy;
387    }
388
389    /*
390     * (non-Javadoc)
391     *
392     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String,
393     * java.lang.String, org.xml.sax.Attributes)
394     */
395    @Override
396    public void startElement(String uri, String localName, String qName, Attributes attributes)
397        throws SAXException
398    {
399        try {
400            if (qName.equals("config")) {
401                // ingore
402            }
403            else if (qName.equals("loader")) {
404                final IVersionLoader loader =
405                    (IVersionLoader) Class.forName("de.ugoe.cs.cpdp.loader." +
406                                                       attributes.getValue("name")).newInstance();
407                loader.setLocation(attributes.getValue("datalocation"));
408                loaders.add(loader);
409
410                // TODO location as relative
411            }
412            else if (qName.equals("resultspath")) {
413                resultsPath = attributes.getValue("path");
414            }
415            else if (qName.equals("versionfilter")) {
416                final IVersionFilter filter =
417                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
418                                                       attributes.getValue("name")).newInstance();
419                filter.setParameter(attributes.getValue("param"));
420                versionFilters.add(filter);
421            }
422            else if (qName.equals("testVersionfilter")) {
423                final IVersionFilter filter =
424                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
425                                                       attributes.getValue("name")).newInstance();
426                filter.setParameter(attributes.getValue("param"));
427                testVersionFilters.add(filter);
428            }
429            else if (qName.equals("trainVersionfilter")) {
430                final IVersionFilter filter =
431                    (IVersionFilter) Class.forName("de.ugoe.cs.cpdp.versions." +
432                                                       attributes.getValue("name")).newInstance();
433                filter.setParameter(attributes.getValue("param"));
434                trainingVersionFilters.add(filter);
435            }
436            else if (qName.equals("setwisepreprocessor")) {
437                final ISetWiseProcessingStrategy processor =
438                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
439                                                                   attributes.getValue("name"))
440                        .newInstance();
441                processor.setParameter(attributes.getValue("param"));
442                setwisepreprocessors.add(processor);
443            }
444            else if (qName.equals("setwiseselector")) {
445                final ISetWiseDataselectionStrategy selection =
446                    (ISetWiseDataselectionStrategy) Class.forName("de.ugoe.cs.cpdp.dataselection." +
447                                                                      attributes.getValue("name"))
448                        .newInstance();
449                selection.setParameter(attributes.getValue("param"));
450                setwiseselectors.add(selection);
451            }
452            else if (qName.equals("setwisepostprocessor")) {
453                final ISetWiseProcessingStrategy processor =
454                    (ISetWiseProcessingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
455                                                                   attributes.getValue("name"))
456                        .newInstance();
457                processor.setParameter(attributes.getValue("param"));
458                setwisepostprocessors.add(processor);
459            }
460            else if (qName.equals("setwisetrainer")) {
461                final ISetWiseTrainingStrategy trainer =
462                    (ISetWiseTrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
463                                                                 attributes.getValue("name"))
464                        .newInstance();
465                trainer.setParameter(attributes.getValue("param"));
466                setwiseTrainers.add(trainer);
467            }
468            else if (qName.equals("preprocessor")) {
469                final IProcessesingStrategy processor =
470                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
471                                                              attributes.getValue("name"))
472                        .newInstance();
473                processor.setParameter(attributes.getValue("param"));
474                preprocessors.add(processor);
475            }
476            else if (qName.equals("pointwiseselector")) {
477                final IPointWiseDataselectionStrategy selection =
478                    (IPointWiseDataselectionStrategy) Class
479                        .forName("de.ugoe.cs.cpdp.dataselection." + attributes.getValue("name"))
480                        .newInstance();
481                selection.setParameter(attributes.getValue("param"));
482                pointwiseselectors.add(selection);
483            }
484            else if (qName.equals("postprocessor")) {
485                final IProcessesingStrategy processor =
486                    (IProcessesingStrategy) Class.forName("de.ugoe.cs.cpdp.dataprocessing." +
487                                                              attributes.getValue("name"))
488                        .newInstance();
489                processor.setParameter(attributes.getValue("param"));
490                postprocessors.add(processor);
491            }
492            else if (qName.equals("trainer")) {
493                final ITrainingStrategy trainer =
494                    (ITrainingStrategy) Class.forName("de.ugoe.cs.cpdp.training." +
495                                                          attributes.getValue("name"))
496                        .newInstance();
497                trainer.setParameter(attributes.getValue("param"));
498                trainers.add(trainer);
499            }
500            else if (qName.equals("eval")) {
501                final IEvaluationStrategy evaluator =
502                    (IEvaluationStrategy) Class.forName("de.ugoe.cs.cpdp.eval." +
503                                                            attributes.getValue("name"))
504                        .newInstance();
505                evaluators.add(evaluator);
506            }
507            else if (qName.equals("saveClassifier")) {
508                saveClassifier = true;
509            }
510            else if (qName.equals("executionStrategy")) {
511                executionStrategy = attributes.getValue("name");
512            }
513            else if (qName.equals("partialconfig")) {
514                String path = attributes.getValue("path");
515                try {
516                    boolean relative = true;
517                    if (attributes.getValue("relative") != null) {
518                        relative = Boolean.parseBoolean(attributes.getValue("relative"));
519                    }
520
521                    if (relative) {
522                        path = configFile.getParentFile().getPath() + "/" + path;
523                    }
524                    addConfigurations(new ExperimentConfiguration(path));
525                }
526                catch (ExperimentConfigurationException e) {
527                    throw new SAXException("Could not load partial configuration: " + path, e);
528                }
529            }
530            else {
531                Console.traceln(Level.WARNING, "element in config-file " + configFile.getName() +
532                    " ignored: " + qName);
533            }
534        }
535        catch (NoClassDefFoundError | ClassNotFoundException | IllegalAccessException
536                | InstantiationException | ClassCastException e)
537        {
538            throw new SAXException("Could not initialize class correctly", (Exception) e);
539        }
540    }
541
542    /**
543     * Adds the information of another experiment configuration to this configuration. This
544     * mechanism allows the usage of partial configuration files. The name of the other
545     * configuration is lost. <br>
546     * <br>
547     * If the current data path is the empty string (&quot;&quot;), it is override by the datapath
548     * of the other configuration. Otherwise, the current data path is kept.
549     *
550     * @param other
551     *            experiment whose information is added
552     * @throws ExperimentConfigurationException
553     */
554    private void addConfigurations(ExperimentConfiguration other)
555        throws ExperimentConfigurationException
556    {
557        if ("results".equals(resultsPath)) {
558            resultsPath = other.resultsPath;
559        }
560        loaders.addAll(other.loaders);
561        versionFilters.addAll(other.versionFilters);
562        testVersionFilters.addAll(other.testVersionFilters);
563        trainingVersionFilters.addAll(other.trainingVersionFilters);
564        setwisepreprocessors.addAll(other.setwisepreprocessors);
565        setwiseselectors.addAll(other.setwiseselectors);
566        setwisepostprocessors.addAll(other.setwisepostprocessors);
567        setwiseTrainers.addAll(other.setwiseTrainers);
568        preprocessors.addAll(other.preprocessors);
569        pointwiseselectors.addAll(other.pointwiseselectors);
570        postprocessors.addAll(other.postprocessors);
571        trainers.addAll(other.trainers);
572        evaluators.addAll(other.evaluators);
573
574        if (!executionStrategy.equals(other.executionStrategy)) {
575            throw new ExperimentConfigurationException(
576                                                       "Executionstrategies must be the same, if config files should be added.");
577        }
578
579        /*
580         * Only if saveClassifier is not set in the main config and the other configs saveClassifier
581         * is true, it must be set.
582         */
583        if (saveClassifier == null && other.saveClassifier == true) {
584            saveClassifier = other.saveClassifier;
585        }
586
587    }
588
589    /*
590     * (non-Javadoc)
591     *
592     * @see java.lang.Object#toString()
593     */
594    @Override
595    public String toString() {
596        final StringBuilder builder = new StringBuilder();
597        builder.append("Experiment name: " + experimentName + StringTools.ENDLINE);
598        builder.append("Loaders: " + loaders + StringTools.ENDLINE);
599        builder.append("Results path: " + resultsPath + StringTools.ENDLINE);
600        builder.append("Version filters: " + versionFilters.toString() + StringTools.ENDLINE);
601        builder.append("Test version filters: " + testVersionFilters.toString() +
602            StringTools.ENDLINE);
603        builder.append("Training version filters: " + trainingVersionFilters.toString() +
604            StringTools.ENDLINE);
605        builder.append("Setwise preprocessors: " + setwisepreprocessors.toString() +
606            StringTools.ENDLINE);
607        builder.append("Setwise selectors: " + setwiseselectors.toString() + StringTools.ENDLINE);
608        builder.append("Setwise postprocessors: " + setwisepostprocessors.toString() +
609            StringTools.ENDLINE);
610        builder.append("Setwise trainers: " + setwiseTrainers.toString() + StringTools.ENDLINE);
611        builder
612            .append("Pointwise preprocessors: " + preprocessors.toString() + StringTools.ENDLINE);
613        builder.append("Pointwise selectors: " + pointwiseselectors.toString() +
614            StringTools.ENDLINE);
615        builder.append("Pointwise postprocessors: " + postprocessors.toString() +
616            StringTools.ENDLINE);
617        builder.append("Pointwise trainers: " + trainers.toString() + StringTools.ENDLINE);
618        builder.append("Evaluators: " + evaluators.toString() + StringTools.ENDLINE);
619        builder.append("Save Classifier?: " + saveClassifier + StringTools.ENDLINE);
620        builder.append("Execution Strategy: " + executionStrategy + StringTools.ENDLINE);
621
622        return builder.toString();
623    }
624}
Note: See TracBrowser for help on using the repository browser.