source: trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/CrossValidationExperiment.java @ 123

Last change on this file since 123 was 122, checked in by sherbold, 8 years ago
  • added new and very efficient cross-validation experiment workflow
  • Property svn:mime-type set to text/plain
File size: 11.1 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.execution;
16
17import java.io.File;
18import java.util.Collections;
19import java.util.LinkedList;
20import java.util.List;
21import java.util.logging.Level;
22
23import org.apache.commons.collections4.list.SetUniqueList;
24
25import de.ugoe.cs.cpdp.ExperimentConfiguration;
26import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
27import de.ugoe.cs.cpdp.eval.IResultStorage;
28import de.ugoe.cs.cpdp.loader.IVersionLoader;
29import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
30import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
31import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
32import de.ugoe.cs.cpdp.training.ITrainer;
33import de.ugoe.cs.cpdp.training.ITrainingStrategy;
34import de.ugoe.cs.cpdp.training.IWekaCompatibleTrainer;
35import de.ugoe.cs.cpdp.versions.IVersionFilter;
36import de.ugoe.cs.cpdp.versions.SoftwareVersion;
37import de.ugoe.cs.util.console.Console;
38import weka.core.Instances;
39
40/**
41 * Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
42 * The steps of an experiment are as follows:
43 * <ul>
44 * <li>load the data from the provided data path</li>
45 * <li>filter the data sets according to the provided version filters</li>
46 * <li>execute the following steps for each data sets as test data that is not ignored through the
47 * test version filter:
48 * <ul>
49 * <li>filter the data sets to setup the candidate training data:
50 * <ul>
51 * <li>remove all data sets from the same project</li>
52 * <li>filter all data sets according to the training data filter
53 * </ul>
54 * </li>
55 * <li>apply the setwise preprocessors</li>
56 * <li>apply the setwise data selection algorithms</li>
57 * <li>apply the setwise postprocessors</li>
58 * <li>train the setwise training classifiers</li>
59 * <li>unify all remaining training data into one data set</li>
60 * <li>apply the preprocessors</li>
61 * <li>apply the pointwise data selection algorithms</li>
62 * <li>apply the postprocessors</li>
63 * <li>train the normal classifiers</li>
64 * <li>evaluate the results for all trained classifiers on the training data</li>
65 * </ul>
66 * </li>
67 * </ul>
68 *
69 * Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
70 * thread.
71 *
72 * @author Steffen Herbold
73 */
74public class CrossValidationExperiment implements IExecutionStrategy {
75
76    /**
77     * configuration of the experiment
78     */
79    protected final ExperimentConfiguration config;
80
81    /**
82     * Constructor. Creates a new experiment based on a configuration.
83     *
84     * @param config
85     *            configuration of the experiment
86     */
87    public CrossValidationExperiment(ExperimentConfiguration config) {
88        this.config = config;
89    }
90
91    /**
92     * Helper method that combines a set of Weka {@link Instances} sets into a single
93     * {@link Instances} set.
94     *
95     * @param traindataSet
96     *            set of {@link Instances} to be combines
97     * @return single {@link Instances} set
98     */
99    public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
100        Instances traindataFull = null;
101        for (Instances traindata : traindataSet) {
102            if (traindataFull == null) {
103                traindataFull = new Instances(traindata);
104            }
105            else {
106                for (int i = 0; i < traindata.numInstances(); i++) {
107                    traindataFull.add(traindata.instance(i));
108                }
109            }
110        }
111        return traindataFull;
112    }
113
114    /**
115     * Executes the experiment with the steps as described in the class comment.
116     *
117     * @see Runnable#run()
118     */
119    @Override
120    public void run() {
121        final List<SoftwareVersion> versions = new LinkedList<>();
122
123        for (IVersionLoader loader : config.getLoaders()) {
124            versions.addAll(loader.load());
125        }
126
127        for (IVersionFilter filter : config.getVersionFilters()) {
128            filter.apply(versions);
129        }
130        boolean writeHeader = true;
131        int versionCount = 1;
132        int testVersionCount = 0;
133        int numTrainers = 0;
134
135        for (SoftwareVersion testVersion : versions) {
136            if (isVersion(testVersion, config.getTestVersionFilters())) {
137                testVersionCount++;
138            }
139        }
140       
141        numTrainers += config.getSetWiseTrainers().size();
142        numTrainers += config.getSetWiseTestdataAwareTrainers().size();
143        numTrainers += config.getTrainers().size();
144        numTrainers += config.getTestAwareTrainers().size();
145
146        // sort versions
147        Collections.sort(versions);
148
149        for (SoftwareVersion testVersion : versions) {
150            if (isVersion(testVersion, config.getTestVersionFilters())) {
151                Console.traceln(Level.INFO,
152                                String.format("[%s] [%02d/%02d] %s: starting",
153                                              config.getExperimentName(), versionCount,
154                                              testVersionCount, testVersion.getVersion()));
155                int numResultsAvailable = resultsAvailable(testVersion);
156                if (numResultsAvailable >= numTrainers*config.getRepetitions()) {
157                    Console.traceln(Level.INFO,
158                                    String.format(
159                                                  "[%s] [%02d/%02d] %s: results already available; skipped",
160                                                  config.getExperimentName(), versionCount,
161                                                  testVersionCount, testVersion.getVersion()));
162                    versionCount++;
163                    continue;
164                }
165
166                // Setup testdata and training data
167                Instances testdata = testVersion.getInstances();
168               
169                for (ITrainingStrategy trainer : config.getTrainers()) {
170                    Console.traceln(Level.FINE,
171                                    String.format("[%s] [%02d/%02d] %s: applying trainer %s",
172                                                  config.getExperimentName(), versionCount,
173                                                  testVersionCount, testVersion.getVersion(),
174                                                  trainer.getName()));
175                    trainer.apply(testdata);
176                }
177               
178                File resultsDir = new File(config.getResultsPath());
179                if (!resultsDir.exists()) {
180                    resultsDir.mkdir();
181                }
182                for (IEvaluationStrategy evaluator : config.getEvaluators()) {
183                    Console.traceln(Level.FINE,
184                                    String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
185                                                  config.getExperimentName(), versionCount,
186                                                  testVersionCount, testVersion.getVersion(),
187                                                  evaluator.getClass().getName()));
188                    List<ITrainer> allTrainers = new LinkedList<>();
189                    for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
190                        allTrainers.add(setwiseTrainer);
191                    }
192                    for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
193                        .getSetWiseTestdataAwareTrainers())
194                    {
195                        allTrainers.add(setwiseTestdataAwareTrainer);
196                    }
197                    for (ITrainingStrategy trainer : config.getTrainers()) {
198                        allTrainers.add(trainer);
199                    }
200                    for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
201                        allTrainers.add(trainer);
202                    }
203                    if (writeHeader) {
204                        evaluator.setParameter(config.getResultsPath() + "/" +
205                            config.getExperimentName() + ".csv");
206                    }
207                    evaluator.apply(testdata, testdata, allTrainers, writeHeader,
208                                    config.getResultStorages());
209                    writeHeader = false;
210                }
211                Console.traceln(Level.INFO,
212                                String.format("[%s] [%02d/%02d] %s: finished",
213                                              config.getExperimentName(), versionCount,
214                                              testVersionCount, testVersion.getVersion()));
215                versionCount++;
216            }
217        }
218    }
219
220    /**
221     * Helper method that checks if a version passes all filters.
222     *
223     * @param version
224     *            version that is checked
225     * @param filters
226     *            list of the filters
227     * @return true, if the version passes all filters, false otherwise
228     */
229    private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
230        boolean result = true;
231        for (IVersionFilter filter : filters) {
232            result &= !filter.apply(version);
233        }
234        return result;
235    }
236
237    private int resultsAvailable(SoftwareVersion version) {
238        if (config.getResultStorages().isEmpty()) {
239            return 0;
240        }
241       
242        List<ITrainer> allTrainers = new LinkedList<>();
243        for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
244            allTrainers.add(setwiseTrainer);
245        }
246        for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
247            .getSetWiseTestdataAwareTrainers())
248        {
249            allTrainers.add(setwiseTestdataAwareTrainer);
250        }
251        for (ITrainingStrategy trainer : config.getTrainers()) {
252            allTrainers.add(trainer);
253        }
254        for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
255            allTrainers.add(trainer);
256        }
257       
258        int available = Integer.MAX_VALUE;
259        for (IResultStorage storage : config.getResultStorages()) {
260            String classifierName = ((IWekaCompatibleTrainer) allTrainers.get(0)).getName();
261            int curAvailable = storage.containsResult(config.getExperimentName(), version.getVersion(), classifierName);
262            if( curAvailable<available ) {
263                available = curAvailable;
264            }
265        }
266        return available;
267    }
268}
Note: See TracBrowser for help on using the repository browser.