source: trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/CrossValidationExperiment.java @ 142

Last change on this file since 142 was 135, checked in by sherbold, 8 years ago
  • code documentation and formatting
  • Property svn:mime-type set to text/plain
File size: 11.4 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.execution;
16
17import java.io.File;
18import java.util.Collections;
19import java.util.LinkedList;
20import java.util.List;
21import java.util.logging.Level;
22
23import org.apache.commons.collections4.list.SetUniqueList;
24
25import de.ugoe.cs.cpdp.ExperimentConfiguration;
26import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
27import de.ugoe.cs.cpdp.eval.IResultStorage;
28import de.ugoe.cs.cpdp.loader.IVersionLoader;
29import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
30import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
31import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
32import de.ugoe.cs.cpdp.training.ITrainer;
33import de.ugoe.cs.cpdp.training.ITrainingStrategy;
34import de.ugoe.cs.cpdp.training.IWekaCompatibleTrainer;
35import de.ugoe.cs.cpdp.versions.IVersionFilter;
36import de.ugoe.cs.cpdp.versions.SoftwareVersion;
37import de.ugoe.cs.util.console.Console;
38import weka.core.Instances;
39
40/**
41 * Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
42 * The steps of an experiment are as follows:
43 * <ul>
44 * <li>load the data from the provided data path</li>
45 * <li>filter the data sets according to the provided version filters</li>
46 * <li>execute the following steps for each data sets as test data that is not ignored through the
47 * test version filter:
48 * <ul>
49 * <li>filter the data sets to setup the candidate training data:
50 * <ul>
51 * <li>remove all data sets from the same project</li>
52 * <li>filter all data sets according to the training data filter
53 * </ul>
54 * </li>
55 * <li>apply the setwise preprocessors</li>
56 * <li>apply the setwise data selection algorithms</li>
57 * <li>apply the setwise postprocessors</li>
58 * <li>train the setwise training classifiers</li>
59 * <li>unify all remaining training data into one data set</li>
60 * <li>apply the preprocessors</li>
61 * <li>apply the pointwise data selection algorithms</li>
62 * <li>apply the postprocessors</li>
63 * <li>train the normal classifiers</li>
64 * <li>evaluate the results for all trained classifiers on the training data</li>
65 * </ul>
66 * </li>
67 * </ul>
68 *
69 * Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
70 * thread.
71 *
72 * @author Steffen Herbold
73 */
74public class CrossValidationExperiment implements IExecutionStrategy {
75
76    /**
77     * configuration of the experiment
78     */
79    protected final ExperimentConfiguration config;
80
81    /**
82     * Constructor. Creates a new experiment based on a configuration.
83     *
84     * @param config
85     *            configuration of the experiment
86     */
87    public CrossValidationExperiment(ExperimentConfiguration config) {
88        this.config = config;
89    }
90
91    /**
92     * Helper method that combines a set of Weka {@link Instances} sets into a single
93     * {@link Instances} set.
94     *
95     * @param traindataSet
96     *            set of {@link Instances} to be combines
97     * @return single {@link Instances} set
98     */
99    public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
100        Instances traindataFull = null;
101        for (Instances traindata : traindataSet) {
102            if (traindataFull == null) {
103                traindataFull = new Instances(traindata);
104            }
105            else {
106                for (int i = 0; i < traindata.numInstances(); i++) {
107                    traindataFull.add(traindata.instance(i));
108                }
109            }
110        }
111        return traindataFull;
112    }
113
114    /**
115     * Executes the experiment with the steps as described in the class comment.
116     *
117     * @see Runnable#run()
118     */
119    @Override
120    public void run() {
121        final List<SoftwareVersion> versions = new LinkedList<>();
122
123        for (IVersionLoader loader : config.getLoaders()) {
124            versions.addAll(loader.load());
125        }
126
127        for (IVersionFilter filter : config.getVersionFilters()) {
128            filter.apply(versions);
129        }
130        boolean writeHeader = true;
131        int versionCount = 1;
132        int testVersionCount = 0;
133        int numTrainers = 0;
134
135        for (SoftwareVersion testVersion : versions) {
136            if (isVersion(testVersion, config.getTestVersionFilters())) {
137                testVersionCount++;
138            }
139        }
140
141        numTrainers += config.getSetWiseTrainers().size();
142        numTrainers += config.getSetWiseTestdataAwareTrainers().size();
143        numTrainers += config.getTrainers().size();
144        numTrainers += config.getTestAwareTrainers().size();
145
146        // sort versions
147        Collections.sort(versions);
148
149        for (SoftwareVersion testVersion : versions) {
150            if (isVersion(testVersion, config.getTestVersionFilters())) {
151                Console.traceln(Level.INFO,
152                                String.format("[%s] [%02d/%02d] %s: starting",
153                                              config.getExperimentName(), versionCount,
154                                              testVersionCount, testVersion.getVersion()));
155                int numResultsAvailable = resultsAvailable(testVersion);
156                if (numResultsAvailable >= numTrainers * config.getRepetitions()) {
157                    Console.traceln(Level.INFO,
158                                    String.format(
159                                                  "[%s] [%02d/%02d] %s: results already available; skipped",
160                                                  config.getExperimentName(), versionCount,
161                                                  testVersionCount, testVersion.getVersion()));
162                    versionCount++;
163                    continue;
164                }
165
166                // Setup testdata and training data
167                Instances testdata = testVersion.getInstances();
168                List<Double> efforts = testVersion.getEfforts();
169
170                for (ITrainingStrategy trainer : config.getTrainers()) {
171                    Console.traceln(Level.FINE,
172                                    String.format("[%s] [%02d/%02d] %s: applying trainer %s",
173                                                  config.getExperimentName(), versionCount,
174                                                  testVersionCount, testVersion.getVersion(),
175                                                  trainer.getName()));
176                    trainer.apply(testdata);
177                }
178
179                File resultsDir = new File(config.getResultsPath());
180                if (!resultsDir.exists()) {
181                    resultsDir.mkdir();
182                }
183                for (IEvaluationStrategy evaluator : config.getEvaluators()) {
184                    Console.traceln(Level.FINE,
185                                    String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
186                                                  config.getExperimentName(), versionCount,
187                                                  testVersionCount, testVersion.getVersion(),
188                                                  evaluator.getClass().getName()));
189                    List<ITrainer> allTrainers = new LinkedList<>();
190                    for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
191                        allTrainers.add(setwiseTrainer);
192                    }
193                    for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
194                        .getSetWiseTestdataAwareTrainers())
195                    {
196                        allTrainers.add(setwiseTestdataAwareTrainer);
197                    }
198                    for (ITrainingStrategy trainer : config.getTrainers()) {
199                        allTrainers.add(trainer);
200                    }
201                    for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
202                        allTrainers.add(trainer);
203                    }
204                    if (writeHeader) {
205                        evaluator.setParameter(config.getResultsPath() + "/" +
206                            config.getExperimentName() + ".csv");
207                    }
208                    evaluator.apply(testdata, testdata, allTrainers, efforts, writeHeader,
209                                    config.getResultStorages());
210                    writeHeader = false;
211                }
212                Console.traceln(Level.INFO,
213                                String.format("[%s] [%02d/%02d] %s: finished",
214                                              config.getExperimentName(), versionCount,
215                                              testVersionCount, testVersion.getVersion()));
216                versionCount++;
217            }
218        }
219    }
220
221    /**
222     * Helper method that checks if a version passes all filters.
223     *
224     * @param version
225     *            version that is checked
226     * @param filters
227     *            list of the filters
228     * @return true, if the version passes all filters, false otherwise
229     */
230    private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
231        boolean result = true;
232        for (IVersionFilter filter : filters) {
233            result &= !filter.apply(version);
234        }
235        return result;
236    }
237
238    /**
239     * <p>
240     * helper function that checks if the results are already in the data store
241     * </p>
242     *
243     * @param version
244     *            version for which the results are checked
245     * @return
246     */
247    private int resultsAvailable(SoftwareVersion version) {
248        if (config.getResultStorages().isEmpty()) {
249            return 0;
250        }
251
252        List<ITrainer> allTrainers = new LinkedList<>();
253        for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
254            allTrainers.add(setwiseTrainer);
255        }
256        for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
257            .getSetWiseTestdataAwareTrainers())
258        {
259            allTrainers.add(setwiseTestdataAwareTrainer);
260        }
261        for (ITrainingStrategy trainer : config.getTrainers()) {
262            allTrainers.add(trainer);
263        }
264        for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
265            allTrainers.add(trainer);
266        }
267
268        int available = Integer.MAX_VALUE;
269        for (IResultStorage storage : config.getResultStorages()) {
270            String classifierName = ((IWekaCompatibleTrainer) allTrainers.get(0)).getName();
271            int curAvailable = storage.containsResult(config.getExperimentName(),
272                                                      version.getVersion(), classifierName);
273            if (curAvailable < available) {
274                available = curAvailable;
275            }
276        }
277        return available;
278    }
279}
Note: See TracBrowser for help on using the repository browser.