source: trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/CrossProjectExperiment.java @ 66

Last change on this file since 66 was 65, checked in by sherbold, 9 years ago
  • added new interface ITestAwareTraining strategy to the framework to support trainers with knowledge of the test data. The implementation of such trainers must take care to not accidentally take the classification of the test data into account.
  • Property svn:mime-type set to text/plain
File size: 14.1 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.execution;
16
17import java.io.File;
18import java.util.Collections;
19import java.util.LinkedList;
20import java.util.List;
21import java.util.logging.Level;
22
23import org.apache.commons.collections4.list.SetUniqueList;
24
25import weka.core.Instances;
26import de.ugoe.cs.cpdp.ExperimentConfiguration;
27import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
28import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
29import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
30import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
31import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
32import de.ugoe.cs.cpdp.loader.IVersionLoader;
33import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
34import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
35import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
36import de.ugoe.cs.cpdp.training.ITrainer;
37import de.ugoe.cs.cpdp.training.ITrainingStrategy;
38import de.ugoe.cs.cpdp.versions.IVersionFilter;
39import de.ugoe.cs.cpdp.versions.SoftwareVersion;
40import de.ugoe.cs.util.console.Console;
41
42/**
43 * Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
44 * The steps of an experiment are as follows:
45 * <ul>
46 * <li>load the data from the provided data path</li>
47 * <li>filter the data sets according to the provided version filters</li>
48 * <li>execute the following steps for each data sets as test data that is not ignored through the
49 * test version filter:
50 * <ul>
51 * <li>filter the data sets to setup the candidate training data:
52 * <ul>
53 * <li>remove all data sets from the same project</li>
54 * <li>filter all data sets according to the training data filter
55 * </ul>
56 * </li>
57 * <li>apply the setwise preprocessors</li>
58 * <li>apply the setwise data selection algorithms</li>
59 * <li>apply the setwise postprocessors</li>
60 * <li>train the setwise training classifiers</li>
61 * <li>unify all remaining training data into one data set</li>
62 * <li>apply the preprocessors</li>
63 * <li>apply the pointwise data selection algorithms</li>
64 * <li>apply the postprocessors</li>
65 * <li>train the normal classifiers</li>
66 * <li>evaluate the results for all trained classifiers on the training data</li>
67 * </ul>
68 * </li>
69 * </ul>
70 *
71 * Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
72 * thread.
73 *
74 * @author Steffen Herbold
75 */
76public class CrossProjectExperiment implements IExecutionStrategy {
77
78    /**
79     * configuration of the experiment
80     */
81    private final ExperimentConfiguration config;
82
83    /**
84     * Constructor. Creates a new experiment based on a configuration.
85     *
86     * @param config
87     *            configuration of the experiment
88     */
89    public CrossProjectExperiment(ExperimentConfiguration config) {
90        this.config = config;
91    }
92
93    /**
94     * Executes the experiment with the steps as described in the class comment.
95     *
96     * @see Runnable#run()
97     */
98    @Override
99    public void run() {
100        final List<SoftwareVersion> versions = new LinkedList<>();
101
102        for (IVersionLoader loader : config.getLoaders()) {
103            versions.addAll(loader.load());
104        }
105
106        for (IVersionFilter filter : config.getVersionFilters()) {
107            filter.apply(versions);
108        }
109        boolean writeHeader = true;
110        int versionCount = 1;
111        int testVersionCount = 0;
112
113        for (SoftwareVersion testVersion : versions) {
114            if (isVersion(testVersion, config.getTestVersionFilters())) {
115                testVersionCount++;
116            }
117        }
118
119        // sort versions
120        Collections.sort(versions);
121
122        for (SoftwareVersion testVersion : versions) {
123            if (isVersion(testVersion, config.getTestVersionFilters())) {
124                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: starting",
125                                                          config.getExperimentName(), versionCount,
126                                                          testVersionCount,
127                                                          testVersion.getVersion()));
128
129                // Setup testdata and training data
130                Instances testdata = testVersion.getInstances();
131                String testProject = testVersion.getProject();
132                SetUniqueList<Instances> traindataSet =
133                    SetUniqueList.setUniqueList(new LinkedList<Instances>());
134                for (SoftwareVersion trainingVersion : versions) {
135                    if (isVersion(trainingVersion, config.getTrainingVersionFilters())) {
136                        if (trainingVersion != testVersion) {
137                            if (!trainingVersion.getProject().equals(testProject)) {
138                                traindataSet.add(trainingVersion.getInstances());
139                            }
140                        }
141                    }
142                }
143
144                for (ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors()) {
145                    Console.traceln(Level.FINE, String
146                        .format("[%s] [%02d/%02d] %s: applying setwise preprocessor %s",
147                                config.getExperimentName(), versionCount, testVersionCount,
148                                testVersion.getVersion(), processor.getClass().getName()));
149                    processor.apply(testdata, traindataSet);
150                }
151                for (ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors()) {
152                    Console.traceln(Level.FINE, String
153                        .format("[%s] [%02d/%02d] %s: applying setwise selection %s",
154                                config.getExperimentName(), versionCount, testVersionCount,
155                                testVersion.getVersion(), dataselector.getClass().getName()));
156                    dataselector.apply(testdata, traindataSet);
157                }
158                for (ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors()) {
159                    Console.traceln(Level.FINE, String
160                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
161                                config.getExperimentName(), versionCount, testVersionCount,
162                                testVersion.getVersion(), processor.getClass().getName()));
163                    processor.apply(testdata, traindataSet);
164                }
165                for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
166                    Console.traceln(Level.FINE, String
167                        .format("[%s] [%02d/%02d] %s: applying setwise trainer %s",
168                                config.getExperimentName(), versionCount, testVersionCount,
169                                testVersion.getVersion(), setwiseTrainer.getName()));
170                    setwiseTrainer.apply(traindataSet);
171                }
172                for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config.getSetWiseTestdataAwareTrainers()) {
173                    Console.traceln(Level.FINE, String
174                        .format("[%s] [%02d/%02d] %s: applying testdata aware setwise trainer %s",
175                                config.getExperimentName(), versionCount, testVersionCount,
176                                testVersion.getVersion(), setwiseTestdataAwareTrainer.getName()));
177                    setwiseTestdataAwareTrainer.apply(traindataSet, testdata);
178                }
179                Instances traindata = makeSingleTrainingSet(traindataSet);
180                for (IProcessesingStrategy processor : config.getPreProcessors()) {
181                    Console.traceln(Level.FINE, String
182                        .format("[%s] [%02d/%02d] %s: applying preprocessor %s",
183                                config.getExperimentName(), versionCount, testVersionCount,
184                                testVersion.getVersion(), processor.getClass().getName()));
185                    processor.apply(testdata, traindata);
186                }
187                for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors())
188                {
189                    Console.traceln(Level.FINE, String
190                        .format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
191                                config.getExperimentName(), versionCount, testVersionCount,
192                                testVersion.getVersion(), dataselector.getClass().getName()));
193                    traindata = dataselector.apply(testdata, traindata);
194                }
195                for (IProcessesingStrategy processor : config.getPostProcessors()) {
196                    Console.traceln(Level.FINE, String
197                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
198                                config.getExperimentName(), versionCount, testVersionCount,
199                                testVersion.getVersion(), processor.getClass().getName()));
200                    processor.apply(testdata, traindata);
201                }
202                for (ITrainingStrategy trainer : config.getTrainers()) {
203                    Console.traceln(Level.FINE, String
204                        .format("[%s] [%02d/%02d] %s: applying trainer %s",
205                                config.getExperimentName(), versionCount, testVersionCount,
206                                testVersion.getVersion(), trainer.getName()));
207                    trainer.apply(traindata);
208                }
209                for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
210                    Console.traceln(Level.FINE, String
211                        .format("[%s] [%02d/%02d] %s: applying trainer %s",
212                                config.getExperimentName(), versionCount, testVersionCount,
213                                testVersion.getVersion(), trainer.getName()));
214                    trainer.apply(testdata, traindata);
215                }
216                File resultsDir = new File(config.getResultsPath());
217                if (!resultsDir.exists()) {
218                    resultsDir.mkdir();
219                }
220                for (IEvaluationStrategy evaluator : config.getEvaluators()) {
221                    Console.traceln(Level.FINE, String
222                        .format("[%s] [%02d/%02d] %s: applying evaluator %s",
223                                config.getExperimentName(), versionCount, testVersionCount,
224                                testVersion.getVersion(), evaluator.getClass().getName()));
225                    List<ITrainer> allTrainers = new LinkedList<>();
226                    for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
227                        allTrainers.add(setwiseTrainer);
228                    }
229                    for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config.getSetWiseTestdataAwareTrainers()) {
230                        allTrainers.add(setwiseTestdataAwareTrainer);
231                    }
232                    for (ITrainingStrategy trainer : config.getTrainers()) {
233                        allTrainers.add(trainer);
234                    }
235                    for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
236                        allTrainers.add(trainer);
237                    }
238                    if (writeHeader) {
239                        evaluator.setParameter(config.getResultsPath() + "/" +
240                            config.getExperimentName() + ".csv");
241                    }
242                    evaluator.apply(testdata, traindata, allTrainers, writeHeader);
243                    writeHeader = false;
244                }
245                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished",
246                                                          config.getExperimentName(), versionCount,
247                                                          testVersionCount,
248                                                          testVersion.getVersion()));
249                versionCount++;
250            }
251        }
252    }
253
254    /**
255     * Helper method that checks if a version passes all filters.
256     *
257     * @param version
258     *            version that is checked
259     * @param filters
260     *            list of the filters
261     * @return true, if the version passes all filters, false otherwise
262     */
263    private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
264        boolean result = true;
265        for (IVersionFilter filter : filters) {
266            result &= !filter.apply(version);
267        }
268        return result;
269    }
270
271    /**
272     * Helper method that combines a set of Weka {@link Instances} sets into a single
273     * {@link Instances} set.
274     *
275     * @param traindataSet
276     *            set of {@link Instances} to be combines
277     * @return single {@link Instances} set
278     */
279    public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
280        Instances traindataFull = null;
281        for (Instances traindata : traindataSet) {
282            if (traindataFull == null) {
283                traindataFull = new Instances(traindata);
284            }
285            else {
286                for (int i = 0; i < traindata.numInstances(); i++) {
287                    traindataFull.add(traindata.instance(i));
288                }
289            }
290        }
291        return traindataFull;
292    }
293}
Note: See TracBrowser for help on using the repository browser.