source: trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/RelaxedCrossProjectExperiment.java @ 66

Last change on this file since 66 was 65, checked in by sherbold, 9 years ago
  • added new interface ITestAwareTraining strategy to the framework to support trainers with knowledge of the test data. The implementation of such trainers must take care to not accidentally take the classification of the test data into account.
  • Property svn:mime-type set to text/plain
File size: 14.3 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.execution;
16
17import java.io.File;
18import java.util.Collections;
19import java.util.LinkedList;
20import java.util.List;
21import java.util.logging.Level;
22
23import org.apache.commons.collections4.list.SetUniqueList;
24
25import weka.core.Instances;
26import de.ugoe.cs.cpdp.ExperimentConfiguration;
27import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
28import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
29import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
30import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
31import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
32import de.ugoe.cs.cpdp.loader.IVersionLoader;
33import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
34import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
35import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
36import de.ugoe.cs.cpdp.training.ITrainer;
37import de.ugoe.cs.cpdp.training.ITrainingStrategy;
38import de.ugoe.cs.cpdp.versions.IVersionFilter;
39import de.ugoe.cs.cpdp.versions.SoftwareVersion;
40import de.ugoe.cs.util.console.Console;
41
42/**
43 * Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
44 * The steps of an experiment are as follows:
45 * <ul>
46 * <li>load the data from the provided data path</li>
47 * <li>filter the data sets according to the provided version filters</li>
48 * <li>execute the following steps for each data sets as test data that is not ignored through the
49 * test version filter:
50 * <ul>
51 * <li>filter the data sets to setup the candidate training data:
52 * <ul>
53 * <li>filter all data sets according to the training data filter
54 * </ul>
55 * </li>
56 * <li>apply the setwise preprocessors</li>
57 * <li>apply the setwise data selection algorithms</li>
58 * <li>apply the setwise postprocessors</li>
59 * <li>train the setwise training classifiers</li>
60 * <li>unify all remaining training data into one data set</li>
61 * <li>apply the preprocessors</li>
62 * <li>apply the pointwise data selection algorithms</li>
63 * <li>apply the postprocessors</li>
64 * <li>train the normal classifiers</li>
65 * <li>evaluate the results for all trained classifiers on the training data</li>
66 * </ul>
67 * </li>
68 * </ul>
69 *
70 * Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
71 * thread.
72 *
73 * @author Steffen Herbold
74 */
75public class RelaxedCrossProjectExperiment implements IExecutionStrategy {
76
77    /**
78     * configuration of the experiment
79     */
80    private final ExperimentConfiguration config;
81
82    /**
83     * Constructor. Creates a new experiment based on a configuration.
84     *
85     * @param config
86     *            configuration of the experiment
87     */
88    public RelaxedCrossProjectExperiment(ExperimentConfiguration config) {
89        this.config = config;
90    }
91
92    /**
93     * Executes the experiment with the steps as described in the class comment.
94     *
95     * @see Runnable#run()
96     */
97    @Override
98    public void run() {
99        final List<SoftwareVersion> versions = new LinkedList<>();
100
101        for (IVersionLoader loader : config.getLoaders()) {
102            versions.addAll(loader.load());
103        }
104
105        for (IVersionFilter filter : config.getVersionFilters()) {
106            filter.apply(versions);
107        }
108        boolean writeHeader = true;
109        int versionCount = 1;
110        int testVersionCount = 0;
111
112        for (SoftwareVersion testVersion : versions) {
113            if (isVersion(testVersion, config.getTestVersionFilters())) {
114                testVersionCount++;
115            }
116        }
117
118        // sort versions
119        Collections.sort(versions);
120
121        for (SoftwareVersion testVersion : versions) {
122            if (isVersion(testVersion, config.getTestVersionFilters())) {
123                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: starting",
124                                                          config.getExperimentName(), versionCount,
125                                                          testVersionCount,
126                                                          testVersion.getVersion()));
127
128                // Setup testdata and training data
129                Instances testdata = testVersion.getInstances();
130                String testProject = testVersion.getProject();
131                SetUniqueList<Instances> traindataSet =
132                    SetUniqueList.setUniqueList(new LinkedList<Instances>());
133                for (SoftwareVersion trainingVersion : versions) {
134                    if (isVersion(trainingVersion, config.getTrainingVersionFilters())) {
135                        if (trainingVersion != testVersion) {
136                            if (trainingVersion.getProject().equals(testProject)) {
137                                if (trainingVersion.compareTo(testVersion) < 0) {
138                                    // only add if older
139                                    traindataSet.add(trainingVersion.getInstances());
140                                }
141                            }
142                            else {
143                                traindataSet.add(trainingVersion.getInstances());
144                            }
145                        }
146                    }
147                }
148
149                for (ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors()) {
150                    Console.traceln(Level.FINE, String
151                        .format("[%s] [%02d/%02d] %s: applying setwise preprocessor %s",
152                                config.getExperimentName(), versionCount, testVersionCount,
153                                testVersion.getVersion(), processor.getClass().getName()));
154                    processor.apply(testdata, traindataSet);
155                }
156                for (ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors()) {
157                    Console.traceln(Level.FINE, String
158                        .format("[%s] [%02d/%02d] %s: applying setwise selection %s",
159                                config.getExperimentName(), versionCount, testVersionCount,
160                                testVersion.getVersion(), dataselector.getClass().getName()));
161                    dataselector.apply(testdata, traindataSet);
162                }
163                for (ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors()) {
164                    Console.traceln(Level.FINE, String
165                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
166                                config.getExperimentName(), versionCount, testVersionCount,
167                                testVersion.getVersion(), processor.getClass().getName()));
168                    processor.apply(testdata, traindataSet);
169                }
170                for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
171                    Console.traceln(Level.FINE, String
172                        .format("[%s] [%02d/%02d] %s: applying setwise trainer %s",
173                                config.getExperimentName(), versionCount, testVersionCount,
174                                testVersion.getVersion(), setwiseTrainer.getName()));
175                    setwiseTrainer.apply(traindataSet);
176                }
177                for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config.getSetWiseTestdataAwareTrainers()) {
178                    Console.traceln(Level.FINE, String
179                        .format("[%s] [%02d/%02d] %s: applying testdata aware setwise trainer %s",
180                                config.getExperimentName(), versionCount, testVersionCount,
181                                testVersion.getVersion(), setwiseTestdataAwareTrainer.getName()));
182                    setwiseTestdataAwareTrainer.apply(traindataSet, testdata);
183                }
184                Instances traindata = makeSingleTrainingSet(traindataSet);
185                for (IProcessesingStrategy processor : config.getPreProcessors()) {
186                    Console.traceln(Level.FINE, String
187                        .format("[%s] [%02d/%02d] %s: applying preprocessor %s",
188                                config.getExperimentName(), versionCount, testVersionCount,
189                                testVersion.getVersion(), processor.getClass().getName()));
190                    processor.apply(testdata, traindata);
191                }
192                for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors())
193                {
194                    Console.traceln(Level.FINE, String
195                        .format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
196                                config.getExperimentName(), versionCount, testVersionCount,
197                                testVersion.getVersion(), dataselector.getClass().getName()));
198                    traindata = dataselector.apply(testdata, traindata);
199                }
200                for (IProcessesingStrategy processor : config.getPostProcessors()) {
201                    Console.traceln(Level.FINE, String
202                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
203                                config.getExperimentName(), versionCount, testVersionCount,
204                                testVersion.getVersion(), processor.getClass().getName()));
205                    processor.apply(testdata, traindata);
206                }
207                for (ITrainingStrategy trainer : config.getTrainers()) {
208                    Console.traceln(Level.FINE, String
209                        .format("[%s] [%02d/%02d] %s: applying trainer %s",
210                                config.getExperimentName(), versionCount, testVersionCount,
211                                testVersion.getVersion(), trainer.getName()));
212                    trainer.apply(traindata);
213                }
214                for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
215                    Console.traceln(Level.FINE, String
216                        .format("[%s] [%02d/%02d] %s: applying trainer %s",
217                                config.getExperimentName(), versionCount, testVersionCount,
218                                testVersion.getVersion(), trainer.getName()));
219                    trainer.apply(testdata, traindata);
220                }
221                File resultsDir = new File(config.getResultsPath());
222                if (!resultsDir.exists()) {
223                    resultsDir.mkdir();
224                }
225                for (IEvaluationStrategy evaluator : config.getEvaluators()) {
226                    Console.traceln(Level.FINE, String
227                        .format("[%s] [%02d/%02d] %s: applying evaluator %s",
228                                config.getExperimentName(), versionCount, testVersionCount,
229                                testVersion.getVersion(), evaluator.getClass().getName()));
230                    List<ITrainer> allTrainers = new LinkedList<>();
231                    for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
232                        allTrainers.add(setwiseTrainer);
233                    }
234                    for (ISetWiseTestdataAwareTrainingStrategy setwiseTrainer : config.getSetWiseTestdataAwareTrainers()) {
235                        allTrainers.add(setwiseTrainer);
236                    }
237                    for (ITrainingStrategy trainer : config.getTrainers()) {
238                        allTrainers.add(trainer);
239                    }
240                    for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
241                        allTrainers.add(trainer);
242                    }
243                    if (writeHeader) {
244                        evaluator.setParameter(config.getResultsPath() + "/" +
245                            config.getExperimentName() + ".csv");
246                    }
247                    evaluator.apply(testdata, traindata, allTrainers, writeHeader);
248                    writeHeader = false;
249                }
250                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished",
251                                                          config.getExperimentName(), versionCount,
252                                                          testVersionCount,
253                                                          testVersion.getVersion()));
254                versionCount++;
255            }
256        }
257    }
258
259    /**
260     * Helper method that checks if a version passes all filters.
261     *
262     * @param version
263     *            version that is checked
264     * @param filters
265     *            list of the filters
266     * @return true, if the version passes all filters, false otherwise
267     */
268    private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
269        boolean result = true;
270        for (IVersionFilter filter : filters) {
271            result &= !filter.apply(version);
272        }
273        return result;
274    }
275
276    /**
277     * Helper method that combines a set of Weka {@link Instances} sets into a single
278     * {@link Instances} set.
279     *
280     * @param traindataSet
281     *            set of {@link Instances} to be combines
282     * @return single {@link Instances} set
283     */
284    public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
285        Instances traindataFull = null;
286        for (Instances traindata : traindataSet) {
287            if (traindataFull == null) {
288                traindataFull = new Instances(traindata);
289            }
290            else {
291                for (int i = 0; i < traindata.numInstances(); i++) {
292                    traindataFull.add(traindata.instance(i));
293                }
294            }
295        }
296        return traindataFull;
297    }
298}
Note: See TracBrowser for help on using the repository browser.