source: trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/CrossProjectExperiment.java @ 53

Last change on this file since 53 was 44, checked in by atrautsch, 9 years ago

metric matching hinzu

  • Property svn:mime-type set to text/plain
File size: 13.4 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.execution;
16
17import java.io.File;
18import java.util.Collections;
19import java.util.LinkedList;
20import java.util.List;
21import java.util.logging.Level;
22
23import org.apache.commons.collections4.list.SetUniqueList;
24
25import weka.core.Instances;
26import de.ugoe.cs.cpdp.ExperimentConfiguration;
27import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
28import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
29import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
30import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
31import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
32import de.ugoe.cs.cpdp.loader.IVersionLoader;
33import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
34import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
35import de.ugoe.cs.cpdp.training.ITrainer;
36import de.ugoe.cs.cpdp.training.ITrainingStrategy;
37import de.ugoe.cs.cpdp.versions.IVersionFilter;
38import de.ugoe.cs.cpdp.versions.SoftwareVersion;
39import de.ugoe.cs.util.console.Console;
40
41/**
42 * Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
43 * The steps of an experiment are as follows:
44 * <ul>
45 * <li>load the data from the provided data path</li>
46 * <li>filter the data sets according to the provided version filters</li>
47 * <li>execute the following steps for each data sets as test data that is not ignored through the
48 * test version filter:
49 * <ul>
50 * <li>filter the data sets to setup the candidate training data:
51 * <ul>
52 * <li>remove all data sets from the same project</li>
53 * <li>filter all data sets according to the training data filter
54 * </ul>
55 * </li>
56 * <li>apply the setwise preprocessors</li>
57 * <li>apply the setwise data selection algorithms</li>
58 * <li>apply the setwise postprocessors</li>
59 * <li>train the setwise training classifiers</li>
60 * <li>unify all remaining training data into one data set</li>
61 * <li>apply the preprocessors</li>
62 * <li>apply the pointwise data selection algorithms</li>
63 * <li>apply the postprocessors</li>
64 * <li>train the normal classifiers</li>
65 * <li>evaluate the results for all trained classifiers on the training data</li>
66 * </ul>
67 * </li>
68 * </ul>
69 *
70 * Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
71 * thread.
72 *
73 * @author Steffen Herbold
74 */
75public class CrossProjectExperiment implements IExecutionStrategy {
76
77    /**
78     * configuration of the experiment
79     */
80    private final ExperimentConfiguration config;
81
82    /**
83     * Constructor. Creates a new experiment based on a configuration.
84     *
85     * @param config
86     *            configuration of the experiment
87     */
88    public CrossProjectExperiment(ExperimentConfiguration config) {
89        this.config = config;
90    }
91
92    /**
93     * Executes the experiment with the steps as described in the class comment.
94     *
95     * @see Runnable#run()
96     */
97    @Override
98    public void run() {
99        final List<SoftwareVersion> versions = new LinkedList<>();
100
101        for (IVersionLoader loader : config.getLoaders()) {
102            versions.addAll(loader.load());
103        }
104
105        for (IVersionFilter filter : config.getVersionFilters()) {
106            filter.apply(versions);
107        }
108        boolean writeHeader = true;
109        int versionCount = 1;
110        int testVersionCount = 0;
111
112        for (SoftwareVersion testVersion : versions) {
113            if (isVersion(testVersion, config.getTestVersionFilters())) {
114                testVersionCount++;
115            }
116        }
117
118        // sort versions
119        Collections.sort(versions);
120
121        for (SoftwareVersion testVersion : versions) {
122            if (isVersion(testVersion, config.getTestVersionFilters())) {
123                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: starting",
124                                                          config.getExperimentName(), versionCount,
125                                                          testVersionCount,
126                                                          testVersion.getVersion()));
127
128                // Setup testdata and training data
129                Instances testdata = testVersion.getInstances();
130                String testProject = testVersion.getProject();
131                SetUniqueList<Instances> traindataSet =
132                    SetUniqueList.setUniqueList(new LinkedList<Instances>());
133                for (SoftwareVersion trainingVersion : versions) {
134                    if (isVersion(trainingVersion, config.getTrainingVersionFilters())) {
135                        if (trainingVersion != testVersion) {
136                            if (!trainingVersion.getProject().equals(testProject)) {
137                                traindataSet.add(trainingVersion.getInstances());
138                            }
139                        }
140                    }
141                }
142
143                for (ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors()) {
144                    Console.traceln(Level.FINE, String
145                        .format("[%s] [%02d/%02d] %s: applying setwise preprocessor %s",
146                                config.getExperimentName(), versionCount, testVersionCount,
147                                testVersion.getVersion(), processor.getClass().getName()));
148                    processor.apply(testdata, traindataSet);
149                }
150                for (ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors()) {
151                    Console.traceln(Level.FINE, String
152                        .format("[%s] [%02d/%02d] %s: applying setwise selection %s",
153                                config.getExperimentName(), versionCount, testVersionCount,
154                                testVersion.getVersion(), dataselector.getClass().getName()));
155                    dataselector.apply(testdata, traindataSet);
156                }
157                for (ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors()) {
158                    Console.traceln(Level.FINE, String
159                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
160                                config.getExperimentName(), versionCount, testVersionCount,
161                                testVersion.getVersion(), processor.getClass().getName()));
162                    processor.apply(testdata, traindataSet);
163                }
164                for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
165                    Console.traceln(Level.FINE, String
166                        .format("[%s] [%02d/%02d] %s: applying setwise trainer %s",
167                                config.getExperimentName(), versionCount, testVersionCount,
168                                testVersion.getVersion(), setwiseTrainer.getName()));
169                    setwiseTrainer.apply(traindataSet);
170                }
171                for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config.getSetWiseTestdataAwareTrainers()) {
172                    Console.traceln(Level.FINE, String
173                        .format("[%s] [%02d/%02d] %s: applying testdata aware setwise trainer %s",
174                                config.getExperimentName(), versionCount, testVersionCount,
175                                testVersion.getVersion(), setwiseTestdataAwareTrainer.getName()));
176                    setwiseTestdataAwareTrainer.apply(traindataSet, testdata);
177                }
178                Instances traindata = makeSingleTrainingSet(traindataSet);
179                for (IProcessesingStrategy processor : config.getPreProcessors()) {
180                    Console.traceln(Level.FINE, String
181                        .format("[%s] [%02d/%02d] %s: applying preprocessor %s",
182                                config.getExperimentName(), versionCount, testVersionCount,
183                                testVersion.getVersion(), processor.getClass().getName()));
184                    processor.apply(testdata, traindata);
185                }
186                for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors())
187                {
188                    Console.traceln(Level.FINE, String
189                        .format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
190                                config.getExperimentName(), versionCount, testVersionCount,
191                                testVersion.getVersion(), dataselector.getClass().getName()));
192                    traindata = dataselector.apply(testdata, traindata);
193                }
194                for (IProcessesingStrategy processor : config.getPostProcessors()) {
195                    Console.traceln(Level.FINE, String
196                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
197                                config.getExperimentName(), versionCount, testVersionCount,
198                                testVersion.getVersion(), processor.getClass().getName()));
199                    processor.apply(testdata, traindata);
200                }
201                for (ITrainingStrategy trainer : config.getTrainers()) {
202                    Console.traceln(Level.FINE, String
203                        .format("[%s] [%02d/%02d] %s: applying trainer %s",
204                                config.getExperimentName(), versionCount, testVersionCount,
205                                testVersion.getVersion(), trainer.getName()));
206                    trainer.apply(traindata);
207                }
208                File resultsDir = new File(config.getResultsPath());
209                if (!resultsDir.exists()) {
210                    resultsDir.mkdir();
211                }
212                for (IEvaluationStrategy evaluator : config.getEvaluators()) {
213                    Console.traceln(Level.FINE, String
214                        .format("[%s] [%02d/%02d] %s: applying evaluator %s",
215                                config.getExperimentName(), versionCount, testVersionCount,
216                                testVersion.getVersion(), evaluator.getClass().getName()));
217                    List<ITrainer> allTrainers = new LinkedList<>();
218                    for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
219                        allTrainers.add(setwiseTrainer);
220                    }
221                    for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config.getSetWiseTestdataAwareTrainers()) {
222                        allTrainers.add(setwiseTestdataAwareTrainer);
223                    }
224                    for (ITrainingStrategy trainer : config.getTrainers()) {
225                        allTrainers.add(trainer);
226                    }
227                    if (writeHeader) {
228                        evaluator.setParameter(config.getResultsPath() + "/" +
229                            config.getExperimentName() + ".csv");
230                    }
231                    evaluator.apply(testdata, traindata, allTrainers, writeHeader);
232                    writeHeader = false;
233                }
234                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished",
235                                                          config.getExperimentName(), versionCount,
236                                                          testVersionCount,
237                                                          testVersion.getVersion()));
238                versionCount++;
239            }
240        }
241    }
242
243    /**
244     * Helper method that checks if a version passes all filters.
245     *
246     * @param version
247     *            version that is checked
248     * @param filters
249     *            list of the filters
250     * @return true, if the version passes all filters, false otherwise
251     */
252    private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
253        boolean result = true;
254        for (IVersionFilter filter : filters) {
255            result &= !filter.apply(version);
256        }
257        return result;
258    }
259
260    /**
261     * Helper method that combines a set of Weka {@link Instances} sets into a single
262     * {@link Instances} set.
263     *
264     * @param traindataSet
265     *            set of {@link Instances} to be combines
266     * @return single {@link Instances} set
267     */
268    public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
269        Instances traindataFull = null;
270        for (Instances traindata : traindataSet) {
271            if (traindataFull == null) {
272                traindataFull = new Instances(traindata);
273            }
274            else {
275                for (int i = 0; i < traindata.numInstances(); i++) {
276                    traindataFull.add(traindata.instance(i));
277                }
278            }
279        }
280        return traindataFull;
281    }
282}
Note: See TracBrowser for help on using the repository browser.