source: trunk/CrossPare/src/de/ugoe/cs/cpdp/execution/RelaxedCrossProjectExperiment.java @ 43

Last change on this file since 43 was 41, checked in by sherbold, 9 years ago
  • formatted code and added copyrights
  • Property svn:mime-type set to text/plain
File size: 12.8 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.execution;
16
17import java.io.File;
18import java.util.Collections;
19import java.util.LinkedList;
20import java.util.List;
21import java.util.logging.Level;
22
23import org.apache.commons.collections4.list.SetUniqueList;
24
25import weka.core.Instances;
26import de.ugoe.cs.cpdp.ExperimentConfiguration;
27import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
28import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
29import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
30import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
31import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
32import de.ugoe.cs.cpdp.loader.IVersionLoader;
33import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
34import de.ugoe.cs.cpdp.training.ITrainer;
35import de.ugoe.cs.cpdp.training.ITrainingStrategy;
36import de.ugoe.cs.cpdp.versions.IVersionFilter;
37import de.ugoe.cs.cpdp.versions.SoftwareVersion;
38import de.ugoe.cs.util.console.Console;
39
40/**
41 * Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
42 * The steps of an experiment are as follows:
43 * <ul>
44 * <li>load the data from the provided data path</li>
45 * <li>filter the data sets according to the provided version filters</li>
46 * <li>execute the following steps for each data sets as test data that is not ignored through the
47 * test version filter:
48 * <ul>
49 * <li>filter the data sets to setup the candidate training data:
50 * <ul>
51 * <li>filter all data sets according to the training data filter
52 * </ul>
53 * </li>
54 * <li>apply the setwise preprocessors</li>
55 * <li>apply the setwise data selection algorithms</li>
56 * <li>apply the setwise postprocessors</li>
57 * <li>train the setwise training classifiers</li>
58 * <li>unify all remaining training data into one data set</li>
59 * <li>apply the preprocessors</li>
60 * <li>apply the pointwise data selection algorithms</li>
61 * <li>apply the postprocessors</li>
62 * <li>train the normal classifiers</li>
63 * <li>evaluate the results for all trained classifiers on the training data</li>
64 * </ul>
65 * </li>
66 * </ul>
67 *
68 * Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
69 * thread.
70 *
71 * @author Steffen Herbold
72 */
73public class RelaxedCrossProjectExperiment implements IExecutionStrategy {
74
75    /**
76     * configuration of the experiment
77     */
78    private final ExperimentConfiguration config;
79
80    /**
81     * Constructor. Creates a new experiment based on a configuration.
82     *
83     * @param config
84     *            configuration of the experiment
85     */
86    public RelaxedCrossProjectExperiment(ExperimentConfiguration config) {
87        this.config = config;
88    }
89
90    /**
91     * Executes the experiment with the steps as described in the class comment.
92     *
93     * @see Runnable#run()
94     */
95    @Override
96    public void run() {
97        final List<SoftwareVersion> versions = new LinkedList<>();
98
99        for (IVersionLoader loader : config.getLoaders()) {
100            versions.addAll(loader.load());
101        }
102
103        for (IVersionFilter filter : config.getVersionFilters()) {
104            filter.apply(versions);
105        }
106        boolean writeHeader = true;
107        int versionCount = 1;
108        int testVersionCount = 0;
109
110        for (SoftwareVersion testVersion : versions) {
111            if (isVersion(testVersion, config.getTestVersionFilters())) {
112                testVersionCount++;
113            }
114        }
115
116        // sort versions
117        Collections.sort(versions);
118
119        for (SoftwareVersion testVersion : versions) {
120            if (isVersion(testVersion, config.getTestVersionFilters())) {
121                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: starting",
122                                                          config.getExperimentName(), versionCount,
123                                                          testVersionCount,
124                                                          testVersion.getVersion()));
125
126                // Setup testdata and training data
127                Instances testdata = testVersion.getInstances();
128                String testProject = testVersion.getProject();
129                SetUniqueList<Instances> traindataSet =
130                    SetUniqueList.setUniqueList(new LinkedList<Instances>());
131                for (SoftwareVersion trainingVersion : versions) {
132                    if (isVersion(trainingVersion, config.getTrainingVersionFilters())) {
133                        if (trainingVersion != testVersion) {
134                            if (trainingVersion.getProject().equals(testProject)) {
135                                if (trainingVersion.compareTo(testVersion) < 0) {
136                                    // only add if older
137                                    traindataSet.add(trainingVersion.getInstances());
138                                }
139                            }
140                            else {
141                                traindataSet.add(trainingVersion.getInstances());
142                            }
143                        }
144                    }
145                }
146
147                for (ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors()) {
148                    Console.traceln(Level.FINE, String
149                        .format("[%s] [%02d/%02d] %s: applying setwise preprocessor %s",
150                                config.getExperimentName(), versionCount, testVersionCount,
151                                testVersion.getVersion(), processor.getClass().getName()));
152                    processor.apply(testdata, traindataSet);
153                }
154                for (ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors()) {
155                    Console.traceln(Level.FINE, String
156                        .format("[%s] [%02d/%02d] %s: applying setwise selection %s",
157                                config.getExperimentName(), versionCount, testVersionCount,
158                                testVersion.getVersion(), dataselector.getClass().getName()));
159                    dataselector.apply(testdata, traindataSet);
160                }
161                for (ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors()) {
162                    Console.traceln(Level.FINE, String
163                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
164                                config.getExperimentName(), versionCount, testVersionCount,
165                                testVersion.getVersion(), processor.getClass().getName()));
166                    processor.apply(testdata, traindataSet);
167                }
168                for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
169                    Console.traceln(Level.FINE, String
170                        .format("[%s] [%02d/%02d] %s: applying setwise trainer %s",
171                                config.getExperimentName(), versionCount, testVersionCount,
172                                testVersion.getVersion(), setwiseTrainer.getName()));
173                    setwiseTrainer.apply(traindataSet);
174                }
175                Instances traindata = makeSingleTrainingSet(traindataSet);
176                for (IProcessesingStrategy processor : config.getPreProcessors()) {
177                    Console.traceln(Level.FINE, String
178                        .format("[%s] [%02d/%02d] %s: applying preprocessor %s",
179                                config.getExperimentName(), versionCount, testVersionCount,
180                                testVersion.getVersion(), processor.getClass().getName()));
181                    processor.apply(testdata, traindata);
182                }
183                for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors())
184                {
185                    Console.traceln(Level.FINE, String
186                        .format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
187                                config.getExperimentName(), versionCount, testVersionCount,
188                                testVersion.getVersion(), dataselector.getClass().getName()));
189                    traindata = dataselector.apply(testdata, traindata);
190                }
191                for (IProcessesingStrategy processor : config.getPostProcessors()) {
192                    Console.traceln(Level.FINE, String
193                        .format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
194                                config.getExperimentName(), versionCount, testVersionCount,
195                                testVersion.getVersion(), processor.getClass().getName()));
196                    processor.apply(testdata, traindata);
197                }
198                for (ITrainingStrategy trainer : config.getTrainers()) {
199                    Console.traceln(Level.FINE, String
200                        .format("[%s] [%02d/%02d] %s: applying trainer %s",
201                                config.getExperimentName(), versionCount, testVersionCount,
202                                testVersion.getVersion(), trainer.getName()));
203                    trainer.apply(traindata);
204                }
205                File resultsDir = new File(config.getResultsPath());
206                if (!resultsDir.exists()) {
207                    resultsDir.mkdir();
208                }
209                for (IEvaluationStrategy evaluator : config.getEvaluators()) {
210                    Console.traceln(Level.FINE, String
211                        .format("[%s] [%02d/%02d] %s: applying evaluator %s",
212                                config.getExperimentName(), versionCount, testVersionCount,
213                                testVersion.getVersion(), evaluator.getClass().getName()));
214                    List<ITrainer> allTrainers = new LinkedList<>();
215                    for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
216                        allTrainers.add(setwiseTrainer);
217                    }
218                    for (ITrainingStrategy trainer : config.getTrainers()) {
219                        allTrainers.add(trainer);
220                    }
221                    if (writeHeader) {
222                        evaluator.setParameter(config.getResultsPath() + "/" +
223                            config.getExperimentName() + ".csv");
224                    }
225                    evaluator.apply(testdata, traindata, allTrainers, writeHeader);
226                    writeHeader = false;
227                }
228                Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished",
229                                                          config.getExperimentName(), versionCount,
230                                                          testVersionCount,
231                                                          testVersion.getVersion()));
232                versionCount++;
233            }
234        }
235    }
236
237    /**
238     * Helper method that checks if a version passes all filters.
239     *
240     * @param version
241     *            version that is checked
242     * @param filters
243     *            list of the filters
244     * @return true, if the version passes all filters, false otherwise
245     */
246    private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
247        boolean result = true;
248        for (IVersionFilter filter : filters) {
249            result &= !filter.apply(version);
250        }
251        return result;
252    }
253
254    /**
255     * Helper method that combines a set of Weka {@link Instances} sets into a single
256     * {@link Instances} set.
257     *
258     * @param traindataSet
259     *            set of {@link Instances} to be combines
260     * @return single {@link Instances} set
261     */
262    public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
263        Instances traindataFull = null;
264        for (Instances traindata : traindataSet) {
265            if (traindataFull == null) {
266                traindataFull = new Instances(traindata);
267            }
268            else {
269                for (int i = 0; i < traindata.numInstances(); i++) {
270                    traindataFull.add(traindata.instance(i));
271                }
272            }
273        }
274        return traindataFull;
275    }
276}
Note: See TracBrowser for help on using the repository browser.