Context Navigation

AbstractCrossProjectExperiment.java @ 70

Last change on this file since 70 was 69, checked in by sherbold, 9 years ago
updated new result storage concept and cross-project experiments to first check if a result is available. If this is the case, the experiment is not executed.
Property svn:mime-type set to `text/plain`
File size: 16.9 KB

Line
1	// Copyright 2015 Georg-August-Universität Göttingen, Germany
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	package de.ugoe.cs.cpdp.execution;
16
17	import java.io.File;
18	import java.util.Collections;
19	import java.util.LinkedList;
20	import java.util.List;
21	import java.util.logging.Level;
22
23	import org.apache.commons.collections4.list.SetUniqueList;
24
25	import de.ugoe.cs.cpdp.ExperimentConfiguration;
26	import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
27	import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
28	import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
29	import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
30	import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
31	import de.ugoe.cs.cpdp.eval.IResultStorage;
32	import de.ugoe.cs.cpdp.loader.IVersionLoader;
33	import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
34	import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
35	import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
36	import de.ugoe.cs.cpdp.training.ITrainer;
37	import de.ugoe.cs.cpdp.training.ITrainingStrategy;
38	import de.ugoe.cs.cpdp.versions.IVersionFilter;
39	import de.ugoe.cs.cpdp.versions.SoftwareVersion;
40	import de.ugoe.cs.util.console.Console;
41	import weka.core.Instances;
42
43	/**
44	* Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
45	* The steps of an experiment are as follows:
46	* <ul>
47	* <li>load the data from the provided data path</li>
48	* <li>filter the data sets according to the provided version filters</li>
49	* <li>execute the following steps for each data set that is used as test data, i.e., that is not
50	* ignored through the test version filter:
51	* <ul>
52	* <li>filter the data sets to setup the candidate training data:
53	* <ul>
54	* <li>remove all data sets from the same project</li>
55	* <li>filter all data sets according to the training data filter</li>
56	* </ul>
57	* </li>
58	* <li>apply the setwise preprocessors</li>
59	* <li>apply the setwise data selection algorithms</li>
60	* <li>apply the setwise postprocessors</li>
61	* <li>train the setwise training classifiers</li>
62	* <li>unify all remaining training data into one data set</li>
63	* <li>apply the preprocessors</li>
64	* <li>apply the pointwise data selection algorithms</li>
65	* <li>apply the postprocessors</li>
66	* <li>train the normal classifiers</li>
67	* <li>evaluate the results for all trained classifiers on the training data</li>
68	* </ul>
69	* </li>
70	* </ul>
71	*
72	* Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
73	* thread.
74	*
75	* @author Steffen Herbold
76	*/
77	public abstract class AbstractCrossProjectExperiment implements IExecutionStrategy {
78
79	/**
80	* configuration of the experiment
81	*/
82	protected final ExperimentConfiguration config;
83
84	/**
85	* Constructor. Creates a new experiment based on a configuration.
86	*
87	* @param config
88	* configuration of the experiment; the reference is stored as-is (no copy, no null check)
89	*/
90	public AbstractCrossProjectExperiment(ExperimentConfiguration config) {
91	this.config = config;
92	}
93
94	/**
95	* <p>
96	* Defines which software versions are allowed as training data for a given test version. During {@link #run()}, a candidate is only added to the training data if this method returns true.
97	* </p>
98	*
99	* @param trainingVersion
100	* candidate version for the training data
101	* @param testVersion
102	* version that is used as test data
103	* @return true if the training candidate can be used for training, false otherwise
104	*/
105	protected abstract boolean isTrainingVersion(SoftwareVersion trainingVersion,
106	SoftwareVersion testVersion);
107
108	/**
109	* Helper method that combines a set of Weka {@link Instances} sets into a single
110	* {@link Instances} set.
111	*
112	* @param traindataSet
113	* set of {@link Instances} to be combines
114	* @return single {@link Instances} set
115	*/
116	public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
117	Instances traindataFull = null;
118	for (Instances traindata : traindataSet) {
119	if (traindataFull == null) {
120	traindataFull = new Instances(traindata);
121	}
122	else {
123	for (int i = 0; i < traindata.numInstances(); i++) {
124	traindataFull.add(traindata.instance(i));
125	}
126	}
127	}
128	return traindataFull;
129	}
130
131	/**
132	* Executes the experiment with the steps as described in the class comment.
133	*
134	* @see Runnable#run()
135	*/
136	@Override
137	public void run() {
138	final List<SoftwareVersion> versions = new LinkedList<>();
139
140	for (IVersionLoader loader : config.getLoaders()) {
141	versions.addAll(loader.load());
142	}
143
144	for (IVersionFilter filter : config.getVersionFilters()) {
145	filter.apply(versions);
146	}
147	boolean writeHeader = true;
148	int versionCount = 1;
149	int testVersionCount = 0;
150
151	for (SoftwareVersion testVersion : versions) {
152	if (isVersion(testVersion, config.getTestVersionFilters())) {
153	testVersionCount++;
154	}
155	}
156
157	// sort versions
158	Collections.sort(versions);
159
160	for (SoftwareVersion testVersion : versions) {
161	if (isVersion(testVersion, config.getTestVersionFilters())) {
162	Console.traceln(Level.INFO,
163	String.format("[%s] [%02d/%02d] %s: starting",
164	config.getExperimentName(), versionCount,
165	testVersionCount, testVersion.getVersion()));
166	if (resultsAvailable(testVersion)) {
167	Console.traceln(Level.INFO,
168	String.format(
169	"[%s] [%02d/%02d] %s: results already available; skipped",
170	config.getExperimentName(), versionCount,
171	testVersionCount, testVersion.getVersion()));
172	versionCount++;
173	continue;
174	}
175
176	// Setup testdata and training data
177	Instances testdata = testVersion.getInstances();
178	SetUniqueList<Instances> traindataSet =
179	SetUniqueList.setUniqueList(new LinkedList<Instances>());
180	for (SoftwareVersion trainingVersion : versions) {
181	if (isVersion(trainingVersion, config.getTrainingVersionFilters())) {
182	if (trainingVersion != testVersion) {
183	if (isTrainingVersion(trainingVersion, testVersion)) {
184	traindataSet.add(trainingVersion.getInstances());
185	}
186	}
187	}
188	}
189
190	for (ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors()) {
191	Console.traceln(Level.FINE,
192	String.format(
193	"[%s] [%02d/%02d] %s: applying setwise preprocessor %s",
194	config.getExperimentName(), versionCount,
195	testVersionCount, testVersion.getVersion(),
196	processor.getClass().getName()));
197	processor.apply(testdata, traindataSet);
198	}
199	for (ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors()) {
200	Console
201	.traceln(Level.FINE,
202	String.format("[%s] [%02d/%02d] %s: applying setwise selection %s",
203	config.getExperimentName(), versionCount,
204	testVersionCount, testVersion.getVersion(),
205	dataselector.getClass().getName()));
206	dataselector.apply(testdata, traindataSet);
207	}
208	for (ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors()) {
209	Console.traceln(Level.FINE,
210	String.format(
211	"[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
212	config.getExperimentName(), versionCount,
213	testVersionCount, testVersion.getVersion(),
214	processor.getClass().getName()));
215	processor.apply(testdata, traindataSet);
216	}
217	for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
218	Console
219	.traceln(Level.FINE,
220	String.format("[%s] [%02d/%02d] %s: applying setwise trainer %s",
221	config.getExperimentName(), versionCount,
222	testVersionCount, testVersion.getVersion(),
223	setwiseTrainer.getName()));
224	setwiseTrainer.apply(traindataSet);
225	}
226	for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
227	.getSetWiseTestdataAwareTrainers())
228	{
229	Console.traceln(Level.FINE,
230	String.format(
231	"[%s] [%02d/%02d] %s: applying testdata aware setwise trainer %s",
232	config.getExperimentName(), versionCount,
233	testVersionCount, testVersion.getVersion(),
234	setwiseTestdataAwareTrainer.getName()));
235	setwiseTestdataAwareTrainer.apply(traindataSet, testdata);
236	}
237	Instances traindata = makeSingleTrainingSet(traindataSet);
238	for (IProcessesingStrategy processor : config.getPreProcessors()) {
239	Console.traceln(Level.FINE,
240	String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
241	config.getExperimentName(), versionCount,
242	testVersionCount, testVersion.getVersion(),
243	processor.getClass().getName()));
244	processor.apply(testdata, traindata);
245	}
246	for (IPointWiseDataselectionStrategy dataselector : config
247	.getPointWiseSelectors())
248	{
249	Console.traceln(Level.FINE,
250	String.format(
251	"[%s] [%02d/%02d] %s: applying pointwise selection %s",
252	config.getExperimentName(), versionCount,
253	testVersionCount, testVersion.getVersion(),
254	dataselector.getClass().getName()));
255	traindata = dataselector.apply(testdata, traindata);
256	}
257	for (IProcessesingStrategy processor : config.getPostProcessors()) {
258	Console.traceln(Level.FINE,
259	String.format(
260	"[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
261	config.getExperimentName(), versionCount,
262	testVersionCount, testVersion.getVersion(),
263	processor.getClass().getName()));
264	processor.apply(testdata, traindata);
265	}
266	for (ITrainingStrategy trainer : config.getTrainers()) {
267	Console.traceln(Level.FINE,
268	String.format("[%s] [%02d/%02d] %s: applying trainer %s",
269	config.getExperimentName(), versionCount,
270	testVersionCount, testVersion.getVersion(),
271	trainer.getName()));
272	trainer.apply(traindata);
273	}
274	for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
275	Console.traceln(Level.FINE,
276	String.format("[%s] [%02d/%02d] %s: applying trainer %s",
277	config.getExperimentName(), versionCount,
278	testVersionCount, testVersion.getVersion(),
279	trainer.getName()));
280	trainer.apply(testdata, traindata);
281	}
282	File resultsDir = new File(config.getResultsPath());
283	if (!resultsDir.exists()) {
284	resultsDir.mkdir();
285	}
286	for (IEvaluationStrategy evaluator : config.getEvaluators()) {
287	Console.traceln(Level.FINE,
288	String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
289	config.getExperimentName(), versionCount,
290	testVersionCount, testVersion.getVersion(),
291	evaluator.getClass().getName()));
292	List<ITrainer> allTrainers = new LinkedList<>();
293	for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
294	allTrainers.add(setwiseTrainer);
295	}
296	for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
297	.getSetWiseTestdataAwareTrainers())
298	{
299	allTrainers.add(setwiseTestdataAwareTrainer);
300	}
301	for (ITrainingStrategy trainer : config.getTrainers()) {
302	allTrainers.add(trainer);
303	}
304	for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
305	allTrainers.add(trainer);
306	}
307	if (writeHeader) {
308	evaluator.setParameter(config.getResultsPath() + "/" +
309	config.getExperimentName() + ".csv");
310	}
311	evaluator.apply(testdata, traindata, allTrainers, writeHeader,
312	config.getResultStorages());
313	writeHeader = false;
314	}
315	Console.traceln(Level.INFO,
316	String.format("[%s] [%02d/%02d] %s: finished",
317	config.getExperimentName(), versionCount,
318	testVersionCount, testVersion.getVersion()));
319	versionCount++;
320	}
321	}
322	}
323
324	/**
325	* Helper method that checks if a version passes all filters.
326	*
327	* @param version
328	* version that is checked
329	* @param filters
330	* list of the filters
331	* @return true, if the version passes all filters, false otherwise
332	*/
333	private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
334	boolean result = true;
335	for (IVersionFilter filter : filters) {
336	result &= !filter.apply(version);
337	}
338	return result;
339	}
340
341	private boolean resultsAvailable(SoftwareVersion version) {
342	if (config.getResultStorages().isEmpty()) {
343	return false;
344	}
345	boolean available = true;
346	for (IResultStorage storage : config.getResultStorages()) {
347	available &= storage.containsResult(config.getExperimentName(), version.getVersion());
348	}
349	return available;
350	}
351	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format