Context Navigation

AbstractCrossProjectExperiment.java @ 68

Last change on this file since 68 was 68, checked in by sherbold, 10 years ago
added the concept of result storages to the framework and implemented a very simple first prototype of a MySQLResultStorage (that currently only works with a locally running database)
Property svn:mime-type set to `text/plain`
File size: 15.9 KB

Line
1	// Copyright 2015 Georg-August-Universität Göttingen, Germany
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	package de.ugoe.cs.cpdp.execution;
16
17	import java.io.File;
18	import java.util.Collections;
19	import java.util.LinkedList;
20	import java.util.List;
21	import java.util.logging.Level;
22
23	import org.apache.commons.collections4.list.SetUniqueList;
24
25	import de.ugoe.cs.cpdp.ExperimentConfiguration;
26	import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
27	import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
28	import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
29	import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
30	import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
31	import de.ugoe.cs.cpdp.loader.IVersionLoader;
32	import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
33	import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
34	import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
35	import de.ugoe.cs.cpdp.training.ITrainer;
36	import de.ugoe.cs.cpdp.training.ITrainingStrategy;
37	import de.ugoe.cs.cpdp.versions.IVersionFilter;
38	import de.ugoe.cs.cpdp.versions.SoftwareVersion;
39	import de.ugoe.cs.util.console.Console;
40	import weka.core.Instances;
41
42	/**
43	* Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
44	* The steps of an experiment are as follows:
45	* <ul>
46	* <li>load the data from the provided data path</li>
47	* <li>filter the data sets according to the provided version filters</li>
48	* <li>execute the following steps for each data sets as test data that is not ignored through the
49	* test version filter:
50	* <ul>
51	* <li>filter the data sets to setup the candidate training data:
52	* <ul>
53	* <li>remove all data sets from the same project</li>
54	* <li>filter all data sets according to the training data filter
55	* </ul>
56	* </li>
57	* <li>apply the setwise preprocessors</li>
58	* <li>apply the setwise data selection algorithms</li>
59	* <li>apply the setwise postprocessors</li>
60	* <li>train the setwise training classifiers</li>
61	* <li>unify all remaining training data into one data set</li>
62	* <li>apply the preprocessors</li>
63	* <li>apply the pointwise data selection algorithms</li>
64	* <li>apply the postprocessors</li>
65	* <li>train the normal classifiers</li>
66	* <li>evaluate the results for all trained classifiers on the training data</li>
67	* </ul>
68	* </li>
69	* </ul>
70	*
71	* Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
72	* thread.
73	*
74	* @author Steffen Herbold
75	*/
76	public abstract class AbstractCrossProjectExperiment implements IExecutionStrategy {
77
78	/**
79	* configuration of the experiment
80	*/
81	protected final ExperimentConfiguration config;
82
83	/**
84	* Constructor. Creates a new experiment based on a configuration.
85	*
86	* @param config
87	* configuration of the experiment
88	*/
89	public AbstractCrossProjectExperiment(ExperimentConfiguration config) {
90	this.config = config;
91	}
92
93	/**
94	* <p>
95	* Defines which products are allowed for training.
96	* </p>
97	*
98	* @param trainingVersion
99	* training version
100	* @param testVersion
101	* test candidate
102	* @return true if test candidate can be used for training
103	*/
104	protected abstract boolean isTrainingVersion(SoftwareVersion trainingVersion,
105	SoftwareVersion testVersion);
106
107	/**
108	* Helper method that combines a set of Weka {@link Instances} sets into a single
109	* {@link Instances} set.
110	*
111	* @param traindataSet
112	* set of {@link Instances} to be combines
113	* @return single {@link Instances} set
114	*/
115	public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
116	Instances traindataFull = null;
117	for (Instances traindata : traindataSet) {
118	if (traindataFull == null) {
119	traindataFull = new Instances(traindata);
120	}
121	else {
122	for (int i = 0; i < traindata.numInstances(); i++) {
123	traindataFull.add(traindata.instance(i));
124	}
125	}
126	}
127	return traindataFull;
128	}
129
130	/**
131	* Executes the experiment with the steps as described in the class comment.
132	*
133	* @see Runnable#run()
134	*/
135	@Override
136	public void run() {
137	final List<SoftwareVersion> versions = new LinkedList<>();
138
139	for (IVersionLoader loader : config.getLoaders()) {
140	versions.addAll(loader.load());
141	}
142
143	for (IVersionFilter filter : config.getVersionFilters()) {
144	filter.apply(versions);
145	}
146	boolean writeHeader = true;
147	int versionCount = 1;
148	int testVersionCount = 0;
149
150	for (SoftwareVersion testVersion : versions) {
151	if (isVersion(testVersion, config.getTestVersionFilters())) {
152	testVersionCount++;
153	}
154	}
155
156	// sort versions
157	Collections.sort(versions);
158
159	for (SoftwareVersion testVersion : versions) {
160	if (isVersion(testVersion, config.getTestVersionFilters())) {
161	Console.traceln(Level.INFO,
162	String.format("[%s] [%02d/%02d] %s: starting",
163	config.getExperimentName(), versionCount,
164	testVersionCount, testVersion.getVersion()));
165
166	// Setup testdata and training data
167	Instances testdata = testVersion.getInstances();
168	SetUniqueList<Instances> traindataSet =
169	SetUniqueList.setUniqueList(new LinkedList<Instances>());
170	for (SoftwareVersion trainingVersion : versions) {
171	if (isVersion(trainingVersion, config.getTrainingVersionFilters())) {
172	if (trainingVersion != testVersion) {
173	if (isTrainingVersion(trainingVersion, testVersion)) {
174	traindataSet.add(trainingVersion.getInstances());
175	}
176	}
177	}
178	}
179
180	for (ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors()) {
181	Console.traceln(Level.FINE,
182	String.format(
183	"[%s] [%02d/%02d] %s: applying setwise preprocessor %s",
184	config.getExperimentName(), versionCount,
185	testVersionCount, testVersion.getVersion(),
186	processor.getClass().getName()));
187	processor.apply(testdata, traindataSet);
188	}
189	for (ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors()) {
190	Console
191	.traceln(Level.FINE,
192	String.format("[%s] [%02d/%02d] %s: applying setwise selection %s",
193	config.getExperimentName(), versionCount,
194	testVersionCount, testVersion.getVersion(),
195	dataselector.getClass().getName()));
196	dataselector.apply(testdata, traindataSet);
197	}
198	for (ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors()) {
199	Console.traceln(Level.FINE,
200	String.format(
201	"[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
202	config.getExperimentName(), versionCount,
203	testVersionCount, testVersion.getVersion(),
204	processor.getClass().getName()));
205	processor.apply(testdata, traindataSet);
206	}
207	for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
208	Console
209	.traceln(Level.FINE,
210	String.format("[%s] [%02d/%02d] %s: applying setwise trainer %s",
211	config.getExperimentName(), versionCount,
212	testVersionCount, testVersion.getVersion(),
213	setwiseTrainer.getName()));
214	setwiseTrainer.apply(traindataSet);
215	}
216	for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
217	.getSetWiseTestdataAwareTrainers())
218	{
219	Console.traceln(Level.FINE,
220	String.format(
221	"[%s] [%02d/%02d] %s: applying testdata aware setwise trainer %s",
222	config.getExperimentName(), versionCount,
223	testVersionCount, testVersion.getVersion(),
224	setwiseTestdataAwareTrainer.getName()));
225	setwiseTestdataAwareTrainer.apply(traindataSet, testdata);
226	}
227	Instances traindata = makeSingleTrainingSet(traindataSet);
228	for (IProcessesingStrategy processor : config.getPreProcessors()) {
229	Console.traceln(Level.FINE,
230	String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
231	config.getExperimentName(), versionCount,
232	testVersionCount, testVersion.getVersion(),
233	processor.getClass().getName()));
234	processor.apply(testdata, traindata);
235	}
236	for (IPointWiseDataselectionStrategy dataselector : config
237	.getPointWiseSelectors())
238	{
239	Console.traceln(Level.FINE,
240	String.format(
241	"[%s] [%02d/%02d] %s: applying pointwise selection %s",
242	config.getExperimentName(), versionCount,
243	testVersionCount, testVersion.getVersion(),
244	dataselector.getClass().getName()));
245	traindata = dataselector.apply(testdata, traindata);
246	}
247	for (IProcessesingStrategy processor : config.getPostProcessors()) {
248	Console.traceln(Level.FINE,
249	String.format(
250	"[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
251	config.getExperimentName(), versionCount,
252	testVersionCount, testVersion.getVersion(),
253	processor.getClass().getName()));
254	processor.apply(testdata, traindata);
255	}
256	for (ITrainingStrategy trainer : config.getTrainers()) {
257	Console.traceln(Level.FINE,
258	String.format("[%s] [%02d/%02d] %s: applying trainer %s",
259	config.getExperimentName(), versionCount,
260	testVersionCount, testVersion.getVersion(),
261	trainer.getName()));
262	trainer.apply(traindata);
263	}
264	for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
265	Console.traceln(Level.FINE,
266	String.format("[%s] [%02d/%02d] %s: applying trainer %s",
267	config.getExperimentName(), versionCount,
268	testVersionCount, testVersion.getVersion(),
269	trainer.getName()));
270	trainer.apply(testdata, traindata);
271	}
272	File resultsDir = new File(config.getResultsPath());
273	if (!resultsDir.exists()) {
274	resultsDir.mkdir();
275	}
276	for (IEvaluationStrategy evaluator : config.getEvaluators()) {
277	Console.traceln(Level.FINE,
278	String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
279	config.getExperimentName(), versionCount,
280	testVersionCount, testVersion.getVersion(),
281	evaluator.getClass().getName()));
282	List<ITrainer> allTrainers = new LinkedList<>();
283	for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
284	allTrainers.add(setwiseTrainer);
285	}
286	for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config
287	.getSetWiseTestdataAwareTrainers())
288	{
289	allTrainers.add(setwiseTestdataAwareTrainer);
290	}
291	for (ITrainingStrategy trainer : config.getTrainers()) {
292	allTrainers.add(trainer);
293	}
294	for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
295	allTrainers.add(trainer);
296	}
297	if (writeHeader) {
298	evaluator.setParameter(config.getResultsPath() + "/" +
299	config.getExperimentName() + ".csv");
300	}
301	evaluator.apply(testdata, traindata, allTrainers, writeHeader, config.getResultStorages());
302	writeHeader = false;
303	}
304	Console.traceln(Level.INFO,
305	String.format("[%s] [%02d/%02d] %s: finished",
306	config.getExperimentName(), versionCount,
307	testVersionCount, testVersion.getVersion()));
308	versionCount++;
309	}
310	}
311	}
312
313	/**
314	* Helper method that checks if a version passes all filters.
315	*
316	* @param version
317	* version that is checked
318	* @param filters
319	* list of the filters
320	* @return true, if the version passes all filters, false otherwise
321	*/
322	private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
323	boolean result = true;
324	for (IVersionFilter filter : filters) {
325	result &= !filter.apply(version);
326	}
327	return result;
328	}
329	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format