Context Navigation

RelaxedCrossProjectExperiment.java @ 66

Last change on this file since 66 was 65, checked in by sherbold, 9 years ago
added new interface ITestAwareTraining strategy to the framework to support trainers with knowledge of the test data. The implementation of such trainers must take care to not accidentally take the classification of the test data into account.
Property svn:mime-type set to `text/plain`
File size: 14.3 KB

Line
1	// Copyright 2015 Georg-August-Universität Göttingen, Germany
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	package de.ugoe.cs.cpdp.execution;
16
17	import java.io.File;
18	import java.util.Collections;
19	import java.util.LinkedList;
20	import java.util.List;
21	import java.util.logging.Level;
22
23	import org.apache.commons.collections4.list.SetUniqueList;
24
25	import weka.core.Instances;
26	import de.ugoe.cs.cpdp.ExperimentConfiguration;
27	import de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy;
28	import de.ugoe.cs.cpdp.dataprocessing.ISetWiseProcessingStrategy;
29	import de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy;
30	import de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy;
31	import de.ugoe.cs.cpdp.eval.IEvaluationStrategy;
32	import de.ugoe.cs.cpdp.loader.IVersionLoader;
33	import de.ugoe.cs.cpdp.training.ISetWiseTestdataAwareTrainingStrategy;
34	import de.ugoe.cs.cpdp.training.ISetWiseTrainingStrategy;
35	import de.ugoe.cs.cpdp.training.ITestAwareTrainingStrategy;
36	import de.ugoe.cs.cpdp.training.ITrainer;
37	import de.ugoe.cs.cpdp.training.ITrainingStrategy;
38	import de.ugoe.cs.cpdp.versions.IVersionFilter;
39	import de.ugoe.cs.cpdp.versions.SoftwareVersion;
40	import de.ugoe.cs.util.console.Console;
41
42	/**
43	* Class responsible for executing an experiment according to an {@link ExperimentConfiguration}.
44	* The steps of an experiment are as follows:
45	* <ul>
46	* <li>load the data from the provided data path</li>
47	* <li>filter the data sets according to the provided version filters</li>
48	* <li>execute the following steps for each data sets as test data that is not ignored through the
49	* test version filter:
50	* <ul>
51	* <li>filter the data sets to setup the candidate training data:
52	* <ul>
53	* <li>filter all data sets according to the training data filter
54	* </ul>
55	* </li>
56	* <li>apply the setwise preprocessors</li>
57	* <li>apply the setwise data selection algorithms</li>
58	* <li>apply the setwise postprocessors</li>
59	* <li>train the setwise training classifiers</li>
60	* <li>unify all remaining training data into one data set</li>
61	* <li>apply the preprocessors</li>
62	* <li>apply the pointwise data selection algorithms</li>
63	* <li>apply the postprocessors</li>
64	* <li>train the normal classifiers</li>
65	* <li>evaluate the results for all trained classifiers on the training data</li>
66	* </ul>
67	* </li>
68	* </ul>
69	*
70	* Note that this class implements {@link Runnable}, i.e., each experiment can be started in its own
71	* thread.
72	*
73	* @author Steffen Herbold
74	*/
75	public class RelaxedCrossProjectExperiment implements IExecutionStrategy {
76
77	/**
78	* configuration of the experiment
79	*/
80	private final ExperimentConfiguration config;
81
82	/**
83	* Constructor. Creates a new experiment based on a configuration.
84	*
85	* @param config
86	* configuration of the experiment
87	*/
88	public RelaxedCrossProjectExperiment(ExperimentConfiguration config) {
89	this.config = config;
90	}
91
92	/**
93	* Executes the experiment with the steps as described in the class comment.
94	*
95	* @see Runnable#run()
96	*/
97	@Override
98	public void run() {
99	final List<SoftwareVersion> versions = new LinkedList<>();
100
101	for (IVersionLoader loader : config.getLoaders()) {
102	versions.addAll(loader.load());
103	}
104
105	for (IVersionFilter filter : config.getVersionFilters()) {
106	filter.apply(versions);
107	}
108	boolean writeHeader = true;
109	int versionCount = 1;
110	int testVersionCount = 0;
111
112	for (SoftwareVersion testVersion : versions) {
113	if (isVersion(testVersion, config.getTestVersionFilters())) {
114	testVersionCount++;
115	}
116	}
117
118	// sort versions
119	Collections.sort(versions);
120
121	for (SoftwareVersion testVersion : versions) {
122	if (isVersion(testVersion, config.getTestVersionFilters())) {
123	Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: starting",
124	config.getExperimentName(), versionCount,
125	testVersionCount,
126	testVersion.getVersion()));
127
128	// Setup testdata and training data
129	Instances testdata = testVersion.getInstances();
130	String testProject = testVersion.getProject();
131	SetUniqueList<Instances> traindataSet =
132	SetUniqueList.setUniqueList(new LinkedList<Instances>());
133	for (SoftwareVersion trainingVersion : versions) {
134	if (isVersion(trainingVersion, config.getTrainingVersionFilters())) {
135	if (trainingVersion != testVersion) {
136	if (trainingVersion.getProject().equals(testProject)) {
137	if (trainingVersion.compareTo(testVersion) < 0) {
138	// only add if older
139	traindataSet.add(trainingVersion.getInstances());
140	}
141	}
142	else {
143	traindataSet.add(trainingVersion.getInstances());
144	}
145	}
146	}
147	}
148
149	for (ISetWiseProcessingStrategy processor : config.getSetWisePreprocessors()) {
150	Console.traceln(Level.FINE, String
151	.format("[%s] [%02d/%02d] %s: applying setwise preprocessor %s",
152	config.getExperimentName(), versionCount, testVersionCount,
153	testVersion.getVersion(), processor.getClass().getName()));
154	processor.apply(testdata, traindataSet);
155	}
156	for (ISetWiseDataselectionStrategy dataselector : config.getSetWiseSelectors()) {
157	Console.traceln(Level.FINE, String
158	.format("[%s] [%02d/%02d] %s: applying setwise selection %s",
159	config.getExperimentName(), versionCount, testVersionCount,
160	testVersion.getVersion(), dataselector.getClass().getName()));
161	dataselector.apply(testdata, traindataSet);
162	}
163	for (ISetWiseProcessingStrategy processor : config.getSetWisePostprocessors()) {
164	Console.traceln(Level.FINE, String
165	.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
166	config.getExperimentName(), versionCount, testVersionCount,
167	testVersion.getVersion(), processor.getClass().getName()));
168	processor.apply(testdata, traindataSet);
169	}
170	for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
171	Console.traceln(Level.FINE, String
172	.format("[%s] [%02d/%02d] %s: applying setwise trainer %s",
173	config.getExperimentName(), versionCount, testVersionCount,
174	testVersion.getVersion(), setwiseTrainer.getName()));
175	setwiseTrainer.apply(traindataSet);
176	}
177	for (ISetWiseTestdataAwareTrainingStrategy setwiseTestdataAwareTrainer : config.getSetWiseTestdataAwareTrainers()) {
178	Console.traceln(Level.FINE, String
179	.format("[%s] [%02d/%02d] %s: applying testdata aware setwise trainer %s",
180	config.getExperimentName(), versionCount, testVersionCount,
181	testVersion.getVersion(), setwiseTestdataAwareTrainer.getName()));
182	setwiseTestdataAwareTrainer.apply(traindataSet, testdata);
183	}
184	Instances traindata = makeSingleTrainingSet(traindataSet);
185	for (IProcessesingStrategy processor : config.getPreProcessors()) {
186	Console.traceln(Level.FINE, String
187	.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
188	config.getExperimentName(), versionCount, testVersionCount,
189	testVersion.getVersion(), processor.getClass().getName()));
190	processor.apply(testdata, traindata);
191	}
192	for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors())
193	{
194	Console.traceln(Level.FINE, String
195	.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
196	config.getExperimentName(), versionCount, testVersionCount,
197	testVersion.getVersion(), dataselector.getClass().getName()));
198	traindata = dataselector.apply(testdata, traindata);
199	}
200	for (IProcessesingStrategy processor : config.getPostProcessors()) {
201	Console.traceln(Level.FINE, String
202	.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
203	config.getExperimentName(), versionCount, testVersionCount,
204	testVersion.getVersion(), processor.getClass().getName()));
205	processor.apply(testdata, traindata);
206	}
207	for (ITrainingStrategy trainer : config.getTrainers()) {
208	Console.traceln(Level.FINE, String
209	.format("[%s] [%02d/%02d] %s: applying trainer %s",
210	config.getExperimentName(), versionCount, testVersionCount,
211	testVersion.getVersion(), trainer.getName()));
212	trainer.apply(traindata);
213	}
214	for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
215	Console.traceln(Level.FINE, String
216	.format("[%s] [%02d/%02d] %s: applying trainer %s",
217	config.getExperimentName(), versionCount, testVersionCount,
218	testVersion.getVersion(), trainer.getName()));
219	trainer.apply(testdata, traindata);
220	}
221	File resultsDir = new File(config.getResultsPath());
222	if (!resultsDir.exists()) {
223	resultsDir.mkdir();
224	}
225	for (IEvaluationStrategy evaluator : config.getEvaluators()) {
226	Console.traceln(Level.FINE, String
227	.format("[%s] [%02d/%02d] %s: applying evaluator %s",
228	config.getExperimentName(), versionCount, testVersionCount,
229	testVersion.getVersion(), evaluator.getClass().getName()));
230	List<ITrainer> allTrainers = new LinkedList<>();
231	for (ISetWiseTrainingStrategy setwiseTrainer : config.getSetWiseTrainers()) {
232	allTrainers.add(setwiseTrainer);
233	}
234	for (ISetWiseTestdataAwareTrainingStrategy setwiseTrainer : config.getSetWiseTestdataAwareTrainers()) {
235	allTrainers.add(setwiseTrainer);
236	}
237	for (ITrainingStrategy trainer : config.getTrainers()) {
238	allTrainers.add(trainer);
239	}
240	for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
241	allTrainers.add(trainer);
242	}
243	if (writeHeader) {
244	evaluator.setParameter(config.getResultsPath() + "/" +
245	config.getExperimentName() + ".csv");
246	}
247	evaluator.apply(testdata, traindata, allTrainers, writeHeader);
248	writeHeader = false;
249	}
250	Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished",
251	config.getExperimentName(), versionCount,
252	testVersionCount,
253	testVersion.getVersion()));
254	versionCount++;
255	}
256	}
257	}
258
259	/**
260	* Helper method that checks if a version passes all filters.
261	*
262	* @param version
263	* version that is checked
264	* @param filters
265	* list of the filters
266	* @return true, if the version passes all filters, false otherwise
267	*/
268	private boolean isVersion(SoftwareVersion version, List<IVersionFilter> filters) {
269	boolean result = true;
270	for (IVersionFilter filter : filters) {
271	result &= !filter.apply(version);
272	}
273	return result;
274	}
275
276	/**
277	* Helper method that combines a set of Weka {@link Instances} sets into a single
278	* {@link Instances} set.
279	*
280	* @param traindataSet
281	* set of {@link Instances} to be combines
282	* @return single {@link Instances} set
283	*/
284	public static Instances makeSingleTrainingSet(SetUniqueList<Instances> traindataSet) {
285	Instances traindataFull = null;
286	for (Instances traindata : traindataSet) {
287	if (traindataFull == null) {
288	traindataFull = new Instances(traindata);
289	}
290	else {
291	for (int i = 0; i < traindata.numInstances(); i++) {
292	traindataFull.add(traindata.instance(i));
293	}
294	}
295	}
296	return traindataFull;
297	}
298	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format