source: trunk/CrossPareConfigurationBuilder/src/de/ugoe/cs/crosspare/ConfigurationBuilder.java @ 144

Last change on this file since 144 was 144, checked in by sherbold, 7 years ago
  • Property svn:mime-type set to text/plain
File size: 44.3 KB
Line 
1package de.ugoe.cs.crosspare;
2
3import java.io.File;
4import java.io.FileWriter;
5import java.io.IOException;
6import java.lang.reflect.InvocationTargetException;
7import java.security.InvalidParameterException;
8
9public class ConfigurationBuilder {
10   
11    private static enum Dataset {
12        MDP,
13        MDP_EFFNORM,
14        MDP_EFFLOGNORM,
15        JURECZKO,
16        FILTERJURECZKO,
17        SELECTEDJURECZKO,
18        JURECZKO_NUMERIC,
19        JURECZKO_EFFNORM,
20        JURECZKO_EFFLOGNORM,
21        JURECZKO_EFFNORM_NUMERIC,
22        JURECZKO_EFFLOGNORM_NUMERIC,
23        JURECZKO_NUMERIC_DUPLICATE,
24        JURECZKO_NUMERIC_WEIGHTS,
25        AEEEM,
26        AEEEM_LDHH,
27        AEEEM_WCHU,
28        AEEEM_LDHHWCHU,
29        AEEEM_NUMERIC,
30        AEEEM_LDHH_NUMERIC,
31        AEEEM_WCHU_NUMERIC,
32        AEEEM_LDHHWCHU_NUMERIC,
33        AEEEM_LDHHWCHU_EFFNORM,
34        AEEEM_LDHHWCHU_EFFLOGNORM,
35        AEEEM_LDHHWCHU_EFFNORM_NUMERIC,
36        AEEEM_LDHHWCHU_EFFLOGNORM_NUMERIC,
37        AEEEM_LDHHWCHU_NUMERIC_DUPLICATE,
38        AEEEM_LDHHWCHU_NUMERIC_WEIGHTS,
39        RELINK,
40        RELINK_EFFNORM,
41        RELINK_EFFLOGNORM,
42        NETGENE,
43        NETGENE_NUMERIC,
44        NETGENE_NUMERIC_DUPLICATE,
45        NETGENE_NUMERIC_WEIGHTS,
46        SMARTSHARK_ALL,
47        SMARTSHARK_AST,
48        SMARTSHARK_SM,
49        SMARTSHARK_ALL_NUMERIC
50    }
51   
    /** Root folder (with trailing slash) below which all generated configuration files are written. */
    private static final String storageFolder = "config/";
53   
54    public static void main(String[] args) {
55        for( Dataset dataset : Dataset.values() ) {
56            // baselines
57            writeFile("ALL", dataset);
58            writeFile("CV", dataset);
59            writeFile("Random", dataset);
60            writeFile("Trivial", dataset);
61            // publications
62            writeFile("Koshgoftaar08", dataset);
63            writeFile("Watanabe08", dataset);
64            writeFile("Turhan09", dataset);
65            writeFile("Zimmermann09", dataset);
66            writeFile("CamargoCruz09", dataset);
67            writeFile("Liu10", dataset);
68            writeFile("Menzies11", dataset);
69            writeFile("Ma12", dataset);
70            writeFile("Peters12", dataset);
71            writeFile("Uchigaki12", dataset);
72            writeFile("Canfora13", dataset);
73            writeFile("Peters13", dataset);
74            writeFile("Herbold13", dataset);
75            writeFile("ZHe13", dataset);
76            writeFile("Nam13", dataset);
77            writeFile("Panichella14", dataset);
78            writeFile("Ryu14", dataset);
79            writeFile("PHe15", dataset);
80            writeFile("Peters15", dataset);
81            writeFile("Kawata15", dataset);
82            writeFile("YZhang15", dataset);
83            writeFile("Amasaki15", dataset);
84            writeFile("Ryu15", dataset);
85            writeFile("Nam15", dataset);
86        }
87    }
88   
89    public static void writeFile(String approach, Dataset dataset) {
90        File file = new File(storageFolder + dataset.toString() + "/"+  dataset.toString() + "-" + approach + ".xml");
91        file.getParentFile().mkdirs();
92        try(FileWriter writer = new FileWriter(file);) {
93            writer.append((String) ConfigurationBuilder.class.getMethod(approach, Dataset.class).invoke(null, dataset));
94            writer.flush();
95        }
96        catch (IOException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) {
97            e.printStackTrace();
98        }
99       
100        file = new File(storageFolder + "ALL/"+  dataset.toString() + "-" + approach + ".xml");
101        file.getParentFile().mkdirs();
102        try(FileWriter writer = new FileWriter(file);) {
103            writer.append((String) ConfigurationBuilder.class.getMethod(approach, Dataset.class).invoke(null, dataset));
104            writer.flush();
105        }
106        catch (IOException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) {
107            e.printStackTrace();
108        }
109    }
110   
111    public static void preamble(StringBuilder configFile) {
112        configFile.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
113        configFile.append("<config xmlns=\"experimentconfig\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"experimentconfig experimentconfig.xsd\">\n");
114    }
115   
116    public static void postamble(StringBuilder configFile) {
117        configFile.append(" <storage name=\"MySQLResultStorage\" param=\"\" />\n");
118        configFile.append("</config>");
119    }
120   
121    public static void trainers(StringBuilder configFile) {
122        configFile.append(" <trainer name=\"WekaTraining\" param=\"NB weka.classifiers.bayes.NaiveBayes\" />\n");
123        configFile.append(" <trainer name=\"WekaTraining\" param=\"RF weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5\" />\n");
124        configFile.append(" <trainer name=\"WekaTraining\" param=\"DT weka.classifiers.trees.J48 -CVPARAM C 0.1 0.3 5\" />\n");
125        configFile.append(" <trainer name=\"WekaTraining\" param=\"LR weka.classifiers.functions.Logistic\" />\n");
126        configFile.append(" <trainer name=\"WekaTraining\" param=\"NET weka.classifiers.functions.RBFNetwork -CVPARAM W 0.1 10.0 3.0 L 2.0 18.0 3.0\" />\n");
127        configFile.append(" <trainer name=\"WekaTraining\" param=\"SVM weka.classifiers.functions.SMO -K weka.classifiers.functions.supportVector.RBFKernel\" />\n");
128    }
129   
130    public static void trainersBagging(StringBuilder configFile) {
131        configFile.append(" <setwisetrainer name=\"WekaBaggingTraining\" param=\"NB weka.classifiers.bayes.NaiveBayes\" />\n");
132        configFile.append(" <setwisetrainer name=\"WekaBaggingTraining\" param=\"RF weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5\" />\n");
133        configFile.append(" <setwisetrainer name=\"WekaBaggingTraining\" param=\"DT weka.classifiers.trees.J48 -CVPARAM C 0.1 0.3 5\" />\n");
134        configFile.append(" <setwisetrainer name=\"WekaBaggingTraining\" param=\"LR weka.classifiers.functions.Logistic\" />\n");
135        configFile.append(" <setwisetrainer name=\"WekaBaggingTraining\" param=\"NET weka.classifiers.functions.RBFNetwork -CVPARAM W 0.1 10.0 3.0 L 2.0 18.0 3.0\" />\n");
136        configFile.append(" <setwisetrainer name=\"WekaBaggingTraining\" param=\"SVM weka.classifiers.functions.SMO -K weka.classifiers.functions.supportVector.RBFKernel\" />\n");
137    }
138   
139    public static void trainersLocalWhere(StringBuilder configFile) {
140        configFile.append(" <trainer name=\"WekaLocalFQTraining\" param=\"NB weka.classifiers.bayes.NaiveBayes\" />\n");
141        configFile.append(" <trainer name=\"WekaLocalFQTraining\" param=\"RF weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5\" />\n");
142        configFile.append(" <trainer name=\"WekaLocalFQTraining\" param=\"DT weka.classifiers.trees.J48 -CVPARAM C 0.1 0.3 5\" />\n");
143        configFile.append(" <trainer name=\"WekaLocalFQTraining\" param=\"LR weka.classifiers.functions.Logistic\" />\n");
144        configFile.append(" <trainer name=\"WekaLocalFQTraining\" param=\"NET weka.classifiers.functions.RBFNetwork -CVPARAM W 0.1 10.0 3.0 L 2.0 18.0 3.0\" />\n");
145        configFile.append(" <trainer name=\"WekaLocalFQTraining\" param=\"SVM weka.classifiers.functions.SMO -K weka.classifiers.functions.supportVector.RBFKernel\" />\n");
146        configFile.append(" <trainer name=\"WekaLocalFQTraining\" param=\"WHICH de.ugoe.cs.cpdp.wekaclassifier.WHICH\" />\n");
147    }
148   
149    public static void trainersLASER(StringBuilder configFile) {
150        configFile.append(" <trainer name=\"WekaLASERTraining\" param=\"NB weka.classifiers.bayes.NaiveBayes\" />\n");
151        configFile.append(" <trainer name=\"WekaLASERTraining\" param=\"RF weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5\" />\n");
152        configFile.append(" <trainer name=\"WekaLASERTraining\" param=\"DT weka.classifiers.trees.J48 -CVPARAM C 0.1 0.3 5\" />\n");
153        configFile.append(" <trainer name=\"WekaLASERTraining\" param=\"LR weka.classifiers.functions.Logistic\" />\n");
154        configFile.append(" <trainer name=\"WekaLASERTraining\" param=\"NET weka.classifiers.functions.RBFNetwork -CVPARAM W 0.1 10.0 3.0 L 2.0 18.0 3.0\" />\n");
155        configFile.append(" <trainer name=\"WekaLASERTraining\" param=\"SVM weka.classifiers.functions.SMO -K weka.classifiers.functions.supportVector.RBFKernel\" />\n");
156    }
157       
158    public static void dataset(StringBuilder configFile, Dataset dataset) {
159        switch (dataset)
160        {
161            case MDP:
162                configFile.append(" <loader name=\"NasaARFFFolderLoader\" datalocation=\"benchmark/data/MDP\" relative=\"false\"/>\n");
163                break;
164            case MDP_EFFNORM:
165                configFile.append(" <loader name=\"NasaARFFFolderLoader\" datalocation=\"benchmark/data/MDP\" relative=\"false\"/>\n");
166                configFile.append(" <setwisepreprocessor name=\"NormalizeByEffort\" param=\"\"/>\n");
167                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"LOC_EXECUTABLE\"/>\n");
168                break;
169            case MDP_EFFLOGNORM:
170                configFile.append(" <loader name=\"NasaARFFFolderLoader\" datalocation=\"benchmark/data/MDP\" relative=\"false\"/>\n");
171                configFile.append(" <setwisepreprocessor name=\"NormalizeByLogEffort\" param=\"\"/>\n");
172                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"LOC_EXECUTABLE\"/>\n");
173                break;
174            case JURECZKO:
175                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\"/>\n");
176                break;
177            case JURECZKO_NUMERIC:
178                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\" classtype=\"numeric\"/>\n");
179                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
180                break;
181            case FILTERJURECZKO:
182                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\"/>\n");
183                configFile.append(" <versionfilter name=\"MinInstanceNumberFilter\" param=\"100\" />\n");
184                configFile.append(" <versionfilter name=\"UnbalancedFilter\" param=\"0.05\" />\n");
185                break;
186            case SELECTEDJURECZKO:
187                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/SELECTEDJURECZKO\" relative=\"false\"/>\n");
188                break;
189            case JURECZKO_EFFNORM:
190                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\"/>\n");
191                configFile.append(" <setwisepreprocessor name=\"NormalizeByEffort\" param=\"\"/>\n");
192                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"loc\"/>\n");
193                break;
194            case JURECZKO_EFFLOGNORM:
195                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\"/>\n");
196                configFile.append(" <setwisepreprocessor name=\"NormalizeByLogEffort\" param=\"\"/>\n");
197                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"loc\"/>\n");
198                break;
199            case JURECZKO_EFFNORM_NUMERIC:
200                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\" classtype=\"numeric\"/>\n");
201                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
202                configFile.append(" <setwisepreprocessor name=\"NormalizeByEffort\" param=\"\"/>\n");
203                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"loc\"/>\n");
204                break;
205            case JURECZKO_EFFLOGNORM_NUMERIC:
206                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\" classtype=\"numeric\"/>\n");
207                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
208                configFile.append(" <setwisepreprocessor name=\"NormalizeByLogEffort\" param=\"\"/>\n");
209                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"loc\"/>\n");
210                break;
211            case JURECZKO_NUMERIC_DUPLICATE:
212                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\" classtype=\"numeric\"/>\n");
213                configFile.append(" <setwisepreprocessor name=\"CreateBugDuplicates\" param=\"\" />\n");
214                break;
215            case JURECZKO_NUMERIC_WEIGHTS:
216                configFile.append(" <loader name=\"CSVFolderLoader\" datalocation=\"benchmark/data/JURECZKO\" relative=\"false\" classtype=\"numeric\"/>\n");
217                configFile.append(" <setwisepreprocessor name=\"WeightByNumBugs\" param=\"\" />\n");
218                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
219                break;
220            case AEEEM:
221                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM\" relative=\"false\"/>\n");
222                break;
223            case AEEEM_LDHH:
224                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHH\" relative=\"false\"/>\n");
225                break;
226            case AEEEM_LDHHWCHU:
227                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\"/>\n");
228                break;
229            case AEEEM_WCHU:
230                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_WCHU\" relative=\"false\"/>\n");
231                break;
232            case AEEEM_NUMERIC:
233                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM\" relative=\"false\" classtype=\"numeric\"/>\n");
234                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
235                break;
236            case AEEEM_LDHH_NUMERIC:
237                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHH\" relative=\"false\" classtype=\"numeric\"/>\n");
238                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
239                break;
240            case AEEEM_LDHHWCHU_NUMERIC:
241                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\" classtype=\"numeric\"/>\n");
242                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
243                break;
244            case AEEEM_WCHU_NUMERIC:
245                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_WCHU\" relative=\"false\" classtype=\"numeric\"/>\n");
246                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
247                break;
248            case AEEEM_LDHHWCHU_EFFNORM:
249                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\"/>\n");
250                configFile.append(" <setwisepreprocessor name=\"NormalizeByEffort\" param=\"\"/>\n");
251                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"numberOfLinesOfCode\"/>\n");
252                break;
253            case AEEEM_LDHHWCHU_EFFLOGNORM:
254                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\"/>\n");
255                configFile.append(" <setwisepreprocessor name=\"NormalizeByLogEffort\" param=\"\"/>\n");
256                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"numberOfLinesOfCode\"/>\n");
257                break;
258            case AEEEM_LDHHWCHU_EFFNORM_NUMERIC:
259                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\" classtype=\"numeric\"/>\n");
260                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
261                configFile.append(" <setwisepreprocessor name=\"NormalizeByEffort\" param=\"\"/>\n");
262                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"numberOfLinesOfCode\"/>\n");
263                break;
264            case AEEEM_LDHHWCHU_EFFLOGNORM_NUMERIC:
265                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\" classtype=\"numeric\"/>\n");
266                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
267                configFile.append(" <setwisepreprocessor name=\"NormalizeByLogEffort\" param=\"\"/>\n");
268                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"numberOfLinesOfCode\"/>\n");
269                break;
270            case AEEEM_LDHHWCHU_NUMERIC_DUPLICATE:
271                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\" classtype=\"numeric\"/>\n");
272                configFile.append(" <setwisepreprocessor name=\"CreateBugDuplicates\" param=\"\" />\n");
273                break;
274            case AEEEM_LDHHWCHU_NUMERIC_WEIGHTS:
275                configFile.append(" <loader name=\"ARFFFolderLoader\" datalocation=\"benchmark/data/AEEEM_LDHHWCHU\" relative=\"false\" classtype=\"numeric\"/>\n");
276                configFile.append(" <setwisepreprocessor name=\"WeightByNumBugs\" param=\"\" />\n");
277                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
278                break;
279            case RELINK:
280                configFile.append(" <loader name=\"RelinkFolderLoader\" datalocation=\"benchmark/data/RELINK\" relative=\"false\"/>\n");
281                break;
282            case RELINK_EFFNORM:
283                configFile.append(" <loader name=\"RelinkFolderLoader\" datalocation=\"benchmark/data/RELINK\" relative=\"false\"/>\n");
284                configFile.append(" <setwisepreprocessor name=\"NormalizeByEffort\" param=\"\"/>\n");
285                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"CountLineCodeExe\"/>\n");
286                break;
287            case RELINK_EFFLOGNORM:
288                configFile.append(" <loader name=\"RelinkFolderLoader\" datalocation=\"benchmark/data/RELINK\" relative=\"false\"/>\n");
289                configFile.append(" <setwisepreprocessor name=\"NormalizeByLogEffort\" param=\"\"/>\n");
290                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"CountLineCodeExe\"/>\n");
291                break;
292            case NETGENE:
293                configFile.append(" <loader name=\"NetgeneFolderLoader\" datalocation=\"benchmark/data/NETGENE\" relative=\"false\"/>\n");
294                break;
295            case NETGENE_NUMERIC:
296                configFile.append(" <loader name=\"NetgeneFolderLoader\" datalocation=\"benchmark/data/NETGENE\" relative=\"false\" classtype=\"numeric\"/>\n");
297                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
298                break;
299            case NETGENE_NUMERIC_DUPLICATE:
300                configFile.append(" <loader name=\"NetgeneFolderLoader\" datalocation=\"benchmark/data/NETGENE\" relative=\"false\" classtype=\"numeric\"/>\n");
301                configFile.append(" <setwisepreprocessor name=\"CreateBugDuplicates\" param=\"\" />\n");
302                break;
303            case NETGENE_NUMERIC_WEIGHTS:
304                configFile.append(" <loader name=\"NetgeneFolderLoader\" datalocation=\"benchmark/data/NETGENE\" relative=\"false\" classtype=\"numeric\"/>\n");
305                configFile.append(" <setwisepreprocessor name=\"WeightByNumBugs\" param=\"\" />\n");
306                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
307                break;
308            case SMARTSHARK_ALL:
309                configFile.append(" <loader name=\"JsonFolderLoader\" datalocation=\"exp-smartshark/data\" relative=\"false\"/>\n");
310                configFile.append(" <versionfilter name=\"MinInstanceNumberFilter\" param=\"100\" />\n");
311                configFile.append(" <versionfilter name=\"UnbalancedFilter\" param=\"0.05\" />\n");
312                break;
313            case SMARTSHARK_AST:
314                configFile.append(" <loader name=\"JsonFolderLoader\" datalocation=\"exp-smartshark/data\" relative=\"false\"/>\n");
315                configFile.append(" <versionfilter name=\"MinInstanceNumberFilter\" param=\"100\" />\n");
316                configFile.append(" <versionfilter name=\"UnbalancedFilter\" param=\"0.05\" />\n");
317                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"PDA LLOC PUA LOC McCC CLOC TNLM CLLC CCO TNPA NA AD NLPA NLS LDC NM TNPM LCOM5 WMC NOD RFC TNM NL NS NPA NOC CBO TNC TLLOC CI TNLG NLM NLG TNA DIT TCD TNLA NLE NG NLA TNLPA NOS CBOI NLPM LLDC CD TNG NPM CCL NOI NOP TLOC CLC CC DLOC NII TCLOC TNLS NOA TNLPM\"/>\n");
318                break;
319            case SMARTSHARK_SM:
320                configFile.append(" <loader name=\"JsonFolderLoader\" datalocation=\"exp-smartshark/data\" relative=\"false\"/>\n");
321                configFile.append(" <versionfilter name=\"MinInstanceNumberFilter\" param=\"100\" />\n");
322                configFile.append(" <versionfilter name=\"UnbalancedFilter\" param=\"0.05\" />\n");
323                configFile.append(" <setwisepreprocessor name=\"AttributeRemoval\" param=\"ReferenceType LambdaExpression Member TypeArgument ThrowStatement ArraySelector Declaration ClassCreator ForStatement SwitchStatement InnerClassCreator Literal TypeParameter VoidClassReference WhileStatement EnhancedForControl This Statement ForControl BinaryOperation MethodReference SuperMemberReference EnumBody FormalParameter EnumConstantDeclaration Expression PackageDeclaration VariableDeclarator AssertStatement Documented node_count DoStatement InterfaceDeclaration ReturnStatement Cast ExplicitConstructorInvocation EnumDeclaration SynchronizedStatement AnnotationMethod SwitchStatementCase MemberReference TypeDeclaration ArrayInitializer CatchClauseParameter CatchClause VariableDeclaration TryStatement Annotation TryResource MethodInvocation BasicType ElementArrayValue InferredFormalParameter IfStatement SuperConstructorInvocation BreakStatement AnnotationDeclaration FieldDeclaration Assignment ContinueStatement Import Primary BlockStatement ClassDeclaration TernaryExpression ClassReference CompilationUnit ConstantDeclaration LocalVariableDeclaration MethodDeclaration ConstructorDeclaration ElementValuePair ArrayCreator Invocation StatementExpression SuperMethodInvocation\"/>\n");
324                break;
325            case SMARTSHARK_ALL_NUMERIC:
326                configFile.append(" <loader name=\"JsonFolderLoader\" datalocation=\"exp-smartshark/data\" relative=\"false\" classtype=\"numeric\"/>\n");
327                configFile.append(" <setwisepreprocessor name=\"MakeClassBinary\" param=\"\" />\n");
328                configFile.append(" <versionfilter name=\"MinInstanceNumberFilter\" param=\"100\" />\n");
329                configFile.append(" <versionfilter name=\"UnbalancedFilter\" param=\"0.05\" />\n");
330                break;
331            default:
332                throw new InvalidParameterException("Unknown data set: " + dataset.toString());
333        }
334        configFile.append(" <versionfilter name=\"MinClassNumberFilter\" param=\"5\" />\n");
335        configFile.append(" <resultspath path=\"benchmark/results-csv\"/>\n");
336    }
337   
338    public static String ALL(Dataset dataset) {
339        StringBuilder configFile = new StringBuilder();
340        preamble(configFile);
341        dataset(configFile, dataset);
342        trainers(configFile);
343       
344        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
345       
346        postamble(configFile);
347        return configFile.toString();
348    }
349   
350    public static String CV(Dataset dataset) {
351        StringBuilder configFile = new StringBuilder();
352        preamble(configFile);
353        dataset(configFile, dataset);
354        trainers(configFile);
355       
356        configFile.append(" <eval name=\"CVWekaEvaluation\" param=\"\" />\n");
357       
358        postamble(configFile);
359        return configFile.toString();
360    }
361   
362    public static String Random(Dataset dataset) {
363        StringBuilder configFile = new StringBuilder();
364        preamble(configFile);
365        dataset(configFile, dataset);
366       
367        configFile.append(" <trainer name=\"WekaTraining\" param=\"RANDOM de.ugoe.cs.cpdp.wekaclassifier.RandomClass\" />\n");
368        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
369        configFile.append(" <repetitions number=\"10\" />\n");
370       
371        postamble(configFile);
372        return configFile.toString();
373    }
374   
375    public static String Trivial(Dataset dataset) {
376        StringBuilder configFile = new StringBuilder();
377        preamble(configFile);
378        dataset(configFile, dataset);
379       
380        configFile.append(" <trainer name=\"WekaTraining\" param=\"FIX de.ugoe.cs.cpdp.wekaclassifier.FixClass -C 1\" />\n");
381        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
382       
383        postamble(configFile);
384        return configFile.toString();
385    }
386   
387    public static String Koshgoftaar08(Dataset dataset) {
388        StringBuilder configFile = new StringBuilder();
389        preamble(configFile);
390        dataset(configFile, dataset);
391        trainersBagging(configFile);
392       
393        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
394       
395        postamble(configFile);
396        return configFile.toString();
397    }
398   
399    public static String Watanabe08(Dataset dataset) {
400        StringBuilder configFile = new StringBuilder();
401        preamble(configFile);
402        dataset(configFile, dataset);
403        trainers(configFile);
404       
405        configFile.append(" <setwisepreprocessor name=\"AverageStandardization\" param=\"\" />\n");       
406        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
407       
408        postamble(configFile);
409        return configFile.toString();
410    }
411   
412    public static String Turhan09(Dataset dataset) {
413        StringBuilder configFile = new StringBuilder();
414        preamble(configFile);
415        dataset(configFile, dataset);
416        trainers(configFile);
417       
418        configFile.append(" <preprocessor name=\"LogarithmTransform\" param=\"\" />\n");
419        configFile.append(" <pointwiseselector name=\"TurhanFilter\" param=\"10\" />\n");
420        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
421       
422        postamble(configFile);
423        return configFile.toString();
424    }
425   
426    public static String Zimmermann09(Dataset dataset) {
427        StringBuilder configFile = new StringBuilder();
428        preamble(configFile);
429        dataset(configFile, dataset);
430        trainers(configFile);
431       
432        configFile.append(" <setwiseselector name=\"DecisionTreeSelection\" param=\"max median stddev\" />\n");
433        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
434       
435        postamble(configFile);
436        return configFile.toString();
437    }
438   
439    public static String CamargoCruz09(Dataset dataset) {
440        StringBuilder configFile = new StringBuilder();
441        preamble(configFile);
442        dataset(configFile, dataset);
443        trainers(configFile);
444       
445        configFile.append(" <preprocessor name=\"LogarithmTransform\" param=\"\" />\n");
446        configFile.append(" <preprocessor name=\"MedianAsReference\" param=\"10\" />\n");
447        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
448       
449        postamble(configFile);
450        return configFile.toString();
451    }
452   
453    public static String Liu10(Dataset dataset) {
454        StringBuilder configFile = new StringBuilder();
455        preamble(configFile);
456        dataset(configFile, dataset);
457       
458        configFile.append(" <setwisetrainer name=\"GPTraining\" param=\"numberRuns:1,errorType2Weight:15\" />");
459        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
460        configFile.append(" <repetitions number=\"10\" />\n");
461       
462        postamble(configFile);
463        return configFile.toString();
464    }
465   
466    public static String Menzies11(Dataset dataset) {
467        StringBuilder configFile = new StringBuilder();
468        preamble(configFile);
469        dataset(configFile, dataset);
470       
471        trainersLocalWhere(configFile);       
472        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
473        configFile.append(" <repetitions number=\"10\" />\n");
474       
475        postamble(configFile);
476        return configFile.toString();
477    }
478   
479    public static String Ma12(Dataset dataset) {
480        StringBuilder configFile = new StringBuilder();
481        preamble(configFile);
482        dataset(configFile, dataset);
483        trainers(configFile);
484       
485        configFile.append(" <preprocessor name=\"DataGravitation\" param=\"\" />\n");
486        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
487       
488        postamble(configFile);
489        return configFile.toString();
490    }
491   
492    public static String Peters12(Dataset dataset) {
493        StringBuilder configFile = new StringBuilder();
494        preamble(configFile);
495        dataset(configFile, dataset);
496        trainers(configFile);
497       
498        configFile.append(" <preprocessor name=\"MORPH\" param=\"\" />\n");
499        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
500        configFile.append(" <repetitions number=\"10\" />\n");
501       
502        postamble(configFile);
503        return configFile.toString();
504    }
505   
506    public static String Uchigaki12(Dataset dataset) {
507        StringBuilder configFile = new StringBuilder();
508        preamble(configFile);
509        dataset(configFile, dataset);
510       
511        configFile.append(" <preprocessor name=\"ZScoreNormalization\" param=\"\" />\n");
512        configFile.append(" <trainer name=\"WekaTraining\" param=\"LE de.ugoe.cs.cpdp.wekaclassifier.LogisticEnsemble\" />\n");
513        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
514       
515        postamble(configFile);
516        return configFile.toString();
517    }
518   
519    public static String Canfora13(Dataset dataset) {
520        StringBuilder configFile = new StringBuilder();
521        preamble(configFile);
522        dataset(configFile, dataset);
523       
524        configFile.append(" <preprocessor name=\"ZScoreNormalization\" param=\"\" />\n");
525        configFile.append(" <trainer name=\"WekaTraining\" param=\"MODEP de.ugoe.cs.cpdp.wekaclassifier.MODEPClassifier -R 0.7\" />\n");
526        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
527        configFile.append(" <repetitions number=\"10\" />\n");
528       
529        postamble(configFile);
530        return configFile.toString();
531    }
532   
533    public static String Peters13(Dataset dataset) {
534        StringBuilder configFile = new StringBuilder();
535        preamble(configFile);
536        dataset(configFile, dataset);
537        trainers(configFile);
538       
539        configFile.append(" <preprocessor name=\"MORPH\" param=\"\" />\n");
540        configFile.append(" <pointwiseselector name=\"CLIFF\" param=\"0.40\" />");
541        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
542        configFile.append(" <repetitions number=\"10\" />\n");
543       
544        postamble(configFile);
545        return configFile.toString();
546    }
547   
548    public static String Herbold13(Dataset dataset) {
549        StringBuilder configFile = new StringBuilder();
550        preamble(configFile);
551        dataset(configFile, dataset);
552        trainers(configFile);
553       
554        int numNeighbors;
555        switch (dataset)
556        {
557            case AEEEM:
558            case AEEEM_LDHH:
559            case AEEEM_LDHHWCHU:
560            case AEEEM_WCHU:
561            case AEEEM_NUMERIC:
562            case AEEEM_LDHH_NUMERIC:
563            case AEEEM_LDHHWCHU_NUMERIC:
564            case AEEEM_WCHU_NUMERIC:
565            case AEEEM_LDHHWCHU_EFFNORM:
566            case AEEEM_LDHHWCHU_EFFLOGNORM:
567            case AEEEM_LDHHWCHU_EFFNORM_NUMERIC:
568            case AEEEM_LDHHWCHU_EFFLOGNORM_NUMERIC:
569            case AEEEM_LDHHWCHU_NUMERIC_DUPLICATE:
570            case AEEEM_LDHHWCHU_NUMERIC_WEIGHTS:
571                numNeighbors = 2;
572                break;
573            case MDP:
574            case MDP_EFFNORM:
575            case MDP_EFFLOGNORM:
576                numNeighbors = 5;
577                break;
578            case JURECZKO:
579            case JURECZKO_NUMERIC:
580            case JURECZKO_EFFNORM:
581            case JURECZKO_EFFLOGNORM:
582            case JURECZKO_EFFNORM_NUMERIC:
583            case JURECZKO_EFFLOGNORM_NUMERIC:
584            case JURECZKO_NUMERIC_DUPLICATE:
585            case JURECZKO_NUMERIC_WEIGHTS:
586                numNeighbors = 30;
587                break;
588            case FILTERJURECZKO:
589                numNeighbors = 20;
590                break;
591            case RELINK:
592            case RELINK_EFFNORM:
593            case RELINK_EFFLOGNORM:
594                numNeighbors = 1;
595                break;
596            case NETGENE:
597            case NETGENE_NUMERIC:
598            case NETGENE_NUMERIC_DUPLICATE:
599            case NETGENE_NUMERIC_WEIGHTS:
600                numNeighbors = 1;
601                break;
602            case SELECTEDJURECZKO:
603                numNeighbors = 4;
604                break;
605            case SMARTSHARK_ALL:
606            case SMARTSHARK_AST:
607            case SMARTSHARK_SM:
608            case SMARTSHARK_ALL_NUMERIC:
609                // TODO check num neighbors
610            default:
611                numNeighbors = 10;
612                break;
613        }
614       
615        configFile.append(" <setwisepreprocessor name=\"Normalization\" param=\"\" />\n");
616        configFile.append(" <setwiseselector name=\"SetWiseKNNSelection\" param=\""+ numNeighbors +"\" />\n");
617        configFile.append(" <postprocessor name=\"BiasedWeights\" param=\"0.5\" />\n");
618        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
619       
620        postamble(configFile);
621        return configFile.toString();
622    }
623   
624    public static String ZHe13(Dataset dataset) {
625        StringBuilder configFile = new StringBuilder();
626        preamble(configFile);
627        dataset(configFile, dataset);
628        trainersBagging(configFile);
629       
630        int numNeighbors;
631        switch (dataset)
632        {
633            case AEEEM:
634            case AEEEM_LDHH:
635            case AEEEM_LDHHWCHU:
636            case AEEEM_WCHU:
637            case AEEEM_NUMERIC:
638            case AEEEM_LDHH_NUMERIC:
639            case AEEEM_LDHHWCHU_NUMERIC:
640            case AEEEM_WCHU_NUMERIC:
641            case AEEEM_LDHHWCHU_EFFNORM:
642            case AEEEM_LDHHWCHU_EFFLOGNORM:
643            case AEEEM_LDHHWCHU_EFFNORM_NUMERIC:
644            case AEEEM_LDHHWCHU_EFFLOGNORM_NUMERIC:
645                numNeighbors = 1;
646                break;
647            case MDP:
648            case MDP_EFFNORM:
649            case MDP_EFFLOGNORM:
650                numNeighbors = 4;
651                break;
652            case JURECZKO:
653            case JURECZKO_NUMERIC:
654            case JURECZKO_EFFNORM:
655            case JURECZKO_EFFLOGNORM:
656            case JURECZKO_EFFNORM_NUMERIC:
657            case JURECZKO_EFFLOGNORM_NUMERIC:
658                numNeighbors = 16;
659                break;
660            case FILTERJURECZKO:
661                numNeighbors = 13;
662                break;
663            case RELINK:
664            case RELINK_EFFNORM:
665            case RELINK_EFFLOGNORM:
666                numNeighbors = 1;
667                break;
668            case NETGENE:
669            case NETGENE_NUMERIC:
670                numNeighbors = 1;
671                break;
672            case SELECTEDJURECZKO:
673                numNeighbors = 4;
674                break;
675            case SMARTSHARK_ALL:
676            case SMARTSHARK_AST:
677            case SMARTSHARK_SM:
678            case SMARTSHARK_ALL_NUMERIC:
679                // TODO check num neighbors
680            default:
681                numNeighbors = 10;
682                break;
683        }
684       
685        configFile.append(" <setwisepreprocessor name=\"Normalization\" param=\"\" />\n");
686        configFile.append(" <setwiseselector name=\"SeparatabilitySelection\" param=\"" + numNeighbors + "\" />\n");
687        configFile.append(" <setwisepostprocessor name=\"Undersampling\" param=\"\" />\n");
688        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
689        configFile.append(" <repetitions number=\"10\" />\n");
690       
691        postamble(configFile);
692        return configFile.toString();
693    }
694   
695    public static String Nam13(Dataset dataset) {
696        StringBuilder configFile = new StringBuilder();
697        preamble(configFile);
698        dataset(configFile, dataset);
699        trainers(configFile);
700       
701        configFile.append(" <preprocessor name=\"TCAPlusNormalization\" param=\"\" />\n");
702        configFile.append(" <postprocessor name=\"TransferComponentAnalysis\" param=\"\" />\n");
703        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
704       
705        postamble(configFile);
706        return configFile.toString();
707    }
708   
709    public static String Panichella14(Dataset dataset) {
710        StringBuilder configFile = new StringBuilder();
711        preamble(configFile);
712        dataset(configFile, dataset);
713       
714        configFile.append(" <trainer name=\"WekaTraining\" param=\"CODEP-LR de.ugoe.cs.cpdp.wekaclassifier.LogisticCODEP\" />\n");
715        configFile.append(" <trainer name=\"WekaTraining\" param=\"CODEP-BN de.ugoe.cs.cpdp.wekaclassifier.BayesNetCODEP\" />\n");
716        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
717       
718        postamble(configFile);
719        return configFile.toString();
720    }
721   
722    public static String Ryu14(Dataset dataset) {
723        StringBuilder configFile = new StringBuilder();
724        preamble(configFile);
725        dataset(configFile, dataset);
726       
727        configFile.append(" <preprocessor name=\"ZScoreNormalization\" param=\"\" />\n");
728        configFile.append(" <testawaretrainer name=\"WekaTestAwareTraining\" param=\"VCBSVM de.ugoe.cs.cpdp.wekaclassifier.VCBSVM -L 0.1 -B 10\" />\n");
729        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
730        configFile.append(" <repetitions number=\"10\" />\n");
731       
732        postamble(configFile);
733        return configFile.toString();
734    }
735   
736    public static String PHe15(Dataset dataset) {
737        StringBuilder configFile = new StringBuilder();
738        preamble(configFile);
739        dataset(configFile, dataset);
740        trainers(configFile);
741       
742        configFile.append(" <setwisepreprocessor name=\"LogarithmTransform\" param=\"\" />\n");
743        configFile.append(" <setwisepreprocessor name=\"TopMetricFilter\" param=\"\" />\n");
744        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
745       
746        postamble(configFile);
747        return configFile.toString();
748    }
749   
750    public static String Peters15(Dataset dataset) {
751        StringBuilder configFile = new StringBuilder();
752        preamble(configFile);
753        dataset(configFile, dataset);
754        trainers(configFile);
755       
756        configFile.append(" <setwisepreprocessor name=\"LogarithmTransform\" param=\"\" />\n");
757        configFile.append(" <setwiseselector name=\"LACE2\" param=\"0.4\" />\n");
758        configFile.append(" <pointwiseselector name=\"TurhanFilter\" param=\"1\" />\n");
759        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
760        configFile.append(" <repetitions number=\"10\" />\n");
761       
762        postamble(configFile);
763        return configFile.toString();
764    }
765   
766    public static String Kawata15(Dataset dataset) {
767        StringBuilder configFile = new StringBuilder();
768        preamble(configFile);
769        dataset(configFile, dataset);
770        trainers(configFile);
771       
772        configFile.append(" <pointwiseselector name=\"DBSCANFilter\" param=\"\" />\n");
773        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
774       
775        postamble(configFile);
776        return configFile.toString();
777    }
778   
779    public static String YZhang15(Dataset dataset) {
780        StringBuilder configFile = new StringBuilder();
781        preamble(configFile);
782        dataset(configFile, dataset);
783       
784        configFile.append(" <trainer name=\"WekaTraining\" param=\"AVGVOTE weka.classifiers.meta.Vote -S 1 -B &quot;weka.classifiers.trees.ADTree&quot; -B &quot;de.ugoe.cs.cpdp.wekaclassifier.DecisionTableWrapper&quot; -B &quot;de.ugoe.cs.cpdp.wekaclassifier.BayesNetWrapper&quot; -B &quot;weka.classifiers.functions.MultilayerPerceptron&quot; -B &quot;weka.classifiers.functions.RBFNetwork&quot; -R AVG\" />\n");
785        configFile.append(" <trainer name=\"WekaTraining\" param=\"MAXVOTE weka.classifiers.meta.Vote -S 1 -B &quot;weka.classifiers.trees.ADTree&quot; -B &quot;de.ugoe.cs.cpdp.wekaclassifier.DecisionTableWrapper&quot; -B &quot;de.ugoe.cs.cpdp.wekaclassifier.BayesNetWrapper&quot; -B &quot;weka.classifiers.functions.MultilayerPerceptron&quot; -B &quot;weka.classifiers.functions.RBFNetwork&quot; -R MAX\" />\n");
786        configFile.append(" <trainer name=\"WekaTraining\" param=\"BAG-DT weka.classifiers.meta.Bagging -P 100 -S 1 -I 10 -W weka.classifiers.trees.J48\" />\n");
787        configFile.append(" <trainer name=\"WekaTraining\" param=\"BAG-NB weka.classifiers.meta.Bagging -P 100 -S 1 -I 10 -W weka.classifiers.bayes.NaiveBayes\" />\n");
788        configFile.append(" <trainer name=\"WekaTraining\" param=\"BOOST-DT weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.trees.J48\" />\n");
789        configFile.append(" <trainer name=\"WekaTraining\" param=\"BOOST-NB weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.bayes.NaiveBayes\" />\n");
790        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
791       
792        postamble(configFile);
793        return configFile.toString();
794    }
795   
796    public static String Amasaki15(Dataset dataset) {
797        StringBuilder configFile = new StringBuilder();
798        preamble(configFile);
799        dataset(configFile, dataset);
800        trainers(configFile);
801       
802        configFile.append(" <preprocessor name=\"LogarithmTransform\" param=\"\" />\n");
803        configFile.append(" <preprocessor name=\"SynonymAttributePruning\" param=\"\" />\n");
804        configFile.append(" <pointwiseselector name=\"SynonymOutlierRemoval\" param=\"\" />");
805        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
806       
807        postamble(configFile);
808        return configFile.toString();
809    }
810   
811    public static String Ryu15(Dataset dataset) {
812        StringBuilder configFile = new StringBuilder();
813        preamble(configFile);
814        dataset(configFile, dataset);
815        trainersLASER(configFile);
816       
817        configFile.append(" <pointwiseselector name=\"MahalanobisOutlierRemoval\" param=\"\" />\n");
818        configFile.append(" <pointwiseselector name=\"NeighborhoodFilter\" param=\"\" />\n");
819        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
820       
821        postamble(configFile);
822        return configFile.toString();
823    }
824   
825    public static String Nam15(Dataset dataset) {
826        StringBuilder configFile = new StringBuilder();
827        preamble(configFile);
828        dataset(configFile, dataset);
829        trainers(configFile);
830       
831        configFile.append(" <preprocessor name=\"CLAMIProcessor\" param=\"\" />\n");
832        configFile.append(" <eval name=\"NormalWekaEvaluation\" param=\"\" />\n");
833       
834        postamble(configFile);
835        return configFile.toString();
836    }
837}
Note: See TracBrowser for help on using the repository browser.