source: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/DecentDataLoader.java @ 47

Last change on this file since 47 was 41, checked in by sherbold, 9 years ago
  • formatted code and added copyrights
  • Property svn:mime-type set to text/plain
File size: 16.5 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.loader;
16
17import java.io.BufferedReader;
18import java.io.File;
19import java.io.FileNotFoundException;
20import java.io.FileReader;
21import java.io.IOException;
22import java.net.URISyntaxException;
23import java.net.URL;
24import java.util.ArrayList;
25import java.util.HashMap;
26import java.util.LinkedHashSet;
27import java.util.LinkedList;
28import java.util.List;
29import java.util.Set;
30
31import org.eclipse.emf.common.util.URI;
32import org.eclipse.emf.ecore.EObject;
33import org.eclipse.emf.ecore.EPackage;
34import org.eclipse.emf.ecore.resource.Resource;
35import org.eclipse.epsilon.common.parse.problem.ParseProblem;
36import org.eclipse.epsilon.emc.emf.EmfUtil;
37import org.eclipse.epsilon.eol.EolModule;
38import org.eclipse.epsilon.eol.IEolExecutableModule;
39import org.eclipse.epsilon.eol.models.IModel;
40import org.eclipse.epsilon.etl.EtlModule;
41
42import de.ugoe.cs.cpdp.decentApp.models.arffx.Instance;
43import de.ugoe.cs.cpdp.decentApp.models.arffx.Model;
44import de.ugoe.cs.cpdp.decentApp.models.arffx.Value;
45import de.ugoe.cs.cpdp.decentApp.ARFFxResourceTool;
46import de.ugoe.cs.cpdp.decentApp.DECENTEpsilonModelHandler;
47import de.ugoe.cs.util.console.Console;
48import weka.core.Attribute;
49import weka.core.DenseInstance;
50import weka.core.Instances;
51import weka.core.converters.ArffSaver;
52
/**
 * Class for loading a decent model file. If no ARFF file is present next to the decent model
 * file, the following conversions are performed: DECENT -> ARFFX -> ARFF
 *
 * @author Fabian Trautsch
 *
 */
60public class DecentDataLoader implements SingleVersionLoader {
61
62    // Model Handler for Decent Models
63    private DECENTEpsilonModelHandler modelHandler = new DECENTEpsilonModelHandler();
64
65    // Set log level
66    String logLevel = "1";
67    String logToFile = "false";
68
69    // This list contains attributes, that should be removed before building the arff file
70    private static List<String> attributeFilter = new LinkedList<String>();
71
72    // This list contains all names of the different artifacts
73    private static Set<String> artifactNames = new LinkedHashSet<String>();
74
75    // Name of the class attribute.
76    private static final String classAttributeName = "LABEL.Artifact.Target.BugFix.AverageWeight";
77
78    private int getIndexOfArtifactName(String artifactName) {
79        int index = -1;
80        if (artifactNames.contains(artifactName)) {
81            int i = 0;
82            for (String nameInSet : artifactNames) {
83                if (nameInSet.equals(artifactName)) {
84                    index = i;
85                }
86                else {
87                    i++;
88                }
89            }
90        }
91
92        return index;
93    }
94
95    /**
96     * Defines attributes, that should be removed before building the ARFF File from.
97     */
98    private void setAttributeFilter() {
99        attributeFilter.add("Agent.Name");
100
101    }
102
103    /**
104     * Saves the dataset as arff after transformation (decent->arffx) and filtering
105     *
106     * @param dataSet
107     *            the WEKA dataset to save
108     * @param arffLocation
109     *            location where it should be saved to
110     */
111    public void save(Instances dataSet, String arffLocation) {
112
113        ArffSaver saver = new ArffSaver();
114        saver.setInstances(dataSet);
115        try {
116            saver.setFile(new File(arffLocation));
117            saver.writeBatch();
118        }
119        catch (IOException e) {
120            Console.printerrln("Cannot save the file to path: " + arffLocation);
121            e.printStackTrace();
122        }
123    }
124
125    /**
126     * Loads the given decent file and tranform it from decent->arffx->arff
127     *
128     * @return Instances in WEKA format
129     */
130    @Override
131    public Instances load(File file) {
132
133        // Set attributeFilter
134        setAttributeFilter();
135
136        // Register MetaModels
137        try {
138            registerMetaModels();
139        }
140        catch (Exception e1) {
141            Console.printerrln("Metamodels cannot be registered!");
142            e1.printStackTrace();
143        }
144
145        // Set location of decent and arffx Model
146        String decentModelLocation = file.getAbsolutePath();
147        String pathToDecentModelFolder =
148            decentModelLocation.substring(0, decentModelLocation.lastIndexOf(File.separator));
149        String arffxModelLocation = pathToDecentModelFolder + "/model.arffx";
150        String logModelLocation = pathToDecentModelFolder + "/model.log";
151        String arffLocation = pathToDecentModelFolder + "/model.arff";
152
153        // If arff File exists, load from it!
154        if (new File(arffLocation).exists()) {
155            System.out.println("Loading arff File...");
156            BufferedReader reader;
157            Instances data = null;
158            try {
159                reader = new BufferedReader(new FileReader(arffLocation));
160                data = new Instances(reader);
161                reader.close();
162            }
163            catch (FileNotFoundException e) {
164                Console.printerrln("File with path: " + arffLocation + " was not found.");
165                e.printStackTrace();
166            }
167            catch (IOException e) {
168                Console.printerrln("File with path: " + arffLocation + " cannot be read.");
169                e.printStackTrace();
170            }
171
172            // Set class attribute if not set
173            if (data.classIndex() == -1) {
174                Attribute classAttribute = data.attribute(classAttributeName);
175                data.setClass(classAttribute);
176            }
177
178            return data;
179        }
180
181        // Location of EOL Scripts
182        String preprocess = "./decent/epsilon/query/preprocess.eol";
183        String arffxToArffSource = "./decent/epsilon/query/addLabels.eol";
184
185        // Set Log Properties
186        System.setProperty("epsilon.logLevel", logLevel);
187        System.setProperty("epsilon.logToFile", logToFile);
188        System.setProperty("epsilon.logFileAvailable", "false");
189
190        // Set decent2arffx Properties
191        System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false");
192        System.setProperty("epsilon.transformation.decent2arffx.type", "code");
193
194        // Preprocess Data, transform from decent2arffx
195        try {
196            IEolExecutableModule preProcessModule = loadModule(preprocess);
197            IModel preProcessDecentModel =
198                modelHandler.getDECENTModel(decentModelLocation, true, true);
199            IModel preProcessArffxarffxModel =
200                modelHandler.getARFFxModel(arffxModelLocation, false, true);
201            preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel);
202            preProcessModule.getContext().getModelRepository().addModel(preProcessArffxarffxModel);
203            execute(preProcessModule, logModelLocation);
204            preProcessDecentModel.dispose();
205            preProcessArffxarffxModel.dispose();
206            preProcessModule.reset();
207        }
208        catch (URISyntaxException e) {
209            Console.printerrln("URI Syntax for decent or arffx model is wrong.");
210            e.printStackTrace();
211        }
212        catch (Exception e) {
213            e.printStackTrace();
214        }
215
216        // Transform to arff, for label and confidence attributes
217        try {
218            IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource);
219            IModel arffxToArffArffxModel =
220                modelHandler.getARFFxModel(arffxModelLocation, true, true);
221            arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel);
222            execute(arffxToArffModule, logModelLocation);
223            arffxToArffArffxModel.dispose();
224            // can be stored and retained alternatively
225            arffxToArffModule.reset();
226        }
227        catch (URISyntaxException e) {
228            Console.printerrln("URI Syntax for arffx model is wrong.");
229            e.printStackTrace();
230        }
231        catch (Exception e) {
232            e.printStackTrace();
233        }
234
235        // Unregister MetaModels, otherwise cast will fail
236        HashMap<String, Object> metaModelCache = new HashMap<>();
237        for (String key : EPackage.Registry.INSTANCE.keySet()) {
238            metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key));
239        };
240
241        for (String key : metaModelCache.keySet()) {
242            EPackage.Registry.INSTANCE.remove(key);
243        };
244
245        // Workaround to gernerate a usable URI. Absolute path is not
246        // possible, therefore we need to construct a relative path
247
248        URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation();
249        String basePath = location.getFile();
250
251        // Location is the bin folder, so we need to delete the last 4 characters
252        basePath = basePath.substring(0, basePath.length() - 4);
253        String relativePath =
254            new File(basePath).toURI().relativize(new File(arffxModelLocation).toURI()).getPath();
255
256        // Loard arffx file and create WEKA Instances
257        ARFFxResourceTool tool = new ARFFxResourceTool();
258        Resource resource = tool.loadResourceFromXMI(relativePath, "arffx");
259
260        Instances dataSet = null;
261        for (EObject o : resource.getContents()) {
262            Model m = (Model) o;
263            dataSet = createWekaDataFormat(m);
264
265            for (Instance i : m.getData()) {
266                createWekaInstance(dataSet, i);
267            }
268        }
269
270        // Set class attribute
271        Attribute classAttribute = dataSet.attribute(classAttributeName);
272        dataSet.setClass(classAttribute);
273
274        // Save as ARFF
275        save(dataSet, arffLocation);
276
277        return dataSet;
278
279    }
280
281    /**
282     * Creates a WekaInstance from an ARFFX Model Instance
283     *
284     * @param dataSet
285     *            WekaInstance dataset, where the arffx model instances should be added to
286     * @param i
287     *            arffx model instance
288     */
289    private void createWekaInstance(Instances dataSet, Instance i) {
290        double[] values = new double[dataSet.numAttributes()];
291        int j = 0;
292
293        for (Value value : i.getValues()) {
294            String dataValue = value.getContent();
295            String attributeName = value.getOfAttribute().getName();
296
297            if (attributeFilter.contains(attributeName)) {
298                continue;
299            }
300
301            // Is value a LABEL.* attribute?
302            if (isLabel(attributeName)) {
303                values[j] = dataSet.attribute(j).indexOfValue(dataValue);
304            }
305            else if (isConfidenceLabel(attributeName)) {
306                // Is value a CONFIDENCE.* attribute?
307                values[j] = dataSet.attribute(j).indexOfValue(dataValue);
308            }
309            else if (attributeName.equals("Artifact.Name")) {
310                // Is it the name of the artifact?
311                artifactNames.add(dataValue);
312                values[j] = getIndexOfArtifactName(dataValue);
313            }
314            else {
315                // Is it a numeric value?
316                values[j] = Double.parseDouble(dataValue);
317            }
318
319            j++;
320        }
321
322        DenseInstance inst = new DenseInstance(1.0, values);
323        dataSet.add(inst);
324    }
325
326    /**
327     * Creates a Weka Instances set out of a arffx model
328     *
329     * @param m
330     *            arffx model
331     * @return
332     */
333    private Instances createWekaDataFormat(Model m) {
334
335        // Bad solution, can be enhanced (continue in for loop)
336        ArrayList<Attribute> datasetAttributes = new ArrayList<Attribute>();
337        for (de.ugoe.cs.cpdp.decentApp.models.arffx.Attribute attribute : m.getAttributes()) {
338            String attributeName = attribute.getName();
339
340            if (attributeFilter.contains(attributeName)) {
341                continue;
342            }
343
344            Attribute wekaAttr;
345
346            // Is attribute a LABEL.* attribute?
347            if (isLabel(attributeName)) {
348                // Classattribute
349                final ArrayList<String> classAttVals = new ArrayList<String>();
350                classAttVals.add("false");
351                classAttVals.add("true");
352                wekaAttr = new Attribute(attributeName, classAttVals);
353            }
354            else if (isConfidenceLabel(attributeName)) {
355                // Is attribute a CONFIDENCE.* attribute?
356                ArrayList<String> labels = new ArrayList<String>();
357                labels.add("high");
358                labels.add("low");
359                wekaAttr = new Attribute(attributeName, labels);
360            }
361            else {
362                // Is it a numeric attribute?
363                wekaAttr = new Attribute(attributeName);
364            }
365
366            datasetAttributes.add(wekaAttr);
367        }
368
369        return new Instances("test-dataset", datasetAttributes, 0);
370    }
371
372    /**
373     * Helper methods which indicates if the given value starts with "LABEL"
374     *
375     * @param value
376     *            to test
377     * @return
378     */
379    private boolean isLabel(String value) {
380        if (value.length() >= 5 && value.substring(0, 5).equals("LABEL")) {
381            return true;
382        }
383
384        return false;
385    }
386
387    /**
388     * Helper method which indicates if the given value starts with "CONFIDENCE"
389     *
390     * @param value
391     *            to test
392     * @return
393     */
394    private boolean isConfidenceLabel(String value) {
395        if (value.length() >= 10 && value.substring(0, 10).equals("CONFIDENCE")) {
396            return true;
397        }
398
399        return false;
400    }
401
402    /**
403     * Returns if a filename ends with ".decent"
404     *
405     * @return
406     */
407    @Override
408    public boolean filenameFilter(String filename) {
409        return filename.endsWith(".decent");
410    }
411
412    /**
413     * Helper method for executing a eol scripts and adding the log model beforehand
414     *
415     * @param module
416     *            module to execute
417     * @param logModelLocation
418     *            location of the log model
419     * @throws Exception
420     */
421    private void execute(IEolExecutableModule module, String logModelLocation) throws Exception {
422        IModel logModel = modelHandler.getLOGModel(logModelLocation, true, true);
423        module.getContext().getModelRepository().addModel(logModel);
424        module.execute();
425        logModel.dispose();
426    }
427
428    /**
429     * Loads the module from a given source
430     *
431     * @param source
432     *            where the module is (e.g. eol script)
433     * @return
434     * @throws Exception
435     * @throws URISyntaxException
436     */
437    private IEolExecutableModule loadModule(String source) throws Exception, URISyntaxException {
438
439        IEolExecutableModule module = null;
440        if (source.endsWith("etl")) {
441            module = new EtlModule();
442        }
443        else if (source.endsWith("eol")) {
444            module = new EolModule();
445        }
446        else {
447
448        }
449
450        module.parse(modelHandler.getFile(source));
451
452        if (module.getParseProblems().size() > 0) {
453            Console.printerrln("Parse error occured...");
454            for (ParseProblem problem : module.getParseProblems()) {
455                System.err.println(problem.toString());
456            }
457            // System.exit(-1);
458        }
459
460        return module;
461    }
462
463    /**
464     * Helper method for registering the metamodels
465     *
466     * @throws Exception
467     */
468    private void registerMetaModels() throws Exception {
469        String metaModelsPath = DECENTEpsilonModelHandler.metaPath;
470        File metaModelsLocation = new File(metaModelsPath);
471        for (File file : metaModelsLocation.listFiles()) {
472            if (file.getName().endsWith(".ecore")) {
473                EmfUtil.register(URI.createFileURI(file.getAbsolutePath()),
474                                 EPackage.Registry.INSTANCE);
475            }
476        }
477    }
478
479}
Note: See TracBrowser for help on using the repository browser.