package de.ugoe.cs.cpdp.loader;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.eclipse.emf.common.util.URI;
import org.eclipse.emf.ecore.EObject;
import org.eclipse.emf.ecore.EPackage;
import org.eclipse.emf.ecore.resource.Resource;
import org.eclipse.epsilon.common.parse.problem.ParseProblem;
import org.eclipse.epsilon.emc.emf.EmfUtil;
import org.eclipse.epsilon.eol.EolModule;
import org.eclipse.epsilon.eol.IEolExecutableModule;
import org.eclipse.epsilon.eol.models.IModel;
import org.eclipse.epsilon.etl.EtlModule;

import ARFFx.Instance;
import ARFFx.Model;
import ARFFx.Value;
import de.ugoe.cs.cpdp.decentApp.ARFFxResourceTool;
import de.ugoe.cs.cpdp.decentApp.DECENTEpsilonModelHandler;
import de.ugoe.cs.util.console.Console;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;

/**
 * Loader for DECENT model files.
 *
 * If a {@code model.arff} file already exists next to the DECENT file, it is
 * loaded directly. Otherwise the DECENT model is converted in two steps
 * ({@code DECENT -> ARFFX -> ARFF}) and the resulting ARFF file is written
 * next to the DECENT file.
 *
 * @author Fabian Trautsch
 *
 */
public class DecentDataLoader implements SingleVersionLoader {

    // Model handler for DECENT models
    private final DECENTEpsilonModelHandler modelHandler = new DECENTEpsilonModelHandler();

    // Log settings, passed to the Epsilon scripts via system properties
    String logLevel = "1";
    String logToFile = "false";

    // Names of attributes that are removed before building the ARFF file
    private static final List<String> attributeFilter = new LinkedList<String>();

    // Names of all artifacts seen so far; the insertion position of a name
    // is used as its numeric value in the WEKA data
    private static final Set<String> artifactNames = new LinkedHashSet<String>();

    // Name of the class attribute.
    private static final String classAttributeName = "LABEL.Artifact.Target.BugFix.AverageWeight";

    /**
     * Returns the position of an artifact name in the insertion-ordered set
     * of known artifact names.
     *
     * @param artifactName name to look up
     * @return zero-based position of the name, or -1 if it is unknown
     */
    private int getIndexOfArtifactName(String artifactName) {
        int position = 0;
        for (String nameInSet : artifactNames) {
            if (nameInSet.equals(artifactName)) {
                return position;
            }
            position++;
        }
        return -1;
    }

    /**
     * Defines the attributes that are removed before the ARFF file is built.
     * The filter list is static and this method runs on every call of
     * {@link #load(File)}, so an entry is only added if it is not present yet.
     */
    private void setAttributeFilter() {
        if (!attributeFilter.contains("Agent.Name")) {
            attributeFilter.add("Agent.Name");
        }
    }

    /**
     * Saves the data set as ARFF file after transformation (decent->arffx)
     * and filtering. I/O errors are reported on the console and not rethrown.
     *
     * @param dataSet the WEKA data set to save
     * @param arffLocation location where it should be saved to
     */
    public void save(Instances dataSet, String arffLocation) {
        ArffSaver saver = new ArffSaver();
        saver.setInstances(dataSet);
        try {
            saver.setFile(new File(arffLocation));
            saver.writeBatch();
        } catch (IOException e) {
            Console.printerrln("Cannot save the file to path: " + arffLocation);
            e.printStackTrace();
        }
    }

    /**
     * Loads the given DECENT file and transforms it from decent->arffx->arff.
     * If {@code model.arff} already exists in the folder of the DECENT file,
     * that file is loaded instead and no transformation is executed.
     *
     * @param file the DECENT model file
     * @return Instances in WEKA format with the class attribute set
     * @throws IllegalStateException if an existing ARFF file cannot be read or
     *         the generated ARFFx resource contains no model
     */
    @Override
    public Instances load(File file) {
        // Set attributeFilter
        setAttributeFilter();

        // Register MetaModels
        try {
            registerMetaModels();
        } catch (Exception e1) {
            Console.printerrln("Metamodels cannot be registered!");
            e1.printStackTrace();
        }

        // Set location of decent and arffx Model
        String decentModelLocation = file.getAbsolutePath();
        String pathToDecentModelFolder =
            decentModelLocation.substring(0, decentModelLocation.lastIndexOf(File.separator));
        String arffxModelLocation = pathToDecentModelFolder + "/model.arffx";
        String logModelLocation = pathToDecentModelFolder + "/model.log";
        String arffLocation = pathToDecentModelFolder + "/model.arff";

        // If arff File exists, load from it!
        if (new File(arffLocation).exists()) {
            return loadArffFile(arffLocation);
        }

        // Location of EOL Scripts
        String preprocess = "./decent/epsilon/query/preprocess.eol";
        String arffxToArffSource = "./decent/epsilon/query/addLabels.eol";

        // Set Log Properties
        System.setProperty("epsilon.logLevel", logLevel);
        System.setProperty("epsilon.logToFile", logToFile);
        System.setProperty("epsilon.logFileAvailable", "false");

        // Set decent2arffx Properties
        System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false");
        System.setProperty("epsilon.transformation.decent2arffx.type", "code");

        preprocessDecentModel(preprocess, decentModelLocation, arffxModelLocation,
                              logModelLocation);
        addLabelsToArffxModel(arffxToArffSource, arffxModelLocation, logModelLocation);

        // Unregister MetaModels, otherwise cast will fail
        unregisterMetaModels();

        Instances dataSet = loadArffxModel(arffxModelLocation);

        // Set class attribute
        Attribute classAttribute = dataSet.attribute(classAttributeName);
        dataSet.setClass(classAttribute);

        // Save as ARFF
        save(dataSet, arffLocation);

        return dataSet;
    }

    /**
     * Reads an existing ARFF file and sets the class attribute if the file
     * does not define one.
     *
     * @param arffLocation path of the ARFF file
     * @return the loaded instances
     * @throws IllegalStateException if the file cannot be found or read
     */
    private Instances loadArffFile(String arffLocation) {
        System.out.println("Loading arff File...");
        Instances data;
        // try-with-resources closes the reader even if parsing fails
        try (BufferedReader reader = new BufferedReader(new FileReader(arffLocation))) {
            data = new Instances(reader);
        } catch (FileNotFoundException e) {
            Console.printerrln("File with path: " + arffLocation + " was not found.");
            throw new IllegalStateException("ARFF file not found: " + arffLocation, e);
        } catch (IOException e) {
            Console.printerrln("File with path: " + arffLocation + " cannot be read.");
            throw new IllegalStateException("ARFF file cannot be read: " + arffLocation, e);
        }

        // Set class attribute if not set
        if (data.classIndex() == -1) {
            Attribute classAttribute = data.attribute(classAttributeName);
            data.setClass(classAttribute);
        }
        return data;
    }

    /**
     * Runs the preprocessing script on the DECENT model and the ARFFx model
     * (decent2arffx). Failures are reported and not rethrown.
     *
     * @param preprocess path of the preprocessing EOL script
     * @param decentModelLocation path of the DECENT model
     * @param arffxModelLocation path of the ARFFx model
     * @param logModelLocation path of the log model
     */
    private void preprocessDecentModel(String preprocess, String decentModelLocation,
                                       String arffxModelLocation, String logModelLocation) {
        try {
            IEolExecutableModule preProcessModule = loadModule(preprocess);
            IModel preProcessDecentModel =
                modelHandler.getDECENTModel(decentModelLocation, true, true);
            IModel preProcessArffxModel =
                modelHandler.getARFFxModel(arffxModelLocation, false, true);
            preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel);
            preProcessModule.getContext().getModelRepository().addModel(preProcessArffxModel);
            execute(preProcessModule, logModelLocation);
            preProcessDecentModel.dispose();
            preProcessArffxModel.dispose();
            preProcessModule.reset();
        } catch (URISyntaxException e) {
            Console.printerrln("URI Syntax for decent or arffx model is wrong.");
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the script that adds the label and confidence attributes to the
     * ARFFx model. Failures are reported and not rethrown.
     *
     * @param arffxToArffSource path of the EOL script
     * @param arffxModelLocation path of the ARFFx model
     * @param logModelLocation path of the log model
     */
    private void addLabelsToArffxModel(String arffxToArffSource, String arffxModelLocation,
                                       String logModelLocation) {
        try {
            IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource);
            IModel arffxToArffArffxModel =
                modelHandler.getARFFxModel(arffxModelLocation, true, true);
            arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel);
            execute(arffxToArffModule, logModelLocation);
            arffxToArffArffxModel.dispose();
            // can be stored and retained alternatively
            arffxToArffModule.reset();
        } catch (URISyntaxException e) {
            Console.printerrln("URI Syntax for arffx model is wrong.");
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Removes all entries from the global EMF package registry. The entries
     * are copied first so that nothing is removed while the registry's own
     * key set is being iterated.
     */
    private void unregisterMetaModels() {
        HashMap<String, Object> metaModelCache = new HashMap<>();
        for (String key : EPackage.Registry.INSTANCE.keySet()) {
            metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key));
        }
        for (String key : metaModelCache.keySet()) {
            EPackage.Registry.INSTANCE.remove(key);
        }
    }

    /**
     * Loads the ARFFx file and converts every contained model into WEKA
     * instances. If the resource holds several models, the data set of the
     * last one is returned.
     *
     * @param arffxModelLocation path of the ARFFx model
     * @return the WEKA data set built from the ARFFx model
     * @throws IllegalStateException if the resource contains no model
     */
    private Instances loadArffxModel(String arffxModelLocation) {
        // Workaround to generate a usable URI. Absolute path is not
        // possible, therefore we need to construct a relative path
        URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation();
        String basePath = location.getFile();

        // Location is the bin folder, so we need to delete the last 4 characters
        // NOTE(review): assumes the code source path ends with "bin/" - confirm
        // for other deployments (e.g. when running from a jar).
        basePath = basePath.substring(0, basePath.length() - 4);
        String relativePath = new File(basePath).toURI()
            .relativize(new File(arffxModelLocation).toURI()).getPath();

        // Load arffx file and create WEKA Instances
        ARFFxResourceTool tool = new ARFFxResourceTool();
        Resource resource = tool.loadResourceFromXMI(relativePath, "arffx");

        Instances dataSet = null;
        for (EObject o : resource.getContents()) {
            Model m = (Model) o;
            dataSet = createWekaDataFormat(m);

            for (Instance i : m.getData()) {
                createWekaInstance(dataSet, i);
            }
        }

        if (dataSet == null) {
            throw new IllegalStateException("No ARFFx model found in: " + arffxModelLocation);
        }
        return dataSet;
    }

    /**
     * Creates a WEKA instance from an ARFFx model instance and appends it to
     * the data set. Values of filtered attributes are skipped.
     *
     * @param dataSet WEKA data set the ARFFx model instance is added to
     * @param i ARFFx model instance
     */
    private void createWekaInstance(Instances dataSet, Instance i) {
        double[] values = new double[dataSet.numAttributes()];
        // Index into the WEKA attributes; only advanced for unfiltered values
        int j = 0;

        for (Value value : i.getValues()) {
            String dataValue = value.getContent();
            String attributeName = value.getOfAttribute().getName();

            if (attributeFilter.contains(attributeName)) {
                continue;
            }

            if (isLabel(attributeName) || isConfidenceLabel(attributeName)) {
                // LABEL.* and CONFIDENCE.* are nominal: store the index of the value
                values[j] = dataSet.attribute(j).indexOfValue(dataValue);
            } else if (attributeName.equals("Artifact.Name")) {
                // The artifact name is encoded by its position in the set of names
                artifactNames.add(dataValue);
                values[j] = getIndexOfArtifactName(dataValue);
            } else {
                // Everything else is treated as a numeric value
                values[j] = Double.parseDouble(dataValue);
            }

            j++;
        }

        DenseInstance inst = new DenseInstance(1.0, values);
        dataSet.add(inst);
    }

    /**
     * Creates an empty WEKA data set whose attributes mirror the unfiltered
     * attributes of an ARFFx model.
     *
     * @param m ARFFx model
     * @return empty data set named "test-dataset" with the model's attributes
     */
    private Instances createWekaDataFormat(Model m) {
        ArrayList<Attribute> datasetAttributes = new ArrayList<Attribute>();
        for (ARFFx.Attribute attribute : m.getAttributes()) {
            String attributeName = attribute.getName();

            if (attributeFilter.contains(attributeName)) {
                continue;
            }

            Attribute wekaAttr;

            if (isLabel(attributeName)) {
                // LABEL.* attributes are nominal with the values false/true
                final ArrayList<String> classAttVals = new ArrayList<String>();
                classAttVals.add("false");
                classAttVals.add("true");
                wekaAttr = new Attribute(attributeName, classAttVals);
            } else if (isConfidenceLabel(attributeName)) {
                // CONFIDENCE.* attributes are nominal with the values high/low
                ArrayList<String> labels = new ArrayList<String>();
                labels.add("high");
                labels.add("low");
                wekaAttr = new Attribute(attributeName, labels);
            } else {
                // All remaining attributes are numeric
                wekaAttr = new Attribute(attributeName);
            }

            datasetAttributes.add(wekaAttr);
        }

        return new Instances("test-dataset", datasetAttributes, 0);
    }

    /**
     * Helper method which indicates if the given value starts with "LABEL".
     *
     * @param value value to test
     * @return true if the value starts with "LABEL", false otherwise
     */
    private boolean isLabel(String value) {
        return value.startsWith("LABEL");
    }

    /**
     * Helper method which indicates if the given value starts with "CONFIDENCE".
     *
     * @param value value to test
     * @return true if the value starts with "CONFIDENCE", false otherwise
     */
    private boolean isConfidenceLabel(String value) {
        return value.startsWith("CONFIDENCE");
    }

    /**
     * Returns if a filename ends with ".decent".
     *
     * @param filename name of the file to test
     * @return true if the filename ends with ".decent", false otherwise
     */
    @Override
    public boolean filenameFilter(String filename) {
        return filename.endsWith(".decent");
    }

    /**
     * Helper method for executing an EOL script and adding the log model
     * beforehand.
     *
     * @param module module to execute
     * @param logModelLocation location of the log model
     * @throws Exception if the log model cannot be obtained or the execution fails
     */
    private void execute(IEolExecutableModule module, String logModelLocation)
        throws Exception
    {
        IModel logModel = modelHandler.getLOGModel(logModelLocation, true, true);
        module.getContext().getModelRepository().addModel(logModel);
        module.execute();
        logModel.dispose();
    }

    /**
     * Loads and parses the module from a given source. Parse problems are
     * printed, but the module is returned nevertheless.
     *
     * @param source where the module is (e.g. eol script); must end with
     *        "etl" or "eol"
     * @return the parsed module
     * @throws IllegalArgumentException if the source is neither an ETL nor an EOL script
     * @throws Exception if parsing fails
     * @throws URISyntaxException if the location of the source is not a valid URI
     */
    private IEolExecutableModule loadModule(String source) throws Exception, URISyntaxException {
        IEolExecutableModule module;
        if (source.endsWith("etl")) {
            module = new EtlModule();
        } else if (source.endsWith("eol")) {
            module = new EolModule();
        } else {
            // Previously the module stayed null here and parse() failed with
            // a NullPointerException.
            throw new IllegalArgumentException("Unsupported module type: " + source);
        }

        module.parse(modelHandler.getFile(source));

        if (module.getParseProblems().size() > 0) {
            Console.printerrln("Parse error occured...");
            for (ParseProblem problem : module.getParseProblems()) {
                System.err.println(problem.toString());
            }
        }

        return module;
    }

    /**
     * Helper method for registering the metamodels: every ".ecore" file in the
     * metamodel folder is registered in the global EMF package registry.
     *
     * @throws Exception if the metamodel folder cannot be listed or a
     *         metamodel cannot be registered
     */
    private void registerMetaModels() throws Exception {
        String metaModelsPath = DECENTEpsilonModelHandler.metaPath;
        File metaModelsLocation = new File(metaModelsPath);
        File[] metaModelFiles = metaModelsLocation.listFiles();
        if (metaModelFiles == null) {
            // listFiles() returns null if the path is no directory or cannot be read
            throw new FileNotFoundException("Metamodel folder cannot be listed: " + metaModelsPath);
        }
        for (File file : metaModelFiles) {
            if (file.getName().endsWith(".ecore")) {
                EmfUtil.register(URI.createFileURI(file.getAbsolutePath()),
                                 EPackage.Registry.INSTANCE);
            }
        }
    }
}