package de.ugoe.cs.cpdp.loader; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader; import weka.core.Instances; import weka.filters.Filter; import weka.filters.unsupervised.attribute.Add; import weka.filters.unsupervised.attribute.Reorder; public class NasaARFFLoader implements SingleVersionLoader { Map attributeNameMap; List attributeOrder; public NasaARFFLoader() { attributeNameMap = new HashMap<>(); // Map entries for ar project attributeNameMap.put("total_loc", "LOC_TOTAL"); attributeNameMap.put("comment_loc", "LOC_COMMENTS"); attributeNameMap.put("code_and_comment_loc", "LOC_CODE_AND_COMMENT"); attributeNameMap.put("executable_loc", "LOC_EXECUTABLE"); attributeNameMap.put("unique_operands", "NUM_UNIQUE_OPERANDS"); attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS"); attributeNameMap.put("total_operands", "NUM_OPERANDS"); attributeNameMap.put("total_operators", "NUM_OPERATORS"); attributeNameMap.put("halstead_length", "HALSTEAD_LENGTH"); attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME"); attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY"); attributeNameMap.put("halstead_effort", "HALSTEAD_EFFORT"); attributeNameMap.put("halstead_error", "HALSTEAD_ERROR_EST"); attributeNameMap.put("halstead_time", "HALSTEAD_PROG_TIME"); attributeNameMap.put("branch_count", "BRANCH_COUNT"); attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY"); attributeNameMap.put("design_complexity", "DESIGN_COMPLEXITY"); // Map entries for KC2 attributeNameMap.put("loc", "LOC_TOTAL"); // TODO these first two LOCs are guesses attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); // TODO attributeNameMap.put("lOComment", "LOC_COMMENTS"); attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT"); attributeNameMap.put("uniq_Op", "NUM_UNIQUE_OPERATORS"); attributeNameMap.put("uniq_Opnd", "NUM_UNIQUE_OPERANDS"); attributeNameMap.put("total_Op", "NUM_OPERATORS"); attributeNameMap.put("total_Opnd", "NUM_OPERANDS"); attributeNameMap.put("v", "HALSTEAD_VOLUME"); attributeNameMap.put("l", "HALSTEAD_LENGTH"); attributeNameMap.put("d", "HALSTEAD_DIFFICULTY"); attributeNameMap.put("e", "HALSTEAD_EFFORT"); attributeNameMap.put("b", "HALSTEAD_ERROR_EST"); // TODO not sure about this one attributeNameMap.put("t", "HALSTEAD_PROG_TIME"); attributeNameMap.put("branchCount", "BRANCH_COUNT"); attributeNameMap.put("v(g)", "CYCLOMATIC_COMPLEXITY"); attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY"); attributeNameMap.put("defects", "bug"); attributeNameMap.put("Defective", "bug"); attributeNameMap.put("problems", "bug"); // build list with normalized attribute order attributeOrder = new LinkedList<>(); attributeOrder.add("LOC_TOTAL"); attributeOrder.add("LOC_EXECUTABLE"); attributeOrder.add("LOC_COMMENTS"); attributeOrder.add("LOC_CODE_AND_COMMENT"); attributeOrder.add("NUM_UNIQUE_OPERATORS"); attributeOrder.add("NUM_UNIQUE_OPERANDS"); attributeOrder.add("NUM_OPERATORS"); attributeOrder.add("NUM_OPERANDS"); attributeOrder.add("HALSTEAD_VOLUME"); attributeOrder.add("HALSTEAD_LENGTH"); attributeOrder.add("HALSTEAD_DIFFICULTY"); attributeOrder.add("HALSTEAD_EFFORT"); attributeOrder.add("HALSTEAD_ERROR_EST"); attributeOrder.add("HALSTEAD_PROG_TIME"); attributeOrder.add("BRANCH_COUNT"); attributeOrder.add("CYCLOMATIC_COMPLEXITY"); attributeOrder.add("DESIGN_COMPLEXITY"); attributeOrder.add("bug"); } /** * Loads the instances. * @param file handle to the file of the instances * @return the instances */ public Instances load(File file) { BufferedReader reader; Instances data; try { reader = new BufferedReader(new FileReader(file)); data = new Instances(reader); reader.close(); } catch (IOException e) { // TODO Auto-generated catch block throw new RuntimeException(e); } //setting class attribute data.setClassIndex(data.numAttributes() - 1); // normalize attribute names for( int i=0; i