- Timestamp:
- 09/24/15 10:59:05 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFLoader.java
r4 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 16 30 17 31 /** 18 * Loads the instances for a software version from an ARFF file of the 19 * NASA/SOFTLAB/MDP data. 32 * Loads the instances for a software version from an ARFF file of the NASA/SOFTLAB/MDP data. 20 33 * 21 34 * @author Steffen Herbold … … 23 36 public class NasaARFFLoader implements SingleVersionLoader { 24 37 25 26 * used to map attributes the same attribute with different names to each 27 * other 28 */ 29 Map<String, String> attributeNameMap; 30 31 /** 32 * used to ensure that the attribute order is the same after loading 33 */ 34 List<String> attributeOrder; 35 36 /** 37 * Constructor. Creates a new NasaARFFLoader. 38 */ 39 public NasaARFFLoader() { 40 attributeNameMap = new HashMap<>(); 41 42 // Map entries for ar project 43 attributeNameMap.put("total_loc", "LOC_TOTAL");44 attributeNameMap.put("comment_loc", "LOC_COMMENTS");45 attributeNameMap.put("code_and_comment_loc", "LOC_CODE_AND_COMMENT");46 attributeNameMap.put("executable_loc", "LOC_EXECUTABLE");47 attributeNameMap.put("unique_operands", "NUM_UNIQUE_OPERANDS");48 attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS");49 attributeNameMap.put("total_operands", "NUM_OPERANDS");50 attributeNameMap.put("total_operators", "NUM_OPERATORS");51 attributeNameMap.put("halstead_length", "HALSTEAD_LENGTH");52 attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME");53 attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY");54 attributeNameMap.put("halstead_effort", "HALSTEAD_EFFORT");55 attributeNameMap.put("halstead_error", "HALSTEAD_ERROR_EST");56 attributeNameMap.put("halstead_time", "HALSTEAD_PROG_TIME");57 attributeNameMap.put("branch_count", "BRANCH_COUNT");58 attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY");59 attributeNameMap.put("design_complexity", "DESIGN_COMPLEXITY"); 60 61 // Map entries for KC2 62 attributeNameMap.put("loc", "LOC_TOTAL");63 attributeNameMap.put("lOCode", "LOC_EXECUTABLE");64 attributeNameMap.put("lOComment", "LOC_COMMENTS");65 attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT");66 attributeNameMap.put("uniq_Op", "NUM_UNIQUE_OPERATORS");67 attributeNameMap.put("uniq_Opnd", "NUM_UNIQUE_OPERANDS");68 attributeNameMap.put("total_Op", "NUM_OPERATORS");69 attributeNameMap.put("total_Opnd", "NUM_OPERANDS");70 attributeNameMap.put("v", "HALSTEAD_VOLUME");71 attributeNameMap.put("l", "HALSTEAD_LENGTH");72 attributeNameMap.put("d", "HALSTEAD_DIFFICULTY");73 attributeNameMap.put("e", "HALSTEAD_EFFORT");74 attributeNameMap.put("b", "HALSTEAD_ERROR_EST");75 attributeNameMap.put("t", "HALSTEAD_PROG_TIME");76 attributeNameMap.put("branchCount", "BRANCH_COUNT");77 attributeNameMap.put("v(g)", "CYCLOMATIC_COMPLEXITY");78 attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY"); 79 80 attributeNameMap.put("defects", "bug");81 attributeNameMap.put("Defective", "bug");82 attributeNameMap.put("problems", "bug");83 attributeNameMap.put("label", "bug"); 84 85 // build list with normalized attribute order 86 attributeOrder = new LinkedList<>(); 87 88 attributeOrder.add("LOC_TOTAL");89 attributeOrder.add("LOC_EXECUTABLE");90 attributeOrder.add("LOC_COMMENTS");91 attributeOrder.add("LOC_CODE_AND_COMMENT");92 attributeOrder.add("NUM_UNIQUE_OPERATORS");93 attributeOrder.add("NUM_UNIQUE_OPERANDS");94 attributeOrder.add("NUM_OPERATORS");95 attributeOrder.add("NUM_OPERANDS");96 attributeOrder.add("HALSTEAD_VOLUME");97 attributeOrder.add("HALSTEAD_LENGTH");98 attributeOrder.add("HALSTEAD_DIFFICULTY");99 attributeOrder.add("HALSTEAD_EFFORT");100 attributeOrder.add("HALSTEAD_ERROR_EST");101 attributeOrder.add("HALSTEAD_PROG_TIME");102 attributeOrder.add("BRANCH_COUNT");103 attributeOrder.add("CYCLOMATIC_COMPLEXITY");104 attributeOrder.add("DESIGN_COMPLEXITY");105 attributeOrder.add("bug"); 106 } 107 108 /* 109 * (non-Javadoc) 110 * 111 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 112 */ 113 @Override 114 public Instances load(File file) { 115 BufferedReader reader;116 Instances data; 117 try { 118 reader = new BufferedReader(new FileReader(file));119 data = new Instances(reader);120 reader.close(); 121 }catch (IOException e) {122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 } catch (Exception e) { 158 throw new RuntimeException("Error while reordering the data", e); 159 } 160 if (data.numAttributes() != attributeOrder.size()) { 161 throw new RuntimeException( 162 163 164 165 166 167 168 169 170 171 172 173 } catch (Exception e) { 174 throw new RuntimeException( 175 176 177 178 179 180 181 String firstValue = data.classAttribute().enumerateValues() 182 .nextElement().toString(); 183 if (firstValue.equals("Y") || firstValue.equals("yes") 184 || firstValue.equals("true")) { 185 classValue = 0.0; 186 } else { 187 classValue = 1.0; 188 } 189 190 for (int i = 0; i < data.numInstances(); i++) {191 if (data.instance(i).classValue() == classValue) { 192 data.instance(i).setValue(data.classIndex() + 1, 1.0); 193 }else {194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 38 /** 39 * used to map attributes the same attribute with different names to each other 40 */ 41 Map<String, String> attributeNameMap; 42 43 /** 44 * used to ensure that the attribute order is the same after loading 45 */ 46 List<String> attributeOrder; 47 48 /** 49 * Constructor. Creates a new NasaARFFLoader. 50 */ 51 public NasaARFFLoader() { 52 attributeNameMap = new HashMap<>(); 53 54 // Map entries for ar project 55 attributeNameMap.put("total_loc", "LOC_TOTAL"); 56 attributeNameMap.put("comment_loc", "LOC_COMMENTS"); 57 attributeNameMap.put("code_and_comment_loc", "LOC_CODE_AND_COMMENT"); 58 attributeNameMap.put("executable_loc", "LOC_EXECUTABLE"); 59 attributeNameMap.put("unique_operands", "NUM_UNIQUE_OPERANDS"); 60 attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS"); 61 attributeNameMap.put("total_operands", "NUM_OPERANDS"); 62 attributeNameMap.put("total_operators", "NUM_OPERATORS"); 63 attributeNameMap.put("halstead_length", "HALSTEAD_LENGTH"); 64 attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME"); 65 attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY"); 66 attributeNameMap.put("halstead_effort", "HALSTEAD_EFFORT"); 67 attributeNameMap.put("halstead_error", "HALSTEAD_ERROR_EST"); 68 attributeNameMap.put("halstead_time", "HALSTEAD_PROG_TIME"); 69 attributeNameMap.put("branch_count", "BRANCH_COUNT"); 70 attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY"); 71 attributeNameMap.put("design_complexity", "DESIGN_COMPLEXITY"); 72 73 // Map entries for KC2 74 attributeNameMap.put("loc", "LOC_TOTAL"); 75 attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); 76 attributeNameMap.put("lOComment", "LOC_COMMENTS"); 77 attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT"); 78 attributeNameMap.put("uniq_Op", "NUM_UNIQUE_OPERATORS"); 79 attributeNameMap.put("uniq_Opnd", "NUM_UNIQUE_OPERANDS"); 80 attributeNameMap.put("total_Op", "NUM_OPERATORS"); 81 attributeNameMap.put("total_Opnd", "NUM_OPERANDS"); 82 attributeNameMap.put("v", "HALSTEAD_VOLUME"); 83 attributeNameMap.put("l", "HALSTEAD_LENGTH"); 84 attributeNameMap.put("d", "HALSTEAD_DIFFICULTY"); 85 attributeNameMap.put("e", "HALSTEAD_EFFORT"); 86 attributeNameMap.put("b", "HALSTEAD_ERROR_EST"); 87 attributeNameMap.put("t", "HALSTEAD_PROG_TIME"); 88 attributeNameMap.put("branchCount", "BRANCH_COUNT"); 89 attributeNameMap.put("v(g)", "CYCLOMATIC_COMPLEXITY"); 90 attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY"); 91 92 attributeNameMap.put("defects", "bug"); 93 attributeNameMap.put("Defective", "bug"); 94 attributeNameMap.put("problems", "bug"); 95 attributeNameMap.put("label", "bug"); 96 97 // build list with normalized attribute order 98 attributeOrder = new LinkedList<>(); 99 100 attributeOrder.add("LOC_TOTAL"); 101 attributeOrder.add("LOC_EXECUTABLE"); 102 attributeOrder.add("LOC_COMMENTS"); 103 attributeOrder.add("LOC_CODE_AND_COMMENT"); 104 attributeOrder.add("NUM_UNIQUE_OPERATORS"); 105 attributeOrder.add("NUM_UNIQUE_OPERANDS"); 106 attributeOrder.add("NUM_OPERATORS"); 107 attributeOrder.add("NUM_OPERANDS"); 108 attributeOrder.add("HALSTEAD_VOLUME"); 109 attributeOrder.add("HALSTEAD_LENGTH"); 110 attributeOrder.add("HALSTEAD_DIFFICULTY"); 111 attributeOrder.add("HALSTEAD_EFFORT"); 112 attributeOrder.add("HALSTEAD_ERROR_EST"); 113 attributeOrder.add("HALSTEAD_PROG_TIME"); 114 attributeOrder.add("BRANCH_COUNT"); 115 attributeOrder.add("CYCLOMATIC_COMPLEXITY"); 116 attributeOrder.add("DESIGN_COMPLEXITY"); 117 attributeOrder.add("bug"); 118 } 119 120 /* 121 * (non-Javadoc) 122 * 123 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 124 */ 125 @Override 126 public Instances load(File file) { 127 BufferedReader reader; 128 Instances data; 129 try { 130 reader = new BufferedReader(new FileReader(file)); 131 data = new Instances(reader); 132 reader.close(); 133 } 134 catch (IOException e) { 135 throw new RuntimeException("Error reading data", e); 136 } 137 138 // setting class attribute 139 data.setClassIndex(data.numAttributes() - 1); 140 141 // normalize attribute names 142 for (int i = 0; i < data.numAttributes(); i++) { 143 String mapValue = attributeNameMap.get(data.attribute(i).name()); 144 if (mapValue != null) { 145 data.renameAttribute(i, mapValue); 146 } 147 } 148 149 // determine new attribute order (unwanted attributes are implicitly 150 // removed 151 String orderString = ""; 152 for (String attName : attributeOrder) { 153 for (int i = 0; i < data.numAttributes(); i++) { 154 if (attName.equals(data.attribute(i).name())) { 155 orderString += (i + 1) + ","; 156 } 157 } 158 } 159 orderString = orderString.substring(0, orderString.length() - 1); 160 161 String relationName = data.relationName(); 162 String[] options = new String[2]; 163 options[0] = "-R"; 164 options[1] = orderString; 165 Reorder reorder = new Reorder(); 166 try { 167 reorder.setOptions(options); 168 reorder.setInputFormat(data); 169 data = Filter.useFilter(data, reorder); 170 } 171 catch (Exception e) { 172 throw new RuntimeException("Error while reordering the data", e); 173 } 174 if (data.numAttributes() != attributeOrder.size()) { 175 throw new RuntimeException("Invalid number of attributes; filename: " + file.getName()); 176 } 177 178 // normalize bug nominal values 179 Add add = new Add(); 180 add.setAttributeIndex("last"); 181 add.setNominalLabels("0,1"); 182 add.setAttributeName("bug-new"); 183 try { 184 add.setInputFormat(data); 185 data = Filter.useFilter(data, add); 186 } 187 catch (Exception e) { 188 throw new RuntimeException("Error while normalizing the bug nonminal values", e); 189 } 190 data.setRelationName(relationName); 191 192 double classValue; 193 194 String firstValue = data.classAttribute().enumerateValues().nextElement().toString(); 195 if (firstValue.equals("Y") || firstValue.equals("yes") || firstValue.equals("true")) { 196 classValue = 0.0; 197 } 198 else { 199 classValue = 1.0; 200 } 201 202 for (int i = 0; i < data.numInstances(); i++) { 203 if (data.instance(i).classValue() == classValue) { 204 data.instance(i).setValue(data.classIndex() + 1, 1.0); 205 } 206 else { 207 data.instance(i).setValue(data.classIndex() + 1, 0.0); 208 } 209 } 210 211 int oldClassIndex = data.classIndex(); 212 data.setClassIndex(oldClassIndex + 1); 213 data.deleteAttributeAt(oldClassIndex); 214 215 return data; 216 } 217 218 /* 219 * (non-Javadoc) 220 * 221 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 222 * filenameFilter(java.lang.String) 223 */ 224 @Override 225 public boolean filenameFilter(String filename) { 226 return filename.endsWith(".arff"); 227 } 215 228 216 229 }
Note: See TracChangeset
for help on using the changeset viewer.