- Timestamp:
- 08/05/14 10:00:41 (10 years ago)
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFLoader.java
r3 r4 10 10 import java.util.Map; 11 11 12 import de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader;13 12 import weka.core.Instances; 14 13 import weka.filters.Filter; … … 16 15 import weka.filters.unsupervised.attribute.Reorder; 17 16 17 /** 18 * Loads the instances for a software version from an ARFF file of the 19 * NASA/SOFTLAB/MDP data. 20 * 21 * @author Steffen Herbold 22 */ 18 23 public class NasaARFFLoader implements SingleVersionLoader { 19 24 25 /** 26 * used to map attributes the same attribute with different names to each 27 * other 28 */ 20 29 Map<String, String> attributeNameMap; 30 31 /** 32 * used to ensure that the attribute order is the same after loading 33 */ 21 34 List<String> attributeOrder; 22 35 36 /** 37 * Constructor. Creates a new NasaARFFLoader. 38 */ 23 39 public NasaARFFLoader() { 24 40 attributeNameMap = new HashMap<>(); 25 41 26 42 // Map entries for ar project 27 43 attributeNameMap.put("total_loc", "LOC_TOTAL"); … … 32 48 attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS"); 33 49 attributeNameMap.put("total_operands", "NUM_OPERANDS"); 34 attributeNameMap.put("total_operators", 35 attributeNameMap.put("halstead_length", 50 attributeNameMap.put("total_operators", "NUM_OPERATORS"); 51 attributeNameMap.put("halstead_length", "HALSTEAD_LENGTH"); 36 52 attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME"); 37 53 attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY"); … … 41 57 attributeNameMap.put("branch_count", "BRANCH_COUNT"); 42 58 attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY"); 43 attributeNameMap.put("design_complexity", 44 59 attributeNameMap.put("design_complexity", "DESIGN_COMPLEXITY"); 60 45 61 // Map entries for KC2 46 attributeNameMap.put("loc", "LOC_TOTAL"); // TODO these first two LOCs are guesses47 attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); // TODO62 attributeNameMap.put("loc", "LOC_TOTAL"); 63 attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); 48 64 
attributeNameMap.put("lOComment", "LOC_COMMENTS"); 49 65 attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT"); … … 56 72 attributeNameMap.put("d", "HALSTEAD_DIFFICULTY"); 57 73 attributeNameMap.put("e", "HALSTEAD_EFFORT"); 58 attributeNameMap.put("b", "HALSTEAD_ERROR_EST"); // TODO not sure about this one74 attributeNameMap.put("b", "HALSTEAD_ERROR_EST"); 59 75 attributeNameMap.put("t", "HALSTEAD_PROG_TIME"); 60 76 attributeNameMap.put("branchCount", "BRANCH_COUNT"); 61 attributeNameMap.put("v(g)", 77 attributeNameMap.put("v(g)", "CYCLOMATIC_COMPLEXITY"); 62 78 attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY"); 63 64 attributeNameMap.put("defects", 79 80 attributeNameMap.put("defects", "bug"); 65 81 attributeNameMap.put("Defective", "bug"); 66 82 attributeNameMap.put("problems", "bug"); 67 83 attributeNameMap.put("label", "bug"); 68 84 69 85 // build list with normalized attribute order 70 86 attributeOrder = new LinkedList<>(); 71 87 72 88 attributeOrder.add("LOC_TOTAL"); 73 89 attributeOrder.add("LOC_EXECUTABLE"); … … 89 105 attributeOrder.add("bug"); 90 106 } 91 92 /** 93 * Loads the instances. 
94 * @param file handle to the file of the instances 95 * @return the instances 96 */ 107 108 /* 109 * (non-Javadoc) 110 * 111 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 112 */ 113 @Override 97 114 public Instances load(File file) { 98 115 BufferedReader reader; … … 103 120 reader.close(); 104 121 } catch (IOException e) { 105 // TODO Auto-generated catch block 106 throw new RuntimeException(e); 107 } 108 109 //setting class attribute 122 throw new RuntimeException("Error reading data", e); 123 } 124 125 // setting class attribute 110 126 data.setClassIndex(data.numAttributes() - 1); 111 127 112 128 // normalize attribute names 113 for ( int i=0; i<data.numAttributes(); i++) {129 for (int i = 0; i < data.numAttributes(); i++) { 114 130 String mapValue = attributeNameMap.get(data.attribute(i).name()); 115 if ( mapValue!= null) {131 if (mapValue != null) { 116 132 data.renameAttribute(i, mapValue); 117 133 } 118 134 } 119 120 // determine new attribute order (unwanted attributes are implicitly removed 135 136 // determine new attribute order (unwanted attributes are implicitly 137 // removed 121 138 String orderString = ""; 122 for ( String attName : attributeOrder) {123 for ( int i=0; i<data.numAttributes(); i++) {124 if (attName.equals(data.attribute(i).name())) {125 orderString += (i +1) + ",";139 for (String attName : attributeOrder) { 140 for (int i = 0; i < data.numAttributes(); i++) { 141 if (attName.equals(data.attribute(i).name())) { 142 orderString += (i + 1) + ","; 126 143 } 127 144 } 128 145 } 129 orderString = orderString.substring(0, orderString.length() -1);130 146 orderString = orderString.substring(0, orderString.length() - 1); 147 131 148 String relationName = data.relationName(); 132 149 String[] options = new String[2]; … … 139 156 data = Filter.useFilter(data, reorder); 140 157 } catch (Exception e) { 141 // TODO Auto-generated catch block142 throw new RuntimeException();143 }144 if( 
data.numAttributes()!=attributeOrder.size() ) {145 throw new RuntimeException("Invalid number of attributes; filename: " + file.getName());146 } 147 158 throw new RuntimeException("Error while reordering the data", e); 159 } 160 if (data.numAttributes() != attributeOrder.size()) { 161 throw new RuntimeException( 162 "Invalid number of attributes; filename: " + file.getName()); 163 } 164 148 165 // normalize bug nominal values 149 166 Add add = new Add(); 150 167 add.setAttributeIndex("last"); 151 152 153 168 add.setNominalLabels("0,1"); 169 add.setAttributeName("bug-new"); 170 try { 154 171 add.setInputFormat(data); 155 172 data = Filter.useFilter(data, add); 156 173 } catch (Exception e) { 157 // TODO Auto-generated catch block 158 e.printStackTrace(); 159 } 160 data.setRelationName(relationName); 161 162 double classValue; 163 164 String firstValue = data.classAttribute().enumerateValues().nextElement().toString(); 165 if( firstValue.equals("Y") || firstValue.equals("yes") || firstValue.equals("true") ) { 166 classValue = 0.0; 167 } else { 168 classValue = 1.0; 169 } 170 171 for( int i=0 ; i<data.numInstances() ; i++ ) { 172 if( data.instance(i).classValue() == classValue ) { 173 data.instance(i).setValue(data.classIndex()+1, 1.0); 174 throw new RuntimeException( 175 "Error while normalizing the bug nonminal values", e); 176 } 177 data.setRelationName(relationName); 178 179 double classValue; 180 181 String firstValue = data.classAttribute().enumerateValues() 182 .nextElement().toString(); 183 if (firstValue.equals("Y") || firstValue.equals("yes") 184 || firstValue.equals("true")) { 185 classValue = 0.0; 186 } else { 187 classValue = 1.0; 188 } 189 190 for (int i = 0; i < data.numInstances(); i++) { 191 if (data.instance(i).classValue() == classValue) { 192 data.instance(i).setValue(data.classIndex() + 1, 1.0); 174 193 } else { 175 data.instance(i).setValue(data.classIndex() +1, 0.0);194 data.instance(i).setValue(data.classIndex() + 1, 0.0); 176 195 } 177 196 } 
178 197 179 198 int oldClassIndex = data.classIndex(); 180 data.setClassIndex(oldClassIndex +1);199 data.setClassIndex(oldClassIndex + 1); 181 200 data.deleteAttributeAt(oldClassIndex); 182 201 183 202 return data; 184 203 } 185 204 205 /* 206 * (non-Javadoc) 207 * 208 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 209 * filenameFilter(java.lang.String) 210 */ 186 211 @Override 187 212 public boolean filenameFilter(String filename) { 188 213 return filename.endsWith(".arff"); 189 214 } 190 215 191 216 }
Note: See TracChangeset for help on using the changeset viewer.