- Timestamp:
- 09/24/15 10:59:05 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeLoader.java
r38 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 15 29 /** 16 30 * TODO 31 * 17 32 * @author sherbold 18 * 33 * 19 34 */ 20 35 class AUDIChangeLoader implements SingleVersionLoader { 21 22 private class EntityRevisionPair implements Comparable<EntityRevisionPair> { 23 private final String entity; 24 private final int revision; 25 26 public EntityRevisionPair(String entity, int revision) { 27 this.entity = entity; 28 this.revision = revision; 29 } 30 31 @Override 32 public boolean equals(Object other) { 33 if( !(other instanceof EntityRevisionPair) ) { 34 return false; 35 } else { 36 return compareTo((EntityRevisionPair) other)==0; 37 } 38 } 39 40 @Override 41 public int hashCode() { 42 return entity.hashCode()+revision; 43 } 44 45 @Override 46 public int compareTo(EntityRevisionPair other) { 47 int strCmp = this.entity.compareTo(other.entity); 48 if( strCmp!=0 ) { 49 return strCmp; 50 } 51 return Integer.compare(revision, other.revision); 52 } 53 54 @Override 55 public String toString() { 56 return entity+"@"+revision; 57 } 58 } 59 60 @Override 61 public Instances load(File file) { 62 final String[] lines; 63 String[] lineSplit; 64 String[] lineSplitBug; 65 66 try { 67 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 68 } catch (IOException e) { 69 throw new RuntimeException(e); 70 } 71 72 // information about bugs are in another file 73 String path = file.getAbsolutePath(); 74 path = path.substring(0, path.length()-14) + "repro.csv"; 75 final String[] linesBug; 76 try { 77 linesBug = FileTools.getLinesFromFile(path); 78 } catch (IOException e) { 79 throw new RuntimeException(e); 80 } 81 82 int revisionIndex=-1; 83 int bugIndex=-1; 84 lineSplitBug = linesBug[0].split(";"); 85 for( int j=0; j<lineSplitBug.length ; j++ ) { 86 if( lineSplitBug[j].equals("svnrev") ) { 87 revisionIndex=j; 88 } 89 if( lineSplitBug[j].equals("num_bugs_trace") ) { 90 bugIndex=j; 91 } 92 } 93 if( revisionIndex<0 ) { 94 throw new RuntimeException("could not find SVN revisions"); 95 } 96 if( bugIndex<0 ) { 97 throw new RuntimeException("could not find bug information"); 98 } 99 100 int metricsStartIndex=-1; 101 int metricsEndIndex=-1; 102 lineSplit = lines[0].split(";"); 103 for( int j=0; j<lineSplit.length ; j++ ) { 104 if( lineSplit[j].equals("lm_LOC") ) { 105 metricsStartIndex=j; 106 } 107 if( lineSplit[j].equals("h_E") ) { 108 metricsEndIndex=j; 109 } 110 } 111 if( metricsStartIndex<0 ) { 112 throw new RuntimeException("could not find first metric, i.e., lm_LOC"); 113 } 114 if( metricsEndIndex<0 ) { 115 throw new RuntimeException("could not find last metric, i.e., h_E"); 116 } 117 int numMetrics = metricsEndIndex-metricsStartIndex+1; 118 119 // create sets of all filenames and revisions 120 SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>(); 121 for( int i=1; i<linesBug.length ; i++ ) { 122 lineSplitBug = linesBug[i].split(";"); 123 entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer.parseInt(lineSplitBug[revisionIndex])), i); 124 } 125 126 127 // prepare weka instances 128 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 129 lineSplit = lines[0].split(";"); 130 for (int j = metricsStartIndex; j<=metricsEndIndex; j++) { 131 atts.add(new Attribute(lineSplit[j]+"_delta")); 132 } 133 for (int j = metricsStartIndex; j<=metricsEndIndex; j++) { 134 atts.add(new Attribute(lineSplit[j]+"_abs")); 135 } 136 final ArrayList<String> classAttVals = new ArrayList<String>(); 137 classAttVals.add("0"); 138 classAttVals.add("1"); 139 final Attribute classAtt = new Attribute("bug", classAttVals); 140 atts.add(classAtt); 141 142 final Instances data = new Instances(file.getName(), atts, 0); 143 data.setClass(classAtt); 144 145 // create data 146 String lastFile = null; 147 double[] lastValues = null; 148 int lastNumBugs = 0; 149 for( Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet() ) { 150 try { 151 // first get values 152 lineSplit = lines[entry.getValue()].split(";"); 153 lineSplitBug = linesBug[entry.getValue()].split(";"); 154 int i=0; 155 double[] values = new double[numMetrics]; 156 for(int j=metricsStartIndex ; j<=metricsEndIndex ; j++ ) { 157 values[i] = Double.parseDouble(lineSplit[j]); 158 i++; 159 } 160 int numBugs = Integer.parseInt(lineSplitBug[bugIndex]); 161 162 // then check if an entity must be created 163 if( entry.getKey().entity.equals(lastFile)) { 164 // create new instance 165 double[] instanceValues = new double[2*numMetrics+1]; 166 for( int j=0; j<numMetrics; j++ ) { 167 instanceValues[j] = values[j]-lastValues[j]; 168 instanceValues[j+numMetrics]= values[j]; 169 } 170 // check if any value>0 171 boolean changeOccured = false; 172 for( int j=0; j<numMetrics; j++ ) { 173 if( instanceValues[j]>0 ) { 174 changeOccured = true; 175 } 176 } 177 if( changeOccured ) { 178 instanceValues[instanceValues.length-1] = numBugs<=lastNumBugs ? 0 : 1; 179 data.add(new DenseInstance(1.0, instanceValues)); 180 } 181 } 182 lastFile = entry.getKey().entity; 183 lastValues = values; 184 lastNumBugs = numBugs; 185 } catch(IllegalArgumentException e) { 186 System.err.println("error in line " + entry.getValue() + ": " + e.getMessage()); 187 System.err.println("metrics line: " + lines[entry.getValue()]); 188 System.err.println("bugs line: " + linesBug[entry.getValue()]); 189 System.err.println("line is ignored"); 190 } 191 } 192 193 return data; 194 } 195 196 /* 197 * (non-Javadoc) 198 * 199 * @see 200 * de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( 201 * java.io.File) 202 */ 203 204 public Instances load(File file, String dummy) { 205 final String[] lines; 206 try { 207 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 208 } catch (IOException e) { 209 throw new RuntimeException(e); 210 } 211 212 // information about bugs are in another file 213 String path = file.getAbsolutePath(); 214 path = path.substring(0, path.length()-14) + "repro.csv"; 215 final String[] linesBug; 216 try { 217 linesBug = FileTools.getLinesFromFile(path); 218 } catch (IOException e) { 219 throw new RuntimeException(e); 220 } 221 222 // configure Instances 223 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 224 225 String[] lineSplit = lines[0].split(";"); 226 // ignore first three/four and last two columns 227 int offset; 228 if( lineSplit[3].equals("project_rev") ) { 229 offset = 4; 230 } else { 231 offset = 3; 232 } 233 for (int j = 0; j < lineSplit.length - (offset+2); j++) { 234 atts.add(new Attribute(lineSplit[j + offset])); 235 } 236 final ArrayList<String> classAttVals = new ArrayList<String>(); 237 classAttVals.add("0"); 238 classAttVals.add("1"); 239 final Attribute classAtt = new Attribute("bug", classAttVals); 240 atts.add(classAtt); 241 242 final Instances data = new Instances(file.getName(), atts, 0); 243 data.setClass(classAtt); 244 245 // fetch data 246 for (int i = 1; i < lines.length; i++) { 247 boolean validInstance = true; 248 lineSplit = lines[i].split(";"); 249 String[] lineSplitBug = linesBug[i].split(";"); 250 double[] values = new double[data.numAttributes()]; 251 for (int j = 0; validInstance && j < values.length-1; j++) { 252 if( lineSplit[j + offset].trim().isEmpty() ) { 253 validInstance = false; 254 } else { 255 values[j] = Double.parseDouble(lineSplit[j + offset].trim()); 256 } 257 } 258 if( offset==3 ) { 259 values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; 260 } else { 261 values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; 262 } 263 264 if( validInstance ) { 265 data.add(new DenseInstance(1.0, values)); 266 } else { 267 System.out.println("instance " + i + " is invalid"); 268 } 269 } 270 return data; 271 } 272 273 /* 274 * (non-Javadoc) 275 * 276 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 277 * filenameFilter(java.lang.String) 278 */ 279 @Override 280 public boolean filenameFilter(String filename) { 281 return filename.endsWith("src.csv"); 282 } 36 37 private class EntityRevisionPair implements Comparable<EntityRevisionPair> { 38 private final String entity; 39 private final int revision; 40 41 public EntityRevisionPair(String entity, int revision) { 42 this.entity = entity; 43 this.revision = revision; 44 } 45 46 @Override 47 public boolean equals(Object other) { 48 if (!(other instanceof EntityRevisionPair)) { 49 return false; 50 } 51 else { 52 return compareTo((EntityRevisionPair) other) == 0; 53 } 54 } 55 56 @Override 57 public int hashCode() { 58 return entity.hashCode() + revision; 59 } 60 61 @Override 62 public int compareTo(EntityRevisionPair other) { 63 int strCmp = this.entity.compareTo(other.entity); 64 if (strCmp != 0) { 65 return strCmp; 66 } 67 return Integer.compare(revision, other.revision); 68 } 69 70 @Override 71 public String toString() { 72 return entity + "@" + revision; 73 } 74 } 75 76 @Override 77 public Instances load(File file) { 78 final String[] lines; 79 String[] lineSplit; 80 String[] lineSplitBug; 81 82 try { 83 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 84 } 85 catch (IOException e) { 86 throw new RuntimeException(e); 87 } 88 89 // information about bugs are in another file 90 String path = file.getAbsolutePath(); 91 path = path.substring(0, path.length() - 14) + "repro.csv"; 92 final String[] linesBug; 93 try { 94 linesBug = FileTools.getLinesFromFile(path); 95 } 96 catch (IOException e) { 97 throw new RuntimeException(e); 98 } 99 100 int revisionIndex = -1; 101 int bugIndex = -1; 102 lineSplitBug = linesBug[0].split(";"); 103 for (int j = 0; j < lineSplitBug.length; j++) { 104 if (lineSplitBug[j].equals("svnrev")) { 105 revisionIndex = j; 106 } 107 if (lineSplitBug[j].equals("num_bugs_trace")) { 108 bugIndex = j; 109 } 110 } 111 if (revisionIndex < 0) { 112 throw new RuntimeException("could not find SVN revisions"); 113 } 114 if (bugIndex < 0) { 115 throw new RuntimeException("could not find bug information"); 116 } 117 118 int metricsStartIndex = -1; 119 int metricsEndIndex = -1; 120 lineSplit = lines[0].split(";"); 121 for (int j = 0; j < lineSplit.length; j++) { 122 if (lineSplit[j].equals("lm_LOC")) { 123 metricsStartIndex = j; 124 } 125 if (lineSplit[j].equals("h_E")) { 126 metricsEndIndex = j; 127 } 128 } 129 if (metricsStartIndex < 0) { 130 throw new RuntimeException("could not find first metric, i.e., lm_LOC"); 131 } 132 if (metricsEndIndex < 0) { 133 throw new RuntimeException("could not find last metric, i.e., h_E"); 134 } 135 int numMetrics = metricsEndIndex - metricsStartIndex + 1; 136 137 // create sets of all filenames and revisions 138 SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>(); 139 for (int i = 1; i < linesBug.length; i++) { 140 lineSplitBug = linesBug[i].split(";"); 141 entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer 142 .parseInt(lineSplitBug[revisionIndex])), i); 143 } 144 145 // prepare weka instances 146 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 147 lineSplit = lines[0].split(";"); 148 for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 149 atts.add(new Attribute(lineSplit[j] + "_delta")); 150 } 151 for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 152 atts.add(new Attribute(lineSplit[j] + "_abs")); 153 } 154 final ArrayList<String> classAttVals = new ArrayList<String>(); 155 classAttVals.add("0"); 156 classAttVals.add("1"); 157 final Attribute classAtt = new Attribute("bug", classAttVals); 158 atts.add(classAtt); 159 160 final Instances data = new Instances(file.getName(), atts, 0); 161 data.setClass(classAtt); 162 163 // create data 164 String lastFile = null; 165 double[] lastValues = null; 166 int lastNumBugs = 0; 167 for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) { 168 try { 169 // first get values 170 lineSplit = lines[entry.getValue()].split(";"); 171 lineSplitBug = linesBug[entry.getValue()].split(";"); 172 int i = 0; 173 double[] values = new double[numMetrics]; 174 for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 175 values[i] = Double.parseDouble(lineSplit[j]); 176 i++; 177 } 178 int numBugs = Integer.parseInt(lineSplitBug[bugIndex]); 179 180 // then check if an entity must be created 181 if (entry.getKey().entity.equals(lastFile)) { 182 // create new instance 183 double[] instanceValues = new double[2 * numMetrics + 1]; 184 for (int j = 0; j < numMetrics; j++) { 185 instanceValues[j] = values[j] - lastValues[j]; 186 instanceValues[j + numMetrics] = values[j]; 187 } 188 // check if any value>0 189 boolean changeOccured = false; 190 for (int j = 0; j < numMetrics; j++) { 191 if (instanceValues[j] > 0) { 192 changeOccured = true; 193 } 194 } 195 if (changeOccured) { 196 instanceValues[instanceValues.length - 1] = numBugs <= lastNumBugs ? 0 : 1; 197 data.add(new DenseInstance(1.0, instanceValues)); 198 } 199 } 200 lastFile = entry.getKey().entity; 201 lastValues = values; 202 lastNumBugs = numBugs; 203 } 204 catch (IllegalArgumentException e) { 205 System.err.println("error in line " + entry.getValue() + ": " + e.getMessage()); 206 System.err.println("metrics line: " + lines[entry.getValue()]); 207 System.err.println("bugs line: " + linesBug[entry.getValue()]); 208 System.err.println("line is ignored"); 209 } 210 } 211 212 return data; 213 } 214 215 /* 216 * (non-Javadoc) 217 * 218 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( java.io.File) 219 */ 220 221 public Instances load(File file, String dummy) { 222 final String[] lines; 223 try { 224 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 225 } 226 catch (IOException e) { 227 throw new RuntimeException(e); 228 } 229 230 // information about bugs are in another file 231 String path = file.getAbsolutePath(); 232 path = path.substring(0, path.length() - 14) + "repro.csv"; 233 final String[] linesBug; 234 try { 235 linesBug = FileTools.getLinesFromFile(path); 236 } 237 catch (IOException e) { 238 throw new RuntimeException(e); 239 } 240 241 // configure Instances 242 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 243 244 String[] lineSplit = lines[0].split(";"); 245 // ignore first three/four and last two columns 246 int offset; 247 if (lineSplit[3].equals("project_rev")) { 248 offset = 4; 249 } 250 else { 251 offset = 3; 252 } 253 for (int j = 0; j < lineSplit.length - (offset + 2); j++) { 254 atts.add(new Attribute(lineSplit[j + offset])); 255 } 256 final ArrayList<String> classAttVals = new ArrayList<String>(); 257 classAttVals.add("0"); 258 classAttVals.add("1"); 259 final Attribute classAtt = new Attribute("bug", classAttVals); 260 atts.add(classAtt); 261 262 final Instances data = new Instances(file.getName(), atts, 0); 263 data.setClass(classAtt); 264 265 // fetch data 266 for (int i = 1; i < lines.length; i++) { 267 boolean validInstance = true; 268 lineSplit = lines[i].split(";"); 269 String[] lineSplitBug = linesBug[i].split(";"); 270 double[] values = new double[data.numAttributes()]; 271 for (int j = 0; validInstance && j < values.length - 1; j++) { 272 if (lineSplit[j + offset].trim().isEmpty()) { 273 validInstance = false; 274 } 275 else { 276 values[j] = Double.parseDouble(lineSplit[j + offset].trim()); 277 } 278 } 279 if (offset == 3) { 280 values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; 281 } 282 else { 283 values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; 284 } 285 286 if (validInstance) { 287 data.add(new DenseInstance(1.0, values)); 288 } 289 else { 290 System.out.println("instance " + i + " is invalid"); 291 } 292 } 293 return data; 294 } 295 296 /* 297 * (non-Javadoc) 298 * 299 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 300 * filenameFilter(java.lang.String) 301 */ 302 @Override 303 public boolean filenameFilter(String filename) { 304 return filename.endsWith("src.csv"); 305 } 283 306 284 307 }
Note: See TracChangeset
for help on using the changeset viewer.