Changeset 41 for trunk/CrossPare/src/de/ugoe/cs/cpdp/loader
- Timestamp:
- 09/24/15 10:59:05 (10 years ago)
- Location:
- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader
- Files:
-
- 18 edited
-
ARFFFolderLoader.java (modified) (2 diffs)
-
ARFFLoader.java (modified) (2 diffs)
-
AUDIChangeFolderLoader.java (modified) (1 diff)
-
AUDIChangeLoader.java (modified) (2 diffs)
-
AUDIDataLoader.java (modified) (2 diffs)
-
AUDIFolderLoader.java (modified) (1 diff)
-
AbstractFolderLoader.java (modified) (3 diffs)
-
CSVDataLoader.java (modified) (3 diffs)
-
CSVFolderLoader.java (modified) (2 diffs)
-
CSVMockusDataLoader.java (modified) (2 diffs)
-
CSVMockusFolderLoader.java (modified) (1 diff)
-
DecentDataLoader.java (modified) (2 diffs)
-
DecentFolderLoader.java (modified) (2 diffs)
-
IDecentVersionLoader.java (modified) (2 diffs)
-
IVersionLoader.java (modified) (2 diffs)
-
NasaARFFFolderLoader.java (modified) (2 diffs)
-
NasaARFFLoader.java (modified) (3 diffs)
-
SingleVersionLoader.java (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/ARFFFolderLoader.java
r4 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 9 23 public class ARFFFolderLoader extends AbstractFolderLoader { 10 24 11 /*12 * (non-Javadoc)13 *14 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader()15 */16 @Override17 protected SingleVersionLoader getSingleLoader() {18 return new ARFFLoader();19 }25 /* 26 * (non-Javadoc) 27 * 28 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 29 */ 30 @Override 31 protected SingleVersionLoader getSingleLoader() { 32 return new ARFFLoader(); 33 } 20 34 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/ARFFLoader.java
r6 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 15 29 public class ARFFLoader implements SingleVersionLoader { 16 30 17 /* 18 * (non-Javadoc) 19 * 20 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 21 */ 22 @Override 23 public Instances load(File file) { 24 BufferedReader reader; 25 Instances data; 26 try { 27 reader = new BufferedReader(new FileReader(file)); 28 data = new Instances(reader); 29 reader.close(); 30 } catch (IOException e) { 31 throw new RuntimeException("error reading file: " + file.getName(), e); 32 } 31 /* 32 * (non-Javadoc) 33 * 34 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 35 */ 36 @Override 37 public Instances load(File file) { 38 BufferedReader reader; 39 Instances data; 40 try { 41 reader = new BufferedReader(new FileReader(file)); 42 data = new Instances(reader); 43 reader.close(); 44 } 45 catch (IOException e) { 46 throw new RuntimeException("error reading file: " + file.getName(), e); 47 } 33 48 34 // setting class attribute35 data.setClassIndex(data.numAttributes() - 1);49 // setting class attribute 50 data.setClassIndex(data.numAttributes() - 1); 36 51 37 return data;38 }52 return data; 53 } 39 54 40 /* 41 * (non-Javadoc) 42 * 43 * @see 44 * de.ugoe.cs.cpdp.loader.SingleVersionLoader#filenameFilter(java.lang.String 45 * ) 46 */ 47 @Override 48 public boolean filenameFilter(String filename) { 49 return filename.endsWith(".arff"); 50 } 55 /* 56 * (non-Javadoc) 57 * 58 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#filenameFilter(java.lang.String ) 59 */ 60 @Override 61 public boolean filenameFilter(String filename) { 62 return filename.endsWith(".arff"); 63 } 51 64 52 65 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeFolderLoader.java
r38 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 3 17 public class AUDIChangeFolderLoader extends AbstractFolderLoader { 4 18 5 /*6 * (non-Javadoc)7 *8 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader()9 */10 @Override11 protected SingleVersionLoader getSingleLoader() {12 return new AUDIChangeLoader();13 }19 /* 20 * (non-Javadoc) 21 * 22 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 23 */ 24 @Override 25 protected SingleVersionLoader getSingleLoader() { 26 return new AUDIChangeLoader(); 27 } 14 28 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIChangeLoader.java
r38 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 15 29 /** 16 30 * TODO 31 * 17 32 * @author sherbold 18 * 33 * 19 34 */ 20 35 class AUDIChangeLoader implements SingleVersionLoader { 21 22 private class EntityRevisionPair implements Comparable<EntityRevisionPair> { 23 private final String entity; 24 private final int revision; 25 26 public EntityRevisionPair(String entity, int revision) { 27 this.entity = entity; 28 this.revision = revision; 29 } 30 31 @Override 32 public boolean equals(Object other) { 33 if( !(other instanceof EntityRevisionPair) ) { 34 return false; 35 } else { 36 return compareTo((EntityRevisionPair) other)==0; 37 } 38 } 39 40 @Override 41 public int hashCode() { 42 return entity.hashCode()+revision; 43 } 44 45 @Override 46 public int compareTo(EntityRevisionPair other) { 47 int strCmp = this.entity.compareTo(other.entity); 48 if( strCmp!=0 ) { 49 return strCmp; 50 } 51 return Integer.compare(revision, other.revision); 52 } 53 54 @Override 55 public String toString() { 56 return entity+"@"+revision; 57 } 58 } 59 60 @Override 61 public Instances load(File file) { 62 final String[] lines; 63 String[] lineSplit; 64 String[] lineSplitBug; 65 66 try { 67 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 68 } catch (IOException e) { 69 throw new RuntimeException(e); 70 } 71 72 // information about bugs are in another file 73 String path = file.getAbsolutePath(); 74 path = path.substring(0, path.length()-14) + "repro.csv"; 75 final String[] linesBug; 76 try { 77 linesBug = FileTools.getLinesFromFile(path); 78 } catch (IOException e) { 79 throw new RuntimeException(e); 80 } 81 82 int revisionIndex=-1; 83 int bugIndex=-1; 84 lineSplitBug = linesBug[0].split(";"); 85 for( int j=0; j<lineSplitBug.length ; j++ ) { 86 if( lineSplitBug[j].equals("svnrev") ) { 87 revisionIndex=j; 88 } 89 if( lineSplitBug[j].equals("num_bugs_trace") ) { 90 bugIndex=j; 91 } 92 } 93 if( revisionIndex<0 ) { 94 throw new RuntimeException("could not find SVN revisions"); 95 } 96 if( bugIndex<0 ) { 97 throw new RuntimeException("could not find bug information"); 98 } 99 100 int metricsStartIndex=-1; 101 int metricsEndIndex=-1; 102 lineSplit = lines[0].split(";"); 103 for( int j=0; j<lineSplit.length ; j++ ) { 104 if( lineSplit[j].equals("lm_LOC") ) { 105 metricsStartIndex=j; 106 } 107 if( lineSplit[j].equals("h_E") ) { 108 metricsEndIndex=j; 109 } 110 } 111 if( metricsStartIndex<0 ) { 112 throw new RuntimeException("could not find first metric, i.e., lm_LOC"); 113 } 114 if( metricsEndIndex<0 ) { 115 throw new RuntimeException("could not find last metric, i.e., h_E"); 116 } 117 int numMetrics = metricsEndIndex-metricsStartIndex+1; 118 119 // create sets of all filenames and revisions 120 SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>(); 121 for( int i=1; i<linesBug.length ; i++ ) { 122 lineSplitBug = linesBug[i].split(";"); 123 entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer.parseInt(lineSplitBug[revisionIndex])), i); 124 } 125 126 127 // prepare weka instances 128 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 129 lineSplit = lines[0].split(";"); 130 for (int j = metricsStartIndex; j<=metricsEndIndex; j++) { 131 atts.add(new Attribute(lineSplit[j]+"_delta")); 132 } 133 for (int j = metricsStartIndex; j<=metricsEndIndex; j++) { 134 atts.add(new Attribute(lineSplit[j]+"_abs")); 135 } 136 final ArrayList<String> classAttVals = new ArrayList<String>(); 137 classAttVals.add("0"); 138 classAttVals.add("1"); 139 final Attribute classAtt = new Attribute("bug", classAttVals); 140 atts.add(classAtt); 141 142 final Instances data = new Instances(file.getName(), atts, 0); 143 data.setClass(classAtt); 144 145 // create data 146 String lastFile = null; 147 double[] lastValues = null; 148 int lastNumBugs = 0; 149 for( Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet() ) { 150 try { 151 // first get values 152 lineSplit = lines[entry.getValue()].split(";"); 153 lineSplitBug = linesBug[entry.getValue()].split(";"); 154 int i=0; 155 double[] values = new double[numMetrics]; 156 for(int j=metricsStartIndex ; j<=metricsEndIndex ; j++ ) { 157 values[i] = Double.parseDouble(lineSplit[j]); 158 i++; 159 } 160 int numBugs = Integer.parseInt(lineSplitBug[bugIndex]); 161 162 // then check if an entity must be created 163 if( entry.getKey().entity.equals(lastFile)) { 164 // create new instance 165 double[] instanceValues = new double[2*numMetrics+1]; 166 for( int j=0; j<numMetrics; j++ ) { 167 instanceValues[j] = values[j]-lastValues[j]; 168 instanceValues[j+numMetrics]= values[j]; 169 } 170 // check if any value>0 171 boolean changeOccured = false; 172 for( int j=0; j<numMetrics; j++ ) { 173 if( instanceValues[j]>0 ) { 174 changeOccured = true; 175 } 176 } 177 if( changeOccured ) { 178 instanceValues[instanceValues.length-1] = numBugs<=lastNumBugs ? 0 : 1; 179 data.add(new DenseInstance(1.0, instanceValues)); 180 } 181 } 182 lastFile = entry.getKey().entity; 183 lastValues = values; 184 lastNumBugs = numBugs; 185 } catch(IllegalArgumentException e) { 186 System.err.println("error in line " + entry.getValue() + ": " + e.getMessage()); 187 System.err.println("metrics line: " + lines[entry.getValue()]); 188 System.err.println("bugs line: " + linesBug[entry.getValue()]); 189 System.err.println("line is ignored"); 190 } 191 } 192 193 return data; 194 } 195 196 /* 197 * (non-Javadoc) 198 * 199 * @see 200 * de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( 201 * java.io.File) 202 */ 203 204 public Instances load(File file, String dummy) { 205 final String[] lines; 206 try { 207 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 208 } catch (IOException e) { 209 throw new RuntimeException(e); 210 } 211 212 // information about bugs are in another file 213 String path = file.getAbsolutePath(); 214 path = path.substring(0, path.length()-14) + "repro.csv"; 215 final String[] linesBug; 216 try { 217 linesBug = FileTools.getLinesFromFile(path); 218 } catch (IOException e) { 219 throw new RuntimeException(e); 220 } 221 222 // configure Instances 223 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 224 225 String[] lineSplit = lines[0].split(";"); 226 // ignore first three/four and last two columns 227 int offset; 228 if( lineSplit[3].equals("project_rev") ) { 229 offset = 4; 230 } else { 231 offset = 3; 232 } 233 for (int j = 0; j < lineSplit.length - (offset+2); j++) { 234 atts.add(new Attribute(lineSplit[j + offset])); 235 } 236 final ArrayList<String> classAttVals = new ArrayList<String>(); 237 classAttVals.add("0"); 238 classAttVals.add("1"); 239 final Attribute classAtt = new Attribute("bug", classAttVals); 240 atts.add(classAtt); 241 242 final Instances data = new Instances(file.getName(), atts, 0); 243 data.setClass(classAtt); 244 245 // fetch data 246 for (int i = 1; i < lines.length; i++) { 247 boolean validInstance = true; 248 lineSplit = lines[i].split(";"); 249 String[] lineSplitBug = linesBug[i].split(";"); 250 double[] values = new double[data.numAttributes()]; 251 for (int j = 0; validInstance && j < values.length-1; j++) { 252 if( lineSplit[j + offset].trim().isEmpty() ) { 253 validInstance = false; 254 } else { 255 values[j] = Double.parseDouble(lineSplit[j + offset].trim()); 256 } 257 } 258 if( offset==3 ) { 259 values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; 260 } else { 261 values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; 262 } 263 264 if( validInstance ) { 265 data.add(new DenseInstance(1.0, values)); 266 } else { 267 System.out.println("instance " + i + " is invalid"); 268 } 269 } 270 return data; 271 } 272 273 /* 274 * (non-Javadoc) 275 * 276 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 277 * filenameFilter(java.lang.String) 278 */ 279 @Override 280 public boolean filenameFilter(String filename) { 281 return filename.endsWith("src.csv"); 282 } 36 37 private class EntityRevisionPair implements Comparable<EntityRevisionPair> { 38 private final String entity; 39 private final int revision; 40 41 public EntityRevisionPair(String entity, int revision) { 42 this.entity = entity; 43 this.revision = revision; 44 } 45 46 @Override 47 public boolean equals(Object other) { 48 if (!(other instanceof EntityRevisionPair)) { 49 return false; 50 } 51 else { 52 return compareTo((EntityRevisionPair) other) == 0; 53 } 54 } 55 56 @Override 57 public int hashCode() { 58 return entity.hashCode() + revision; 59 } 60 61 @Override 62 public int compareTo(EntityRevisionPair other) { 63 int strCmp = this.entity.compareTo(other.entity); 64 if (strCmp != 0) { 65 return strCmp; 66 } 67 return Integer.compare(revision, other.revision); 68 } 69 70 @Override 71 public String toString() { 72 return entity + "@" + revision; 73 } 74 } 75 76 @Override 77 public Instances load(File file) { 78 final String[] lines; 79 String[] lineSplit; 80 String[] lineSplitBug; 81 82 try { 83 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 84 } 85 catch (IOException e) { 86 throw new RuntimeException(e); 87 } 88 89 // information about bugs are in another file 90 String path = file.getAbsolutePath(); 91 path = path.substring(0, path.length() - 14) + "repro.csv"; 92 final String[] linesBug; 93 try { 94 linesBug = FileTools.getLinesFromFile(path); 95 } 96 catch (IOException e) { 97 throw new RuntimeException(e); 98 } 99 100 int revisionIndex = -1; 101 int bugIndex = -1; 102 lineSplitBug = linesBug[0].split(";"); 103 for (int j = 0; j < lineSplitBug.length; j++) { 104 if (lineSplitBug[j].equals("svnrev")) { 105 revisionIndex = j; 106 } 107 if (lineSplitBug[j].equals("num_bugs_trace")) { 108 bugIndex = j; 109 } 110 } 111 if (revisionIndex < 0) { 112 throw new RuntimeException("could not find SVN revisions"); 113 } 114 if (bugIndex < 0) { 115 throw new RuntimeException("could not find bug information"); 116 } 117 118 int metricsStartIndex = -1; 119 int metricsEndIndex = -1; 120 lineSplit = lines[0].split(";"); 121 for (int j = 0; j < lineSplit.length; j++) { 122 if (lineSplit[j].equals("lm_LOC")) { 123 metricsStartIndex = j; 124 } 125 if (lineSplit[j].equals("h_E")) { 126 metricsEndIndex = j; 127 } 128 } 129 if (metricsStartIndex < 0) { 130 throw new RuntimeException("could not find first metric, i.e., lm_LOC"); 131 } 132 if (metricsEndIndex < 0) { 133 throw new RuntimeException("could not find last metric, i.e., h_E"); 134 } 135 int numMetrics = metricsEndIndex - metricsStartIndex + 1; 136 137 // create sets of all filenames and revisions 138 SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>(); 139 for (int i = 1; i < linesBug.length; i++) { 140 lineSplitBug = linesBug[i].split(";"); 141 entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer 142 .parseInt(lineSplitBug[revisionIndex])), i); 143 } 144 145 // prepare weka instances 146 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 147 lineSplit = lines[0].split(";"); 148 for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 149 atts.add(new Attribute(lineSplit[j] + "_delta")); 150 } 151 for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 152 atts.add(new Attribute(lineSplit[j] + "_abs")); 153 } 154 final ArrayList<String> classAttVals = new ArrayList<String>(); 155 classAttVals.add("0"); 156 classAttVals.add("1"); 157 final Attribute classAtt = new Attribute("bug", classAttVals); 158 atts.add(classAtt); 159 160 final Instances data = new Instances(file.getName(), atts, 0); 161 data.setClass(classAtt); 162 163 // create data 164 String lastFile = null; 165 double[] lastValues = null; 166 int lastNumBugs = 0; 167 for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) { 168 try { 169 // first get values 170 lineSplit = lines[entry.getValue()].split(";"); 171 lineSplitBug = linesBug[entry.getValue()].split(";"); 172 int i = 0; 173 double[] values = new double[numMetrics]; 174 for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 175 values[i] = Double.parseDouble(lineSplit[j]); 176 i++; 177 } 178 int numBugs = Integer.parseInt(lineSplitBug[bugIndex]); 179 180 // then check if an entity must be created 181 if (entry.getKey().entity.equals(lastFile)) { 182 // create new instance 183 double[] instanceValues = new double[2 * numMetrics + 1]; 184 for (int j = 0; j < numMetrics; j++) { 185 instanceValues[j] = values[j] - lastValues[j]; 186 instanceValues[j + numMetrics] = values[j]; 187 } 188 // check if any value>0 189 boolean changeOccured = false; 190 for (int j = 0; j < numMetrics; j++) { 191 if (instanceValues[j] > 0) { 192 changeOccured = true; 193 } 194 } 195 if (changeOccured) { 196 instanceValues[instanceValues.length - 1] = numBugs <= lastNumBugs ? 0 : 1; 197 data.add(new DenseInstance(1.0, instanceValues)); 198 } 199 } 200 lastFile = entry.getKey().entity; 201 lastValues = values; 202 lastNumBugs = numBugs; 203 } 204 catch (IllegalArgumentException e) { 205 System.err.println("error in line " + entry.getValue() + ": " + e.getMessage()); 206 System.err.println("metrics line: " + lines[entry.getValue()]); 207 System.err.println("bugs line: " + linesBug[entry.getValue()]); 208 System.err.println("line is ignored"); 209 } 210 } 211 212 return data; 213 } 214 215 /* 216 * (non-Javadoc) 217 * 218 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( java.io.File) 219 */ 220 221 public Instances load(File file, String dummy) { 222 final String[] lines; 223 try { 224 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 225 } 226 catch (IOException e) { 227 throw new RuntimeException(e); 228 } 229 230 // information about bugs are in another file 231 String path = file.getAbsolutePath(); 232 path = path.substring(0, path.length() - 14) + "repro.csv"; 233 final String[] linesBug; 234 try { 235 linesBug = FileTools.getLinesFromFile(path); 236 } 237 catch (IOException e) { 238 throw new RuntimeException(e); 239 } 240 241 // configure Instances 242 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 243 244 String[] lineSplit = lines[0].split(";"); 245 // ignore first three/four and last two columns 246 int offset; 247 if (lineSplit[3].equals("project_rev")) { 248 offset = 4; 249 } 250 else { 251 offset = 3; 252 } 253 for (int j = 0; j < lineSplit.length - (offset + 2); j++) { 254 atts.add(new Attribute(lineSplit[j + offset])); 255 } 256 final ArrayList<String> classAttVals = new ArrayList<String>(); 257 classAttVals.add("0"); 258 classAttVals.add("1"); 259 final Attribute classAtt = new Attribute("bug", classAttVals); 260 atts.add(classAtt); 261 262 final Instances data = new Instances(file.getName(), atts, 0); 263 data.setClass(classAtt); 264 265 // fetch data 266 for (int i = 1; i < lines.length; i++) { 267 boolean validInstance = true; 268 lineSplit = lines[i].split(";"); 269 String[] lineSplitBug = linesBug[i].split(";"); 270 double[] values = new double[data.numAttributes()]; 271 for (int j = 0; validInstance && j < values.length - 1; j++) { 272 if (lineSplit[j + offset].trim().isEmpty()) { 273 validInstance = false; 274 } 275 else { 276 values[j] = Double.parseDouble(lineSplit[j + offset].trim()); 277 } 278 } 279 if (offset == 3) { 280 values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; 281 } 282 else { 283 values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; 284 } 285 286 if (validInstance) { 287 data.add(new DenseInstance(1.0, values)); 288 } 289 else { 290 System.out.println("instance " + i + " is invalid"); 291 } 292 } 293 return data; 294 } 295 296 /* 297 * (non-Javadoc) 298 * 299 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 300 * filenameFilter(java.lang.String) 301 */ 302 @Override 303 public boolean filenameFilter(String filename) { 304 return filename.endsWith("src.csv"); 305 } 283 306 284 307 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIDataLoader.java
r35 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 12 26 /** 13 27 * TODO 28 * 14 29 * @author sherbold 15 * 30 * 16 31 */ 17 32 class AUDIDataLoader implements SingleVersionLoader { 18 33 19 /* 20 * (non-Javadoc) 21 * 22 * @see 23 * de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( 24 * java.io.File) 25 */ 26 @Override 27 public Instances load(File file) { 28 final String[] lines; 29 try { 30 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 31 } catch (IOException e) { 32 throw new RuntimeException(e); 33 } 34 35 // information about bugs are in another file 36 String path = file.getAbsolutePath(); 37 path = path.substring(0, path.length()-14) + "repro.csv"; 38 final String[] linesBug; 39 try { 40 linesBug = FileTools.getLinesFromFile(path); 41 } catch (IOException e) { 42 throw new RuntimeException(e); 43 } 44 45 // configure Instances 46 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 34 /* 35 * (non-Javadoc) 36 * 37 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( java.io.File) 38 */ 39 @Override 40 public Instances load(File file) { 41 final String[] lines; 42 try { 43 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 44 } 45 catch (IOException e) { 46 throw new RuntimeException(e); 47 } 47 48 48 String[] lineSplit = lines[0].split(";"); 49 // ignore first three/four and last two columns 50 int offset; 51 if( lineSplit[3].equals("project_rev") ) { 52 offset = 4; 53 } else { 54 offset = 3; 55 } 56 for (int j = 0; j < lineSplit.length - (offset+2); j++) { 57 atts.add(new Attribute(lineSplit[j + offset])); 58 } 59 final ArrayList<String> classAttVals = new ArrayList<String>(); 60 classAttVals.add("0"); 61 classAttVals.add("1"); 62 final Attribute classAtt = new Attribute("bug", classAttVals); 63 atts.add(classAtt); 49 // information about bugs are in another file 50 String path = file.getAbsolutePath(); 51 path = path.substring(0, path.length() - 14) + "repro.csv"; 52 final String[] linesBug; 53 try { 54 linesBug = FileTools.getLinesFromFile(path); 55 } 56 catch (IOException e) { 57 throw new RuntimeException(e); 58 } 64 59 65 final Instances data = new Instances(file.getName(), atts, 0); 66 data.setClass(classAtt);60 // configure Instances 61 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 67 62 68 // fetch data 69 for (int i = 1; i < lines.length; i++) { 70 boolean validInstance = true; 71 lineSplit = lines[i].split(";"); 72 String[] lineSplitBug = linesBug[i].split(";"); 73 double[] values = new double[data.numAttributes()]; 74 for (int j = 0; validInstance && j < values.length-1; j++) { 75 if( lineSplit[j + offset].trim().isEmpty() ) { 76 validInstance = false; 77 } else { 78 values[j] = Double.parseDouble(lineSplit[j + offset].trim()); 79 } 80 } 81 if( offset==3 ) { 82 values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; 83 } else { 84 values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; 85 } 86 87 if( validInstance ) { 88 data.add(new DenseInstance(1.0, values)); 89 } else { 90 System.out.println("instance " + i + " is invalid"); 91 } 92 } 93 return data; 94 } 63 String[] lineSplit = lines[0].split(";"); 64 // ignore first three/four and last two columns 65 int offset; 66 if (lineSplit[3].equals("project_rev")) { 67 offset = 4; 68 } 69 else { 70 offset = 3; 71 } 72 for (int j = 0; j < lineSplit.length - (offset + 2); j++) { 73 atts.add(new Attribute(lineSplit[j + offset])); 74 } 75 final ArrayList<String> classAttVals = new ArrayList<String>(); 76 classAttVals.add("0"); 77 classAttVals.add("1"); 78 final Attribute classAtt = new Attribute("bug", classAttVals); 79 atts.add(classAtt); 95 80 96 /* 97 * (non-Javadoc) 98 * 99 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 100 * filenameFilter(java.lang.String) 101 */ 102 @Override 103 public boolean filenameFilter(String filename) { 104 return filename.endsWith("src.csv"); 105 } 81 final Instances data = new Instances(file.getName(), atts, 0); 82 data.setClass(classAtt); 83 84 // fetch data 85 for (int i = 1; i < lines.length; i++) { 86 boolean validInstance = true; 87 lineSplit = lines[i].split(";"); 88 String[] lineSplitBug = linesBug[i].split(";"); 89 double[] values = new double[data.numAttributes()]; 90 for (int j = 0; validInstance && j < values.length - 1; j++) { 91 if (lineSplit[j + offset].trim().isEmpty()) { 92 validInstance = false; 93 } 94 else { 95 values[j] = Double.parseDouble(lineSplit[j + offset].trim()); 96 } 97 } 98 if (offset == 3) { 99 values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; 100 } 101 else { 102 values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; 103 } 104 105 if (validInstance) { 106 data.add(new DenseInstance(1.0, values)); 107 } 108 else { 109 System.out.println("instance " + i + " is invalid"); 110 } 111 } 112 return data; 113 } 114 115 /* 116 * (non-Javadoc) 117 * 118 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 119 * filenameFilter(java.lang.String) 120 */ 121 @Override 122 public boolean filenameFilter(String filename) { 123 return filename.endsWith("src.csv"); 124 } 106 125 107 126 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIFolderLoader.java
r35 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 3 17 public class AUDIFolderLoader extends AbstractFolderLoader { 4 18 5 /*6 * (non-Javadoc)7 *8 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader()9 */10 @Override11 protected SingleVersionLoader getSingleLoader() {12 return new AUDIDataLoader();13 }19 /* 20 * (non-Javadoc) 21 * 22 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 23 */ 24 @Override 25 protected SingleVersionLoader getSingleLoader() { 26 return new AUDIDataLoader(); 27 } 14 28 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AbstractFolderLoader.java
r32 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 10 24 11 25 /** 12 * Abstract class for loading data from a folder. The subfolders of a defined 13 * folder define the projects, the file contained in the subfolder are the 14 * versions of a project. 26 * Abstract class for loading data from a folder. The subfolders of a defined folder define the 27 * projects, the file contained in the subfolder are the versions of a project. 15 28 * 16 29 * @author Steffen Herbold … … 18 31 public abstract class AbstractFolderLoader implements IVersionLoader { 19 32 20 /**21 * Path of the data.22 */23 protected String path = "";33 /** 34 * Path of the data. 35 */ 36 protected String path = ""; 24 37 25 /**26 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#setLocation(java.lang.String)27 */28 @Override29 public void setLocation(String location) {30 path = location;31 }38 /** 39 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#setLocation(java.lang.String) 40 */ 41 @Override 42 public void setLocation(String location) { 43 path = location; 44 } 32 45 33 /**34 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#load()35 */36 @Override37 public List<SoftwareVersion> load() {38 final List<SoftwareVersion> versions = new LinkedList<SoftwareVersion>();46 /** 47 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#load() 48 */ 49 @Override 50 public List<SoftwareVersion> load() { 51 final List<SoftwareVersion> versions = new LinkedList<SoftwareVersion>(); 39 52 40 final File dataDir = new File(path);41 final SingleVersionLoader instancesLoader = getSingleLoader();53 final File dataDir = new File(path); 54 final SingleVersionLoader instancesLoader = getSingleLoader(); 42 55 43 for (File projectDir : dataDir.listFiles()) { 44 if (projectDir.isDirectory()) { 45 String projectName = projectDir.getName(); 46 for (File versionFile : projectDir.listFiles()) { 47 if (versionFile.isFile() 48 && instancesLoader.filenameFilter(versionFile 49 .getName())) { 50 String versionName = versionFile.getName(); 51 Instances data = instancesLoader.load(versionFile); 52 versions.add(new SoftwareVersion(projectName, 53 versionName, data)); 54 } 55 } 56 } 57 } 58 return versions; 59 } 56 for (File projectDir : dataDir.listFiles()) { 57 if (projectDir.isDirectory()) { 58 String projectName = projectDir.getName(); 59 for (File versionFile : projectDir.listFiles()) { 60 if (versionFile.isFile() && 61 instancesLoader.filenameFilter(versionFile.getName())) 62 { 63 String versionName = versionFile.getName(); 64 Instances data = instancesLoader.load(versionFile); 65 versions.add(new SoftwareVersion(projectName, versionName, data)); 66 } 67 } 68 } 69 } 70 return versions; 71 } 60 72 61 /** 62 * Returns the concrete {@link SingleVersionLoader} to be used with this 63 * folder loader. 64 * 65 * @return 66 */ 67 abstract protected SingleVersionLoader getSingleLoader(); 73 /** 74 * Returns the concrete {@link SingleVersionLoader} to be used with this folder loader. 75 * 76 * @return 77 */ 78 abstract protected SingleVersionLoader getSingleLoader(); 68 79 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVDataLoader.java
r4 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 11 25 12 26 /** 13 * Loads the instances for a software version from a CSV file of the PROMISE 14 * data set mined byJurezko and Madeyski.27 * Loads the instances for a software version from a CSV file of the PROMISE data set mined by 28 * Jurezko and Madeyski. 15 29 * 16 30 * @author Steffen Herbold … … 18 32 class CSVDataLoader implements SingleVersionLoader { 19 33 20 /* 21 * (non-Javadoc) 22 * 23 * @see 24 * de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( 25 * java.io.File) 26 */ 27 @Override 28 public Instances load(File file) { 29 final String[] lines; 30 try { 31 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 32 } catch (IOException e) { 33 throw new RuntimeException(e); 34 } 34 /* 35 * (non-Javadoc) 36 * 37 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( java.io.File) 38 */ 39 @Override 40 public Instances load(File file) { 41 final String[] lines; 42 try { 43 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 44 } 45 catch (IOException e) { 46 throw new RuntimeException(e); 47 } 35 48 36 // configure Instances37 final ArrayList<Attribute> atts = new ArrayList<Attribute>();49 // configure Instances 50 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 38 51 39 String[] lineSplit = lines[0].split(",");40 for (int j = 0; j < lineSplit.length - 4; j++) {41 atts.add(new Attribute(lineSplit[j + 3]));42 }43 final ArrayList<String> classAttVals = new ArrayList<String>();44 classAttVals.add("0");45 classAttVals.add("1");46 final Attribute classAtt = new Attribute("bug", classAttVals);47 atts.add(classAtt);52 String[] lineSplit = lines[0].split(","); 53 for (int j = 0; j < lineSplit.length - 4; j++) { 54 atts.add(new Attribute(lineSplit[j + 3])); 55 } 56 final ArrayList<String> classAttVals = new ArrayList<String>(); 57 classAttVals.add("0"); 58 classAttVals.add("1"); 59 final Attribute classAtt = new Attribute("bug", classAttVals); 60 atts.add(classAtt); 48 61 49 final Instances data = new Instances(file.getName(), atts, 0);50 data.setClass(classAtt);62 final Instances data = new Instances(file.getName(), atts, 0); 63 data.setClass(classAtt); 51 64 52 // fetch data 53 for (int i = 1; i < lines.length; i++) { 54 lineSplit = lines[i].split(","); 55 double[] values = new double[lineSplit.length - 3]; 56 for (int j = 0; j < values.length - 1; j++) { 57 values[j] = Double.parseDouble(lineSplit[j + 3].trim()); 58 } 59 values[values.length - 1] = lineSplit[lineSplit.length - 1].trim() 60 .equals("0") ? 0 : 1; 61 data.add(new DenseInstance(1.0, values)); 62 } 65 // fetch data 66 for (int i = 1; i < lines.length; i++) { 67 lineSplit = lines[i].split(","); 68 double[] values = new double[lineSplit.length - 3]; 69 for (int j = 0; j < values.length - 1; j++) { 70 values[j] = Double.parseDouble(lineSplit[j + 3].trim()); 71 } 72 values[values.length - 1] = lineSplit[lineSplit.length - 1].trim().equals("0") ? 0 : 1; 73 data.add(new DenseInstance(1.0, values)); 74 } 63 75 64 return data;65 }76 return data; 77 } 66 78 67 /*68 * (non-Javadoc)69 *70 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#71 * filenameFilter(java.lang.String)72 */73 @Override74 public boolean filenameFilter(String filename) {75 return filename.endsWith(".csv");76 }79 /* 80 * (non-Javadoc) 81 * 82 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 83 * filenameFilter(java.lang.String) 84 */ 85 @Override 86 public boolean filenameFilter(String filename) { 87 return filename.endsWith(".csv"); 88 } 77 89 78 90 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVFolderLoader.java
r4 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 3 17 /** 4 * Implements the {@link AbstractFolderLoader} for data from the PROMISE 5 * repository mined by Jurezkoand Madeyski.18 * Implements the {@link AbstractFolderLoader} for data from the PROMISE repository mined by Jurezko 19 * and Madeyski. 6 20 * 7 21 * @author Steffen Herbold … … 9 23 public class CSVFolderLoader extends AbstractFolderLoader { 10 24 11 /*12 * (non-Javadoc)13 *14 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader()15 */16 @Override17 protected SingleVersionLoader getSingleLoader() {18 return new CSVDataLoader();19 }25 /* 26 * (non-Javadoc) 27 * 28 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 29 */ 30 @Override 31 protected SingleVersionLoader getSingleLoader() { 32 return new CSVDataLoader(); 33 } 20 34 21 35 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVMockusDataLoader.java
r29 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 10 24 import de.ugoe.cs.util.FileTools; 11 25 12 13 26 class CSVMockusDataLoader implements SingleVersionLoader { 14 27 15 @Override 16 public Instances load(File file) { 17 final String[] lines; 18 try { 19 20 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 21 } catch (IOException e) { 22 throw new RuntimeException(e); 23 } 24 25 26 // configure Instances 27 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 28 @Override 29 public Instances load(File file) { 30 final String[] lines; 31 try { 28 32 29 String[] lineSplit = lines[0].split(","); 30 for (int j = 0; j < lineSplit.length - 3; j++) { 31 atts.add(new Attribute(lineSplit[j + 2])); 32 } 33 34 final ArrayList<String> classAttVals = new ArrayList<String>(); 35 classAttVals.add("0"); 36 classAttVals.add("1"); 37 final Attribute classAtt = new Attribute("bug", classAttVals); 38 atts.add(classAtt); 33 lines = FileTools.getLinesFromFile(file.getAbsolutePath()); 34 } 35 catch (IOException e) { 36 throw new RuntimeException(e); 37 } 39 38 40 final Instances data = new Instances(file.getName(), atts, 0); 41 data.setClass(classAtt);39 // configure Instances 40 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 42 41 43 // fetch data 44 for (int i = 1; i < lines.length; i++) { 45 lineSplit = lines[i].split(","); 46 double[] values = new double[lineSplit.length - 2]; 47 for (int j = 0; j < values.length - 1; j++) { 48 values[j] = Double.parseDouble(lineSplit[j + 2].trim()); 49 } 50 values[values.length - 1] = lineSplit[lineSplit.length - 1].trim() 51 .equals("0") ? 0 : 1; 52 data.add(new DenseInstance(1.0, values)); 53 } 42 String[] lineSplit = lines[0].split(","); 43 for (int j = 0; j < lineSplit.length - 3; j++) { 44 atts.add(new Attribute(lineSplit[j + 2])); 45 } 54 46 55 return data; 56 } 47 final ArrayList<String> classAttVals = new ArrayList<String>(); 48 classAttVals.add("0"); 49 classAttVals.add("1"); 50 final Attribute classAtt = new Attribute("bug", classAttVals); 51 atts.add(classAtt); 57 52 58 @Override 59 public boolean filenameFilter(String filename) { 60 return filename.endsWith(".csv"); 61 } 53 final Instances data = new Instances(file.getName(), atts, 0); 54 data.setClass(classAtt); 55 56 // fetch data 57 for (int i = 1; i < lines.length; i++) { 58 lineSplit = lines[i].split(","); 59 double[] values = new double[lineSplit.length - 2]; 60 for (int j = 0; j < values.length - 1; j++) { 61 values[j] = Double.parseDouble(lineSplit[j + 2].trim()); 62 } 63 values[values.length - 1] = lineSplit[lineSplit.length - 1].trim().equals("0") ? 0 : 1; 64 data.add(new DenseInstance(1.0, values)); 65 } 66 67 return data; 68 } 69 70 @Override 71 public boolean filenameFilter(String filename) { 72 return filename.endsWith(".csv"); 73 } 62 74 63 75 } 64 -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVMockusFolderLoader.java
r28 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 3 17 public class CSVMockusFolderLoader extends AbstractFolderLoader { 4 18 5 @Override6 protected SingleVersionLoader getSingleLoader() {7 return new CSVMockusDataLoader();8 }19 @Override 20 protected SingleVersionLoader getSingleLoader() { 21 return new CSVMockusDataLoader(); 22 } 9 23 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/DecentDataLoader.java
r36 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 38 52 39 53 /** 40 * Class for loading a decent model file. 41 * Loads a decent model file and (if no arff file is present) and does the 42 * following conversions: 43 * DECENT -> ARFFX -> ARFF 54 * Class for loading a decent model file. Loads a decent model file and (if no arff file is present) 55 * and does the following conversions: DECENT -> ARFFX -> ARFF 44 56 * 45 57 * @author Fabian Trautsch 46 * 58 * 47 59 */ 48 public class DecentDataLoader implements SingleVersionLoader{ 49 50 // Model Handler for Decent Models 51 private DECENTEpsilonModelHandler modelHandler = new DECENTEpsilonModelHandler(); 52 53 // Set log level 54 String logLevel = "1"; 55 String logToFile = "false"; 56 57 // This list contains attributes, that should be removed before building the arff file 58 private static List<String> attributeFilter = new LinkedList<String>(); 59 60 // This list contains all names of the different artifacts 61 private static Set<String> artifactNames = new LinkedHashSet<String>(); 62 63 // Name of the class attribute. 64 private static final String classAttributeName = "LABEL.Artifact.Target.BugFix.AverageWeight"; 65 66 67 private int getIndexOfArtifactName(String artifactName) { 68 int index = -1; 69 if(artifactNames.contains(artifactName)) { 70 int i=0; 71 for(String nameInSet: artifactNames) { 72 if(nameInSet.equals(artifactName)) { 73 index = i; 74 } else { 75 i++; 76 } 77 } 78 } 79 80 return index; 81 } 82 83 /** 84 * Defines attributes, that should be removed before building the 85 * ARFF File from. 86 */ 87 private void setAttributeFilter() { 88 attributeFilter.add("Agent.Name"); 89 90 } 91 92 /** 93 * Saves the dataset as arff after transformation (decent->arffx) and 94 * filtering 95 * 96 * @param dataSet the WEKA dataset to save 97 * @param arffLocation location where it should be saved to 98 */ 99 public void save(Instances dataSet, String arffLocation) { 100 101 102 ArffSaver saver = new ArffSaver(); 103 saver.setInstances(dataSet); 104 try { 105 saver.setFile(new File(arffLocation)); 106 saver.writeBatch(); 107 } catch (IOException e) { 108 Console.printerrln("Cannot save the file to path: "+arffLocation); 109 e.printStackTrace(); 110 } 111 } 112 113 114 /** 115 * Loads the given decent file and tranform it from decent->arffx->arff 116 * @return Instances in WEKA format 117 */ 118 @Override 119 public Instances load(File file) { 120 121 // Set attributeFilter 122 setAttributeFilter(); 123 124 // Register MetaModels 125 try { 126 registerMetaModels(); 127 } catch (Exception e1) { 128 Console.printerrln("Metamodels cannot be registered!"); 129 e1.printStackTrace(); 130 } 131 132 // Set location of decent and arffx Model 133 String decentModelLocation = file.getAbsolutePath(); 134 String pathToDecentModelFolder = decentModelLocation.substring(0,decentModelLocation.lastIndexOf(File.separator)); 135 String arffxModelLocation = pathToDecentModelFolder+"/model.arffx"; 136 String logModelLocation = pathToDecentModelFolder+"/model.log"; 137 String arffLocation = pathToDecentModelFolder+"/model.arff"; 138 139 // If arff File exists, load from it! 140 if(new File(arffLocation).exists()) { 141 System.out.println("Loading arff File..."); 142 BufferedReader reader; 143 Instances data = null; 144 try { 145 reader = new BufferedReader(new FileReader(arffLocation)); 146 data = new Instances(reader); 147 reader.close(); 148 } catch (FileNotFoundException e) { 149 Console.printerrln("File with path: "+arffLocation+" was not found."); 150 e.printStackTrace(); 151 } catch (IOException e) { 152 Console.printerrln("File with path: "+arffLocation+" cannot be read."); 153 e.printStackTrace(); 154 } 155 156 // Set class attribute if not set 157 if(data.classIndex() == -1) { 158 Attribute classAttribute = data.attribute(classAttributeName); 159 data.setClass(classAttribute); 160 } 161 162 163 return data; 164 } 165 166 // Location of EOL Scripts 167 String preprocess = "./decent/epsilon/query/preprocess.eol"; 168 String arffxToArffSource = "./decent/epsilon/query/addLabels.eol"; 169 170 // Set Log Properties 171 System.setProperty("epsilon.logLevel", logLevel); 172 System.setProperty("epsilon.logToFile", logToFile); 173 System.setProperty("epsilon.logFileAvailable", "false"); 174 175 // Set decent2arffx Properties 176 System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false"); 177 System.setProperty("epsilon.transformation.decent2arffx.type", "code"); 178 179 180 181 // Preprocess Data, transform from decent2arffx 182 try { 183 IEolExecutableModule preProcessModule = loadModule(preprocess); 184 IModel preProcessDecentModel = modelHandler.getDECENTModel(decentModelLocation, true, true); 185 IModel preProcessArffxarffxModel = modelHandler.getARFFxModel(arffxModelLocation, false, true); 186 preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel); 187 preProcessModule.getContext().getModelRepository().addModel(preProcessArffxarffxModel); 188 execute(preProcessModule, logModelLocation); 189 preProcessDecentModel.dispose(); 190 preProcessArffxarffxModel.dispose(); 191 preProcessModule.reset(); 192 } catch (URISyntaxException e) { 193 Console.printerrln("URI Syntax for decent or arffx model is wrong."); 194 e.printStackTrace(); 195 } catch (Exception e) { 196 e.printStackTrace(); 197 } 198 199 200 201 202 // Transform to arff, for label and confidence attributes 203 try { 204 IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource); 205 IModel arffxToArffArffxModel = modelHandler.getARFFxModel(arffxModelLocation, true, true); 206 arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel); 207 execute(arffxToArffModule, logModelLocation); 208 arffxToArffArffxModel.dispose(); 209 // can be stored and retained alternatively 210 arffxToArffModule.reset(); 211 } catch (URISyntaxException e) { 212 Console.printerrln("URI Syntax for arffx model is wrong."); 213 e.printStackTrace(); 214 } catch (Exception e) { 215 e.printStackTrace(); 216 } 217 218 // Unregister MetaModels, otherwise cast will fail 219 HashMap<String, Object> metaModelCache = new HashMap<>(); 220 for (String key : EPackage.Registry.INSTANCE.keySet()) { 221 metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key)); 222 }; 223 224 for (String key : metaModelCache .keySet()) { 225 EPackage.Registry.INSTANCE.remove(key); 226 }; 227 228 229 // Workaround to gernerate a usable URI. Absolute path is not 230 // possible, therefore we need to construct a relative path 231 232 URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation(); 233 String basePath = location.getFile(); 234 235 // Location is the bin folder, so we need to delete the last 4 characters 236 basePath = basePath.substring(0, basePath.length() - 4); 237 String relativePath = new File(basePath).toURI().relativize(new File(arffxModelLocation).toURI()).getPath(); 238 239 // Loard arffx file and create WEKA Instances 240 ARFFxResourceTool tool = new ARFFxResourceTool(); 241 Resource resource = tool.loadResourceFromXMI(relativePath, "arffx"); 242 243 Instances dataSet = null; 244 for(EObject o: resource.getContents()) { 245 Model m = (Model) o; 246 dataSet = createWekaDataFormat(m); 247 248 for(Instance i : m.getData()) { 249 createWekaInstance(dataSet, i); 250 } 251 } 252 253 // Set class attribute 254 Attribute classAttribute = dataSet.attribute(classAttributeName); 255 dataSet.setClass(classAttribute); 256 257 // Save as ARFF 258 save(dataSet, arffLocation); 259 260 return dataSet; 261 262 } 263 264 265 /** 266 * Creates a WekaInstance from an ARFFX Model Instance 267 * 268 * @param dataSet WekaInstance dataset, where the arffx model instances should be 269 * added to 270 * @param i arffx model instance 271 */ 272 private void createWekaInstance(Instances dataSet, Instance i) { 273 double[] values = new double[dataSet.numAttributes()]; 274 int j=0; 275 276 for(Value value : i.getValues()) { 277 String dataValue = value.getContent(); 278 String attributeName = value.getOfAttribute().getName(); 279 280 if(attributeFilter.contains(attributeName)) { 281 continue; 282 } 283 284 // Is value a LABEL.* attribute? 285 if(isLabel(attributeName)) { 286 values[j] = dataSet.attribute(j).indexOfValue(dataValue); 287 } else if (isConfidenceLabel(attributeName)){ 288 // Is value a CONFIDENCE.* attribute? 289 values[j] = dataSet.attribute(j).indexOfValue(dataValue); 290 } else if(attributeName.equals("Artifact.Name")){ 291 // Is it the name of the artifact? 292 artifactNames.add(dataValue); 293 values[j] = getIndexOfArtifactName(dataValue); 294 } else { 295 // Is it a numeric value? 296 values[j] = Double.parseDouble(dataValue); 297 } 298 299 j++; 300 } 301 302 DenseInstance inst = new DenseInstance(1.0, values); 303 dataSet.add(inst); 304 } 305 306 /** 307 * Creates a Weka Instances set out of a arffx model 308 * @param m arffx model 309 * @return 310 */ 311 private Instances createWekaDataFormat(Model m) { 312 313 // Bad solution, can be enhanced (continue in for loop) 314 ArrayList<Attribute> datasetAttributes = new ArrayList<Attribute>(); 315 for(de.ugoe.cs.cpdp.decentApp.models.arffx.Attribute attribute :m.getAttributes()) { 316 String attributeName = attribute.getName(); 317 318 if(attributeFilter.contains(attributeName)) { 319 continue; 320 } 321 322 Attribute wekaAttr; 323 324 // Is attribute a LABEL.* attribute? 325 if(isLabel(attributeName)) { 326 // Classattribute 327 final ArrayList<String> classAttVals = new ArrayList<String>(); 328 classAttVals.add("false"); 329 classAttVals.add("true"); 330 wekaAttr = new Attribute(attributeName, classAttVals); 331 } else if(isConfidenceLabel(attributeName)){ 332 // Is attribute a CONFIDENCE.* attribute? 333 ArrayList<String> labels = new ArrayList<String>(); 334 labels.add("high"); 335 labels.add("low"); 336 wekaAttr = new Attribute(attributeName, labels); 337 } else { 338 // Is it a numeric attribute? 339 wekaAttr = new Attribute(attributeName); 340 } 341 342 datasetAttributes.add(wekaAttr); 343 } 344 345 346 return new Instances("test-dataset", datasetAttributes, 0); 347 } 348 349 /** 350 * Helper methods which indicates if the given value starts with "LABEL" 351 * 352 * @param value to test 353 * @return 354 */ 355 private boolean isLabel(String value) { 356 if(value.length()>= 5 && value.substring(0, 5).equals("LABEL")) { 357 return true; 358 } 359 360 return false; 361 } 362 363 /** 364 * Helper method which indicates if the given value starts with "CONFIDENCE" 365 * @param value to test 366 * @return 367 */ 368 private boolean isConfidenceLabel(String value) { 369 if(value.length()>= 10 && value.substring(0, 10).equals("CONFIDENCE")) { 370 return true; 371 } 372 373 return false; 374 } 375 376 377 /** 378 * Returns if a filename ends with ".decent" 379 * @return 380 */ 381 @Override 382 public boolean filenameFilter(String filename) { 383 return filename.endsWith(".decent"); 384 } 385 386 /** 387 * Helper method for executing a eol scripts and adding the log model beforehand 388 * @param module module to execute 389 * @param logModelLocation location of the log model 390 * @throws Exception 391 */ 392 private void execute(IEolExecutableModule module, String logModelLocation) 393 throws Exception { 394 IModel logModel = modelHandler.getLOGModel(logModelLocation, true, true); 395 module.getContext().getModelRepository().addModel(logModel); 396 module.execute(); 397 logModel.dispose(); 398 } 399 400 /** 401 * Loads the module from a given source 402 * 403 * @param source where the module is (e.g. eol script) 404 * @return 405 * @throws Exception 406 * @throws URISyntaxException 407 */ 408 private IEolExecutableModule loadModule(String source) throws Exception, 409 URISyntaxException { 410 411 IEolExecutableModule module = null; 412 if (source.endsWith("etl")) { 413 module = new EtlModule(); 414 } else if (source.endsWith("eol")) { 415 module = new EolModule(); 416 } else { 417 418 } 419 420 module.parse(modelHandler.getFile(source)); 421 422 if (module.getParseProblems().size() > 0) { 423 Console.printerrln("Parse error occured..."); 424 for (ParseProblem problem : module.getParseProblems()) { 425 System.err.println(problem.toString()); 426 } 427 // System.exit(-1); 428 } 429 430 return module; 431 } 432 433 /** 434 * Helper method for registering the metamodels 435 * @throws Exception 436 */ 437 private void registerMetaModels() throws Exception { 438 String metaModelsPath = DECENTEpsilonModelHandler.metaPath; 439 File metaModelsLocation = new File(metaModelsPath); 440 for (File file : metaModelsLocation.listFiles()) { 441 if (file.getName().endsWith(".ecore")) { 442 EmfUtil.register(URI.createFileURI(file.getAbsolutePath()), EPackage.Registry.INSTANCE); 443 } 444 } 445 } 446 60 public class DecentDataLoader implements SingleVersionLoader { 61 62 // Model Handler for Decent Models 63 private DECENTEpsilonModelHandler modelHandler = new DECENTEpsilonModelHandler(); 64 65 // Set log level 66 String logLevel = "1"; 67 String logToFile = "false"; 68 69 // This list contains attributes, that should be removed before building the arff file 70 private static List<String> attributeFilter = new LinkedList<String>(); 71 72 // This list contains all names of the different artifacts 73 private static Set<String> artifactNames = new LinkedHashSet<String>(); 74 75 // Name of the class attribute. 76 private static final String classAttributeName = "LABEL.Artifact.Target.BugFix.AverageWeight"; 77 78 private int getIndexOfArtifactName(String artifactName) { 79 int index = -1; 80 if (artifactNames.contains(artifactName)) { 81 int i = 0; 82 for (String nameInSet : artifactNames) { 83 if (nameInSet.equals(artifactName)) { 84 index = i; 85 } 86 else { 87 i++; 88 } 89 } 90 } 91 92 return index; 93 } 94 95 /** 96 * Defines attributes, that should be removed before building the ARFF File from. 97 */ 98 private void setAttributeFilter() { 99 attributeFilter.add("Agent.Name"); 100 101 } 102 103 /** 104 * Saves the dataset as arff after transformation (decent->arffx) and filtering 105 * 106 * @param dataSet 107 * the WEKA dataset to save 108 * @param arffLocation 109 * location where it should be saved to 110 */ 111 public void save(Instances dataSet, String arffLocation) { 112 113 ArffSaver saver = new ArffSaver(); 114 saver.setInstances(dataSet); 115 try { 116 saver.setFile(new File(arffLocation)); 117 saver.writeBatch(); 118 } 119 catch (IOException e) { 120 Console.printerrln("Cannot save the file to path: " + arffLocation); 121 e.printStackTrace(); 122 } 123 } 124 125 /** 126 * Loads the given decent file and tranform it from decent->arffx->arff 127 * 128 * @return Instances in WEKA format 129 */ 130 @Override 131 public Instances load(File file) { 132 133 // Set attributeFilter 134 setAttributeFilter(); 135 136 // Register MetaModels 137 try { 138 registerMetaModels(); 139 } 140 catch (Exception e1) { 141 Console.printerrln("Metamodels cannot be registered!"); 142 e1.printStackTrace(); 143 } 144 145 // Set location of decent and arffx Model 146 String decentModelLocation = file.getAbsolutePath(); 147 String pathToDecentModelFolder = 148 decentModelLocation.substring(0, decentModelLocation.lastIndexOf(File.separator)); 149 String arffxModelLocation = pathToDecentModelFolder + "/model.arffx"; 150 String logModelLocation = pathToDecentModelFolder + "/model.log"; 151 String arffLocation = pathToDecentModelFolder + "/model.arff"; 152 153 // If arff File exists, load from it! 154 if (new File(arffLocation).exists()) { 155 System.out.println("Loading arff File..."); 156 BufferedReader reader; 157 Instances data = null; 158 try { 159 reader = new BufferedReader(new FileReader(arffLocation)); 160 data = new Instances(reader); 161 reader.close(); 162 } 163 catch (FileNotFoundException e) { 164 Console.printerrln("File with path: " + arffLocation + " was not found."); 165 e.printStackTrace(); 166 } 167 catch (IOException e) { 168 Console.printerrln("File with path: " + arffLocation + " cannot be read."); 169 e.printStackTrace(); 170 } 171 172 // Set class attribute if not set 173 if (data.classIndex() == -1) { 174 Attribute classAttribute = data.attribute(classAttributeName); 175 data.setClass(classAttribute); 176 } 177 178 return data; 179 } 180 181 // Location of EOL Scripts 182 String preprocess = "./decent/epsilon/query/preprocess.eol"; 183 String arffxToArffSource = "./decent/epsilon/query/addLabels.eol"; 184 185 // Set Log Properties 186 System.setProperty("epsilon.logLevel", logLevel); 187 System.setProperty("epsilon.logToFile", logToFile); 188 System.setProperty("epsilon.logFileAvailable", "false"); 189 190 // Set decent2arffx Properties 191 System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false"); 192 System.setProperty("epsilon.transformation.decent2arffx.type", "code"); 193 194 // Preprocess Data, transform from decent2arffx 195 try { 196 IEolExecutableModule preProcessModule = loadModule(preprocess); 197 IModel preProcessDecentModel = 198 modelHandler.getDECENTModel(decentModelLocation, true, true); 199 IModel preProcessArffxarffxModel = 200 modelHandler.getARFFxModel(arffxModelLocation, false, true); 201 preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel); 202 preProcessModule.getContext().getModelRepository().addModel(preProcessArffxarffxModel); 203 execute(preProcessModule, logModelLocation); 204 preProcessDecentModel.dispose(); 205 preProcessArffxarffxModel.dispose(); 206 preProcessModule.reset(); 207 } 208 catch (URISyntaxException e) { 209 Console.printerrln("URI Syntax for decent or arffx model is wrong."); 210 e.printStackTrace(); 211 } 212 catch (Exception e) { 213 e.printStackTrace(); 214 } 215 216 // Transform to arff, for label and confidence attributes 217 try { 218 IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource); 219 IModel arffxToArffArffxModel = 220 modelHandler.getARFFxModel(arffxModelLocation, true, true); 221 arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel); 222 execute(arffxToArffModule, logModelLocation); 223 arffxToArffArffxModel.dispose(); 224 // can be stored and retained alternatively 225 arffxToArffModule.reset(); 226 } 227 catch (URISyntaxException e) { 228 Console.printerrln("URI Syntax for arffx model is wrong."); 229 e.printStackTrace(); 230 } 231 catch (Exception e) { 232 e.printStackTrace(); 233 } 234 235 // Unregister MetaModels, otherwise cast will fail 236 HashMap<String, Object> metaModelCache = new HashMap<>(); 237 for (String key : EPackage.Registry.INSTANCE.keySet()) { 238 metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key)); 239 }; 240 241 for (String key : metaModelCache.keySet()) { 242 EPackage.Registry.INSTANCE.remove(key); 243 }; 244 245 // Workaround to gernerate a usable URI. Absolute path is not 246 // possible, therefore we need to construct a relative path 247 248 URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation(); 249 String basePath = location.getFile(); 250 251 // Location is the bin folder, so we need to delete the last 4 characters 252 basePath = basePath.substring(0, basePath.length() - 4); 253 String relativePath = 254 new File(basePath).toURI().relativize(new File(arffxModelLocation).toURI()).getPath(); 255 256 // Loard arffx file and create WEKA Instances 257 ARFFxResourceTool tool = new ARFFxResourceTool(); 258 Resource resource = tool.loadResourceFromXMI(relativePath, "arffx"); 259 260 Instances dataSet = null; 261 for (EObject o : resource.getContents()) { 262 Model m = (Model) o; 263 dataSet = createWekaDataFormat(m); 264 265 for (Instance i : m.getData()) { 266 createWekaInstance(dataSet, i); 267 } 268 } 269 270 // Set class attribute 271 Attribute classAttribute = dataSet.attribute(classAttributeName); 272 dataSet.setClass(classAttribute); 273 274 // Save as ARFF 275 save(dataSet, arffLocation); 276 277 return dataSet; 278 279 } 280 281 /** 282 * Creates a WekaInstance from an ARFFX Model Instance 283 * 284 * @param dataSet 285 * WekaInstance dataset, where the arffx model instances should be added to 286 * @param i 287 * arffx model instance 288 */ 289 private void createWekaInstance(Instances dataSet, Instance i) { 290 double[] values = new double[dataSet.numAttributes()]; 291 int j = 0; 292 293 for (Value value : i.getValues()) { 294 String dataValue = value.getContent(); 295 String attributeName = value.getOfAttribute().getName(); 296 297 if (attributeFilter.contains(attributeName)) { 298 continue; 299 } 300 301 // Is value a LABEL.* attribute? 302 if (isLabel(attributeName)) { 303 values[j] = dataSet.attribute(j).indexOfValue(dataValue); 304 } 305 else if (isConfidenceLabel(attributeName)) { 306 // Is value a CONFIDENCE.* attribute? 307 values[j] = dataSet.attribute(j).indexOfValue(dataValue); 308 } 309 else if (attributeName.equals("Artifact.Name")) { 310 // Is it the name of the artifact? 311 artifactNames.add(dataValue); 312 values[j] = getIndexOfArtifactName(dataValue); 313 } 314 else { 315 // Is it a numeric value? 316 values[j] = Double.parseDouble(dataValue); 317 } 318 319 j++; 320 } 321 322 DenseInstance inst = new DenseInstance(1.0, values); 323 dataSet.add(inst); 324 } 325 326 /** 327 * Creates a Weka Instances set out of a arffx model 328 * 329 * @param m 330 * arffx model 331 * @return 332 */ 333 private Instances createWekaDataFormat(Model m) { 334 335 // Bad solution, can be enhanced (continue in for loop) 336 ArrayList<Attribute> datasetAttributes = new ArrayList<Attribute>(); 337 for (de.ugoe.cs.cpdp.decentApp.models.arffx.Attribute attribute : m.getAttributes()) { 338 String attributeName = attribute.getName(); 339 340 if (attributeFilter.contains(attributeName)) { 341 continue; 342 } 343 344 Attribute wekaAttr; 345 346 // Is attribute a LABEL.* attribute? 347 if (isLabel(attributeName)) { 348 // Classattribute 349 final ArrayList<String> classAttVals = new ArrayList<String>(); 350 classAttVals.add("false"); 351 classAttVals.add("true"); 352 wekaAttr = new Attribute(attributeName, classAttVals); 353 } 354 else if (isConfidenceLabel(attributeName)) { 355 // Is attribute a CONFIDENCE.* attribute? 356 ArrayList<String> labels = new ArrayList<String>(); 357 labels.add("high"); 358 labels.add("low"); 359 wekaAttr = new Attribute(attributeName, labels); 360 } 361 else { 362 // Is it a numeric attribute? 363 wekaAttr = new Attribute(attributeName); 364 } 365 366 datasetAttributes.add(wekaAttr); 367 } 368 369 return new Instances("test-dataset", datasetAttributes, 0); 370 } 371 372 /** 373 * Helper methods which indicates if the given value starts with "LABEL" 374 * 375 * @param value 376 * to test 377 * @return 378 */ 379 private boolean isLabel(String value) { 380 if (value.length() >= 5 && value.substring(0, 5).equals("LABEL")) { 381 return true; 382 } 383 384 return false; 385 } 386 387 /** 388 * Helper method which indicates if the given value starts with "CONFIDENCE" 389 * 390 * @param value 391 * to test 392 * @return 393 */ 394 private boolean isConfidenceLabel(String value) { 395 if (value.length() >= 10 && value.substring(0, 10).equals("CONFIDENCE")) { 396 return true; 397 } 398 399 return false; 400 } 401 402 /** 403 * Returns if a filename ends with ".decent" 404 * 405 * @return 406 */ 407 @Override 408 public boolean filenameFilter(String filename) { 409 return filename.endsWith(".decent"); 410 } 411 412 /** 413 * Helper method for executing a eol scripts and adding the log model beforehand 414 * 415 * @param module 416 * module to execute 417 * @param logModelLocation 418 * location of the log model 419 * @throws Exception 420 */ 421 private void execute(IEolExecutableModule module, String logModelLocation) throws Exception { 422 IModel logModel = modelHandler.getLOGModel(logModelLocation, true, true); 423 module.getContext().getModelRepository().addModel(logModel); 424 module.execute(); 425 logModel.dispose(); 426 } 427 428 /** 429 * Loads the module from a given source 430 * 431 * @param source 432 * where the module is (e.g. eol script) 433 * @return 434 * @throws Exception 435 * @throws URISyntaxException 436 */ 437 private IEolExecutableModule loadModule(String source) throws Exception, URISyntaxException { 438 439 IEolExecutableModule module = null; 440 if (source.endsWith("etl")) { 441 module = new EtlModule(); 442 } 443 else if (source.endsWith("eol")) { 444 module = new EolModule(); 445 } 446 else { 447 448 } 449 450 module.parse(modelHandler.getFile(source)); 451 452 if (module.getParseProblems().size() > 0) { 453 Console.printerrln("Parse error occured..."); 454 for (ParseProblem problem : module.getParseProblems()) { 455 System.err.println(problem.toString()); 456 } 457 // System.exit(-1); 458 } 459 460 return module; 461 } 462 463 /** 464 * Helper method for registering the metamodels 465 * 466 * @throws Exception 467 */ 468 private void registerMetaModels() throws Exception { 469 String metaModelsPath = DECENTEpsilonModelHandler.metaPath; 470 File metaModelsLocation = new File(metaModelsPath); 471 for (File file : metaModelsLocation.listFiles()) { 472 if (file.getName().endsWith(".ecore")) { 473 EmfUtil.register(URI.createFileURI(file.getAbsolutePath()), 474 EPackage.Registry.INSTANCE); 475 } 476 } 477 } 478 447 479 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/DecentFolderLoader.java
r32 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 16 30 public class DecentFolderLoader extends AbstractFolderLoader { 17 31 18 /* 19 * (non-Javadoc) 20 * 21 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 22 */ 23 @Override 24 protected SingleVersionLoader getSingleLoader() { 25 return new DecentDataLoader(); 26 } 27 28 /** 29 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#load() 30 */ 31 @Override 32 public List<SoftwareVersion> load() { 33 final List<SoftwareVersion> versions = new LinkedList<SoftwareVersion>(); 32 /* 33 * (non-Javadoc) 34 * 35 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 36 */ 37 @Override 38 protected SingleVersionLoader getSingleLoader() { 39 return new DecentDataLoader(); 40 } 34 41 35 final File dataDir = new File(path); 36 final SingleVersionLoader instancesLoader = getSingleLoader(); 42 /** 43 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#load() 44 */ 45 @Override 46 public List<SoftwareVersion> load() { 47 final List<SoftwareVersion> versions = new LinkedList<SoftwareVersion>(); 37 48 38 String projectName = dataDir.getName(); 39 40 41 /* 42 * The following lines make it possible, that we can have two different possibilities 43 * to load data: 44 * 1) From one project (e.g. /decent/input/project1) 45 * 2) From more than one project (e.g. /decent/input/) 46 * 47 * Requirement is, that we have a folder structure like this: 48 * "/decent/input/project1/model.decent, /decent/input/project2/model.decent, ..." 49 * 50 * In the first one the "else" is executed, therefore it will just search the folder "project1" 51 * for a "model.decent" file. In the second one, it will look into each folder and searches for 52 * "model.decent" files. 53 */ 54 for (File projectDir : dataDir.listFiles()) { 55 if (projectDir.isDirectory()) { 56 projectName = projectDir.getName(); 57 for (File versionFile : projectDir.listFiles()) { 58 loadDataFromFile(versionFile,instancesLoader, projectName, versions); 59 } 60 } else { 61 loadDataFromFile(projectDir, instancesLoader, projectName, versions); 62 } 63 } 64 return versions; 65 } 66 67 /** 68 * Loads data from a file and adds the instances from the load method to the 69 * versions List. 70 * 71 * @param versionFile file to load from 72 * @param instancesLoader loader that should be used 73 * @param projectName name of the project which was loaded 74 * @param versions list, where the weka instances are added to 75 */ 76 77 private void loadDataFromFile(File versionFile, 78 SingleVersionLoader instancesLoader, String projectName, List<SoftwareVersion> versions) { 79 if (versionFile.isFile() 80 && instancesLoader.filenameFilter(versionFile 81 .getName())) { 82 String versionName = versionFile.getName(); 83 Instances data = instancesLoader.load(versionFile); 84 versions.add(new SoftwareVersion(projectName, 85 versionName, data)); 86 } 87 } 49 final File dataDir = new File(path); 50 final SingleVersionLoader instancesLoader = getSingleLoader(); 51 52 String projectName = dataDir.getName(); 53 54 /* 55 * The following lines make it possible, that we can have two different possibilities to 56 * load data: 1) From one project (e.g. /decent/input/project1) 2) From more than one 57 * project (e.g. /decent/input/) 58 * 59 * Requirement is, that we have a folder structure like this: 60 * "/decent/input/project1/model.decent, /decent/input/project2/model.decent, ..." 61 * 62 * In the first one the "else" is executed, therefore it will just search the folder 63 * "project1" for a "model.decent" file. In the second one, it will look into each folder 64 * and searches for "model.decent" files. 65 */ 66 for (File projectDir : dataDir.listFiles()) { 67 if (projectDir.isDirectory()) { 68 projectName = projectDir.getName(); 69 for (File versionFile : projectDir.listFiles()) { 70 loadDataFromFile(versionFile, instancesLoader, projectName, versions); 71 } 72 } 73 else { 74 loadDataFromFile(projectDir, instancesLoader, projectName, versions); 75 } 76 } 77 return versions; 78 } 79 80 /** 81 * Loads data from a file and adds the instances from the load method to the versions List. 82 * 83 * @param versionFile 84 * file to load from 85 * @param instancesLoader 86 * loader that should be used 87 * @param projectName 88 * name of the project which was loaded 89 * @param versions 90 * list, where the weka instances are added to 91 */ 92 93 private void loadDataFromFile(File versionFile, 94 SingleVersionLoader instancesLoader, 95 String projectName, 96 List<SoftwareVersion> versions) 97 { 98 if (versionFile.isFile() && instancesLoader.filenameFilter(versionFile.getName())) { 99 String versionName = versionFile.getName(); 100 Instances data = instancesLoader.load(versionFile); 101 versions.add(new SoftwareVersion(projectName, versionName, data)); 102 } 103 } 88 104 89 105 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/IDecentVersionLoader.java
r32 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 5 19 import de.ugoe.cs.cpdp.versions.SoftwareVersion; 6 20 7 public interface IDecentVersionLoader extends IVersionLoader {8 9 public List<SoftwareVersion> load(List<String> decentAttributes);21 public interface IDecentVersionLoader extends IVersionLoader { 22 23 public List<SoftwareVersion> load(List<String> decentAttributes); 10 24 11 25 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/IVersionLoader.java
r32 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 12 26 public interface IVersionLoader { 13 27 14 /**15 * Sets the location of the data.16 *17 * @param location18 * location of the data19 */20 public void setLocation(String location);28 /** 29 * Sets the location of the data. 30 * 31 * @param location 32 * location of the data 33 */ 34 public void setLocation(String location); 21 35 22 /**23 * Loads the data.24 *25 * @return the data26 */27 public List<SoftwareVersion> load();36 /** 37 * Loads the data. 38 * 39 * @return the data 40 */ 41 public List<SoftwareVersion> load(); 28 42 29 43 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFFolderLoader.java
r4 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 3 17 /** 4 * Implements the {@link AbstractFolderLoader} for the NASA/SOFTLAB/MDP data 5 * set. 18 * Implements the {@link AbstractFolderLoader} for the NASA/SOFTLAB/MDP data set. 6 19 * 7 20 * @author Steffen Herbold … … 9 22 public class NasaARFFFolderLoader extends AbstractFolderLoader { 10 23 11 /*12 * (non-Javadoc)13 *14 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader()15 */16 @Override17 protected SingleVersionLoader getSingleLoader() {18 return new NasaARFFLoader();19 }24 /* 25 * (non-Javadoc) 26 * 27 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 28 */ 29 @Override 30 protected SingleVersionLoader getSingleLoader() { 31 return new NasaARFFLoader(); 32 } 20 33 21 34 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFLoader.java
r4 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 16 30 17 31 /** 18 * Loads the instances for a software version from an ARFF file of the 19 * NASA/SOFTLAB/MDP data. 32 * Loads the instances for a software version from an ARFF file of the NASA/SOFTLAB/MDP data. 20 33 * 21 34 * @author Steffen Herbold … … 23 36 public class NasaARFFLoader implements SingleVersionLoader { 24 37 25 /**26 * used to map attributes the same attribute with different names to each 27 * other 28 */ 29 Map<String, String> attributeNameMap; 30 31 /** 32 * used to ensure that the attribute order is the same after loading 33 */ 34 List<String> attributeOrder; 35 36 /** 37 * Constructor. Creates a new NasaARFFLoader. 38 */ 39 public NasaARFFLoader() { 40 attributeNameMap = new HashMap<>(); 41 42 // Map entries for ar project 43 attributeNameMap.put("total_loc", "LOC_TOTAL");44 attributeNameMap.put("comment_loc", "LOC_COMMENTS");45 attributeNameMap.put("code_and_comment_loc", "LOC_CODE_AND_COMMENT");46 attributeNameMap.put("executable_loc", "LOC_EXECUTABLE");47 attributeNameMap.put("unique_operands", "NUM_UNIQUE_OPERANDS");48 attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS");49 attributeNameMap.put("total_operands", "NUM_OPERANDS");50 attributeNameMap.put("total_operators", "NUM_OPERATORS");51 attributeNameMap.put("halstead_length", "HALSTEAD_LENGTH");52 attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME");53 attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY");54 attributeNameMap.put("halstead_effort", "HALSTEAD_EFFORT");55 attributeNameMap.put("halstead_error", "HALSTEAD_ERROR_EST");56 attributeNameMap.put("halstead_time", "HALSTEAD_PROG_TIME");57 attributeNameMap.put("branch_count", "BRANCH_COUNT");58 attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY");59 attributeNameMap.put("design_complexity", "DESIGN_COMPLEXITY"); 60 61 // Map entries for KC2 62 attributeNameMap.put("loc", "LOC_TOTAL");63 attributeNameMap.put("lOCode", "LOC_EXECUTABLE");64 attributeNameMap.put("lOComment", "LOC_COMMENTS");65 attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT");66 attributeNameMap.put("uniq_Op", "NUM_UNIQUE_OPERATORS");67 attributeNameMap.put("uniq_Opnd", "NUM_UNIQUE_OPERANDS");68 attributeNameMap.put("total_Op", "NUM_OPERATORS");69 attributeNameMap.put("total_Opnd", "NUM_OPERANDS");70 attributeNameMap.put("v", "HALSTEAD_VOLUME");71 attributeNameMap.put("l", "HALSTEAD_LENGTH");72 attributeNameMap.put("d", "HALSTEAD_DIFFICULTY");73 attributeNameMap.put("e", "HALSTEAD_EFFORT");74 attributeNameMap.put("b", "HALSTEAD_ERROR_EST");75 attributeNameMap.put("t", "HALSTEAD_PROG_TIME");76 attributeNameMap.put("branchCount", "BRANCH_COUNT");77 attributeNameMap.put("v(g)", "CYCLOMATIC_COMPLEXITY");78 attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY"); 79 80 attributeNameMap.put("defects", "bug");81 attributeNameMap.put("Defective", "bug");82 attributeNameMap.put("problems", "bug");83 attributeNameMap.put("label", "bug"); 84 85 // build list with normalized attribute order 86 attributeOrder = new LinkedList<>(); 87 88 attributeOrder.add("LOC_TOTAL");89 attributeOrder.add("LOC_EXECUTABLE");90 attributeOrder.add("LOC_COMMENTS");91 attributeOrder.add("LOC_CODE_AND_COMMENT");92 attributeOrder.add("NUM_UNIQUE_OPERATORS");93 attributeOrder.add("NUM_UNIQUE_OPERANDS");94 attributeOrder.add("NUM_OPERATORS");95 attributeOrder.add("NUM_OPERANDS");96 attributeOrder.add("HALSTEAD_VOLUME");97 attributeOrder.add("HALSTEAD_LENGTH");98 attributeOrder.add("HALSTEAD_DIFFICULTY");99 attributeOrder.add("HALSTEAD_EFFORT");100 attributeOrder.add("HALSTEAD_ERROR_EST");101 attributeOrder.add("HALSTEAD_PROG_TIME");102 attributeOrder.add("BRANCH_COUNT");103 attributeOrder.add("CYCLOMATIC_COMPLEXITY");104 attributeOrder.add("DESIGN_COMPLEXITY");105 attributeOrder.add("bug"); 106 } 107 108 /* 109 * (non-Javadoc) 110 * 111 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 112 */ 113 @Override 114 public Instances load(File file) { 115 BufferedReader reader;116 Instances data; 117 try { 118 reader = new BufferedReader(new FileReader(file));119 data = new Instances(reader);120 reader.close(); 121 }catch (IOException e) {122 throw new RuntimeException("Error reading data", e);123 }124 125 // setting class attribute126 data.setClassIndex(data.numAttributes() - 1);127 128 // normalize attribute names129 for (int i = 0; i < data.numAttributes(); i++) {130 String mapValue = attributeNameMap.get(data.attribute(i).name());131 if (mapValue != null) {132 data.renameAttribute(i, mapValue);133 }134 }135 136 // determine new attribute order (unwanted attributes are implicitly137 // removed138 String orderString = "";139 for (String attName : attributeOrder) {140 for (int i = 0; i < data.numAttributes(); i++) {141 if (attName.equals(data.attribute(i).name())) {142 orderString += (i + 1) + ",";143 }144 }145 }146 orderString = orderString.substring(0, orderString.length() - 1);147 148 String relationName = data.relationName();149 String[] options = new String[2];150 options[0] = "-R";151 options[1] = orderString;152 Reorder reorder = new Reorder();153 try {154 reorder.setOptions(options);155 reorder.setInputFormat(data);156 data = Filter.useFilter(data, reorder);157 } catch (Exception e) { 158 throw new RuntimeException("Error while reordering the data", e); 159 } 160 if (data.numAttributes() != attributeOrder.size()) { 161 throw new RuntimeException( 162 "Invalid number of attributes; filename: " + file.getName());163 }164 165 // normalize bug nominal values166 Add add = new Add();167 add.setAttributeIndex("last");168 add.setNominalLabels("0,1");169 add.setAttributeName("bug-new");170 try {171 add.setInputFormat(data);172 data = Filter.useFilter(data, add);173 } catch (Exception e) { 174 throw new RuntimeException( 175 "Error while normalizing the bug nonminal values", e);176 }177 data.setRelationName(relationName);178 179 double classValue;180 181 String firstValue = data.classAttribute().enumerateValues() 182 .nextElement().toString(); 183 if (firstValue.equals("Y") || firstValue.equals("yes") 184 || firstValue.equals("true")) { 185 classValue = 0.0; 186 } else { 187 classValue = 1.0; 188 } 189 190 for (int i = 0; i < data.numInstances(); i++) {191 if (data.instance(i).classValue() == classValue) { 192 data.instance(i).setValue(data.classIndex() + 1, 1.0); 193 }else {194 data.instance(i).setValue(data.classIndex() + 1, 0.0);195 }196 }197 198 int oldClassIndex = data.classIndex();199 data.setClassIndex(oldClassIndex + 1);200 data.deleteAttributeAt(oldClassIndex);201 202 return data;203 }204 205 /*206 * (non-Javadoc)207 *208 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#209 * filenameFilter(java.lang.String)210 */211 @Override212 public boolean filenameFilter(String filename) {213 return filename.endsWith(".arff");214 }38 /** 39 * used to map attributes the same attribute with different names to each other 40 */ 41 Map<String, String> attributeNameMap; 42 43 /** 44 * used to ensure that the attribute order is the same after loading 45 */ 46 List<String> attributeOrder; 47 48 /** 49 * Constructor. Creates a new NasaARFFLoader. 50 */ 51 public NasaARFFLoader() { 52 attributeNameMap = new HashMap<>(); 53 54 // Map entries for ar project 55 attributeNameMap.put("total_loc", "LOC_TOTAL"); 56 attributeNameMap.put("comment_loc", "LOC_COMMENTS"); 57 attributeNameMap.put("code_and_comment_loc", "LOC_CODE_AND_COMMENT"); 58 attributeNameMap.put("executable_loc", "LOC_EXECUTABLE"); 59 attributeNameMap.put("unique_operands", "NUM_UNIQUE_OPERANDS"); 60 attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS"); 61 attributeNameMap.put("total_operands", "NUM_OPERANDS"); 62 attributeNameMap.put("total_operators", "NUM_OPERATORS"); 63 attributeNameMap.put("halstead_length", "HALSTEAD_LENGTH"); 64 attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME"); 65 attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY"); 66 attributeNameMap.put("halstead_effort", "HALSTEAD_EFFORT"); 67 attributeNameMap.put("halstead_error", "HALSTEAD_ERROR_EST"); 68 attributeNameMap.put("halstead_time", "HALSTEAD_PROG_TIME"); 69 attributeNameMap.put("branch_count", "BRANCH_COUNT"); 70 attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY"); 71 attributeNameMap.put("design_complexity", "DESIGN_COMPLEXITY"); 72 73 // Map entries for KC2 74 attributeNameMap.put("loc", "LOC_TOTAL"); 75 attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); 76 attributeNameMap.put("lOComment", "LOC_COMMENTS"); 77 attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT"); 78 attributeNameMap.put("uniq_Op", "NUM_UNIQUE_OPERATORS"); 79 attributeNameMap.put("uniq_Opnd", "NUM_UNIQUE_OPERANDS"); 80 attributeNameMap.put("total_Op", "NUM_OPERATORS"); 81 attributeNameMap.put("total_Opnd", "NUM_OPERANDS"); 82 attributeNameMap.put("v", "HALSTEAD_VOLUME"); 83 attributeNameMap.put("l", "HALSTEAD_LENGTH"); 84 attributeNameMap.put("d", "HALSTEAD_DIFFICULTY"); 85 attributeNameMap.put("e", "HALSTEAD_EFFORT"); 86 attributeNameMap.put("b", "HALSTEAD_ERROR_EST"); 87 attributeNameMap.put("t", "HALSTEAD_PROG_TIME"); 88 attributeNameMap.put("branchCount", "BRANCH_COUNT"); 89 attributeNameMap.put("v(g)", "CYCLOMATIC_COMPLEXITY"); 90 attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY"); 91 92 attributeNameMap.put("defects", "bug"); 93 attributeNameMap.put("Defective", "bug"); 94 attributeNameMap.put("problems", "bug"); 95 attributeNameMap.put("label", "bug"); 96 97 // build list with normalized attribute order 98 attributeOrder = new LinkedList<>(); 99 100 attributeOrder.add("LOC_TOTAL"); 101 attributeOrder.add("LOC_EXECUTABLE"); 102 attributeOrder.add("LOC_COMMENTS"); 103 attributeOrder.add("LOC_CODE_AND_COMMENT"); 104 attributeOrder.add("NUM_UNIQUE_OPERATORS"); 105 attributeOrder.add("NUM_UNIQUE_OPERANDS"); 106 attributeOrder.add("NUM_OPERATORS"); 107 attributeOrder.add("NUM_OPERANDS"); 108 attributeOrder.add("HALSTEAD_VOLUME"); 109 attributeOrder.add("HALSTEAD_LENGTH"); 110 attributeOrder.add("HALSTEAD_DIFFICULTY"); 111 attributeOrder.add("HALSTEAD_EFFORT"); 112 attributeOrder.add("HALSTEAD_ERROR_EST"); 113 attributeOrder.add("HALSTEAD_PROG_TIME"); 114 attributeOrder.add("BRANCH_COUNT"); 115 attributeOrder.add("CYCLOMATIC_COMPLEXITY"); 116 attributeOrder.add("DESIGN_COMPLEXITY"); 117 attributeOrder.add("bug"); 118 } 119 120 /* 121 * (non-Javadoc) 122 * 123 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 124 */ 125 @Override 126 public Instances load(File file) { 127 BufferedReader reader; 128 Instances data; 129 try { 130 reader = new BufferedReader(new FileReader(file)); 131 data = new Instances(reader); 132 reader.close(); 133 } 134 catch (IOException e) { 135 throw new RuntimeException("Error reading data", e); 136 } 137 138 // setting class attribute 139 data.setClassIndex(data.numAttributes() - 1); 140 141 // normalize attribute names 142 for (int i = 0; i < data.numAttributes(); i++) { 143 String mapValue = attributeNameMap.get(data.attribute(i).name()); 144 if (mapValue != null) { 145 data.renameAttribute(i, mapValue); 146 } 147 } 148 149 // determine new attribute order (unwanted attributes are implicitly 150 // removed 151 String orderString = ""; 152 for (String attName : attributeOrder) { 153 for (int i = 0; i < data.numAttributes(); i++) { 154 if (attName.equals(data.attribute(i).name())) { 155 orderString += (i + 1) + ","; 156 } 157 } 158 } 159 orderString = orderString.substring(0, orderString.length() - 1); 160 161 String relationName = data.relationName(); 162 String[] options = new String[2]; 163 options[0] = "-R"; 164 options[1] = orderString; 165 Reorder reorder = new Reorder(); 166 try { 167 reorder.setOptions(options); 168 reorder.setInputFormat(data); 169 data = Filter.useFilter(data, reorder); 170 } 171 catch (Exception e) { 172 throw new RuntimeException("Error while reordering the data", e); 173 } 174 if (data.numAttributes() != attributeOrder.size()) { 175 throw new RuntimeException("Invalid number of attributes; filename: " + file.getName()); 176 } 177 178 // normalize bug nominal values 179 Add add = new Add(); 180 add.setAttributeIndex("last"); 181 add.setNominalLabels("0,1"); 182 add.setAttributeName("bug-new"); 183 try { 184 add.setInputFormat(data); 185 data = Filter.useFilter(data, add); 186 } 187 catch (Exception e) { 188 throw new RuntimeException("Error while normalizing the bug nonminal values", e); 189 } 190 data.setRelationName(relationName); 191 192 double classValue; 193 194 String firstValue = data.classAttribute().enumerateValues().nextElement().toString(); 195 if (firstValue.equals("Y") || firstValue.equals("yes") || firstValue.equals("true")) { 196 classValue = 0.0; 197 } 198 else { 199 classValue = 1.0; 200 } 201 202 for (int i = 0; i < data.numInstances(); i++) { 203 if (data.instance(i).classValue() == classValue) { 204 data.instance(i).setValue(data.classIndex() + 1, 1.0); 205 } 206 else { 207 data.instance(i).setValue(data.classIndex() + 1, 0.0); 208 } 209 } 210 211 int oldClassIndex = data.classIndex(); 212 data.setClassIndex(oldClassIndex + 1); 213 data.deleteAttributeAt(oldClassIndex); 214 215 return data; 216 } 217 218 /* 219 * (non-Javadoc) 220 * 221 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 222 * filenameFilter(java.lang.String) 223 */ 224 @Override 225 public boolean filenameFilter(String filename) { 226 return filename.endsWith(".arff"); 227 } 215 228 216 229 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/SingleVersionLoader.java
r4 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.loader; 2 16 … … 6 20 7 21 /** 8 * Interface for version loaders, i.e., loading of a set of instances from a 9 * file 22 * Interface for version loaders, i.e., loading of a set of instances from a file 10 23 * 11 24 * @author Steffen Herbold … … 13 26 public interface SingleVersionLoader { 14 27 15 /**16 * Loads the instances.17 *18 * @param file19 * handle to the file of the instances20 * @return the instances21 */22 Instances load(File file);28 /** 29 * Loads the instances. 30 * 31 * @param file 32 * handle to the file of the instances 33 * @return the instances 34 */ 35 Instances load(File file); 23 36 24 /**25 * Defines a filter for the files to be loaded; only strings that end with 26 * the filter areconsidered.27 *28 * @param filename29 * string defining the filename filter30 * @return true if a filename shall be considered31 */32 boolean filenameFilter(String endsWith);37 /** 38 * Defines a filter for the files to be loaded; only strings that end with the filter are 39 * considered. 40 * 41 * @param filename 42 * string defining the filename filter 43 * @return true if a filename shall be considered 44 */ 45 boolean filenameFilter(String endsWith); 33 46 }
Note: See TracChangeset
for help on using the changeset viewer.