Changeset 4 for trunk/CrossPare/src/de/ugoe/cs/cpdp
- Timestamp:
- 08/05/14 10:00:41 (10 years ago)
- Location:
- trunk/CrossPare/src/de/ugoe/cs/cpdp/loader
- Files:
-
- 3 added
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AbstractFolderLoader.java
r2 r4 9 9 import de.ugoe.cs.cpdp.versions.SoftwareVersion; 10 10 11 11 /** 12 * Abstract class for loading data from a folder. The subfolders of a defined 13 * folder define the projects, the files contained in the subfolder are the 14 * versions of a project. 15 * 16 * @author Steffen Herbold 17 */ 12 18 public abstract class AbstractFolderLoader implements IVersionLoader { 13 14 // TODO15 interface SingleVersionLoader {16 Instances load(File file);17 boolean filenameFilter(String filename);18 }19 19 20 20 /** … … 22 22 */ 23 23 private String path = ""; 24 24 25 25 /** 26 26 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#setLocation(java.lang.String) … … 28 28 @Override 29 29 public void setLocation(String location) { 30 path =location;30 path = location; 31 31 } 32 32 33 33 /** 34 34 * @see de.ugoe.cs.cpdp.loader.IVersionLoader#load() … … 37 37 public List<SoftwareVersion> load() { 38 38 final List<SoftwareVersion> versions = new LinkedList<SoftwareVersion>(); 39 39 40 40 final File dataDir = new File(path); 41 41 final SingleVersionLoader instancesLoader = getSingleLoader(); 42 43 for ( File projectDir : dataDir.listFiles()) {44 if ( projectDir.isDirectory()) {42 43 for (File projectDir : dataDir.listFiles()) { 44 if (projectDir.isDirectory()) { 45 45 String projectName = projectDir.getName(); 46 for( File versionFile : projectDir.listFiles() ) { 47 if( versionFile.isFile() && instancesLoader.filenameFilter(versionFile.getName()) ) { 46 for (File versionFile : projectDir.listFiles()) { 47 if (versionFile.isFile() 48 && instancesLoader.filenameFilter(versionFile 49 .getName())) { 48 50 String versionName = versionFile.getName(); 49 51 Instances data = instancesLoader.load(versionFile); 50 versions.add(new SoftwareVersion(projectName, versionName, data)); 52 versions.add(new SoftwareVersion(projectName, 53 versionName, data)); 51 54 } 52 55 } … … 55 58 return versions; 56 59 } 57 60 61 /** 62 * Returns the concrete {@link SingleVersionLoader} to be used with this 63 
* folder loader. 64 * 65 * @return 66 */ 58 67 abstract protected SingleVersionLoader getSingleLoader(); 59 68 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVDataLoader.java
r2 r4 8 8 import weka.core.DenseInstance; 9 9 import weka.core.Instances; 10 import de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader;11 10 import de.ugoe.cs.util.FileTools; 12 11 13 12 /** 14 * Loads the instances for a software version from a CSV file of the // TODO dataset citation 15 * data set. 13 * Loads the instances for a software version from a CSV file of the PROMISE 14 * data set mined by Jureczko and Madeyski. 15 * 16 16 * @author Steffen Herbold 17 17 */ 18 18 class CSVDataLoader implements SingleVersionLoader { 19 20 /** 21 * Loads the instances. 22 * @param file handle to the file of the instances 23 * @return the instances 19 20 /* 21 * (non-Javadoc) 22 * 23 * @see 24 * de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( 25 * java.io.File) 24 26 */ 25 27 @Override … … 31 33 throw new RuntimeException(e); 32 34 } 33 35 34 36 // configure Instances 35 37 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 36 37 String[] lineSplit = lines[0].split(","); 38 for ( int j=0 ; j<lineSplit.length-4 ; j++) {39 atts.add(new Attribute(lineSplit[j +3]));38 39 String[] lineSplit = lines[0].split(","); 40 for (int j = 0; j < lineSplit.length - 4; j++) { 41 atts.add(new Attribute(lineSplit[j + 3])); 40 42 } 41 43 final ArrayList<String> classAttVals = new ArrayList<String>(); … … 44 46 final Attribute classAtt = new Attribute("bug", classAttVals); 45 47 atts.add(classAtt); 46 48 47 49 final Instances data = new Instances(file.getName(), atts, 0); 48 50 data.setClass(classAtt); 49 51 50 52 // fetch data 51 for ( int i=1 ; i<lines.length ; i++) {53 for (int i = 1; i < lines.length; i++) { 52 54 lineSplit = lines[i].split(","); 53 double[] values = new double[lineSplit.length -3];54 for ( int j=0 ; j<values.length-1 ; j++) {55 values[j] = Double.parseDouble(lineSplit[j +3].trim());55 double[] values = new double[lineSplit.length - 3]; 56 for (int j = 0; j < values.length - 1; j++) { 57 values[j] = Double.parseDouble(lineSplit[j 
+ 3].trim()); 56 58 } 57 values[values.length-1] = lineSplit[lineSplit.length-1].trim().equals("0") ? 0 : 1; 59 values[values.length - 1] = lineSplit[lineSplit.length - 1].trim() 60 .equals("0") ? 0 : 1; 58 61 data.add(new DenseInstance(1.0, values)); 59 62 } 60 63 61 64 return data; 62 65 } 63 66 67 /* 68 * (non-Javadoc) 69 * 70 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 71 * filenameFilter(java.lang.String) 72 */ 64 73 @Override 65 74 public boolean filenameFilter(String filename) { -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/CSVFolderLoader.java
r2 r4 2 2 3 3 /** 4 * Implements a {@link IVersionLoader} for data from // TODO data reference5 * Each folder contained in the defined location ({@link #setLocation(String)}) represents a project, the data files6 * within the versions.4 * Implements the {@link AbstractFolderLoader} for data from the PROMISE 5 * repository mined by Jureczko and Madeyski. 6 * 7 7 * @author Steffen Herbold 8 8 */ 9 9 public class CSVFolderLoader extends AbstractFolderLoader { 10 10 11 /* 12 * (non-Javadoc) 13 * 14 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 15 */ 11 16 @Override 12 17 protected SingleVersionLoader getSingleLoader() { … … 14 19 } 15 20 16 17 21 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/IVersionLoader.java
r2 r4 7 7 /** 8 8 * Implements the interface for loading software versions from a data source. 9 * 9 10 * @author Steffen Herbold 10 11 */ 11 12 public interface IVersionLoader { 12 13 13 14 /** 14 * Sets the location of the data. 15 * @param location location of the data 15 * Sets the location of the data. 16 * 17 * @param location 18 * location of the data 16 19 */ 17 20 public void setLocation(String location); 18 21 19 22 /** 20 23 * Loads the data. 24 * 21 25 * @return the data 22 26 */ -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFFolderLoader.java
r2 r4 1 1 package de.ugoe.cs.cpdp.loader; 2 2 3 /** 4 * Implements the {@link AbstractFolderLoader} for the NASA/SOFTLAB/MDP data 5 * set. 6 * 7 * @author Steffen Herbold 8 */ 3 9 public class NasaARFFFolderLoader extends AbstractFolderLoader { 4 10 11 /* 12 * (non-Javadoc) 13 * 14 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader#getSingleLoader() 15 */ 5 16 @Override 6 17 protected SingleVersionLoader getSingleLoader() { -
trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/NasaARFFLoader.java
r3 r4 10 10 import java.util.Map; 11 11 12 import de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader;13 12 import weka.core.Instances; 14 13 import weka.filters.Filter; … … 16 15 import weka.filters.unsupervised.attribute.Reorder; 17 16 17 /** 18 * Loads the instances for a software version from an ARFF file of the 19 * NASA/SOFTLAB/MDP data. 20 * 21 * @author Steffen Herbold 22 */ 18 23 public class NasaARFFLoader implements SingleVersionLoader { 19 24 25 /** 26 * used to map different names of the same attribute to each 27 * other 28 */ 20 29 Map<String, String> attributeNameMap; 30 31 /** 32 * used to ensure that the attribute order is the same after loading 33 */ 21 34 List<String> attributeOrder; 22 35 36 /** 37 * Constructor. Creates a new NasaARFFLoader. 38 */ 23 39 public NasaARFFLoader() { 24 40 attributeNameMap = new HashMap<>(); 25 41 26 42 // Map entries for ar project 27 43 attributeNameMap.put("total_loc", "LOC_TOTAL"); … … 32 48 attributeNameMap.put("unique_operators", "NUM_UNIQUE_OPERATORS"); 33 49 attributeNameMap.put("total_operands", "NUM_OPERANDS"); 34 attributeNameMap.put("total_operators", 35 attributeNameMap.put("halstead_length", 50 attributeNameMap.put("total_operators", "NUM_OPERATORS"); 51 attributeNameMap.put("halstead_length", "HALSTEAD_LENGTH"); 36 52 attributeNameMap.put("halstead_volume", "HALSTEAD_VOLUME"); 37 53 attributeNameMap.put("halstead_difficulty", "HALSTEAD_DIFFICULTY"); … … 41 57 attributeNameMap.put("branch_count", "BRANCH_COUNT"); 42 58 attributeNameMap.put("cyclomatic_complexity", "CYCLOMATIC_COMPLEXITY"); 43 attributeNameMap.put("design_complexity", 44 59 attributeNameMap.put("design_complexity", "DESIGN_COMPLEXITY"); 60 45 61 // Map entries for KC2 46 attributeNameMap.put("loc", "LOC_TOTAL"); // TODO these first two LOCs are guesses47 attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); // TODO62 attributeNameMap.put("loc", "LOC_TOTAL"); 63 attributeNameMap.put("lOCode", "LOC_EXECUTABLE"); 48 64 
attributeNameMap.put("lOComment", "LOC_COMMENTS"); 49 65 attributeNameMap.put("lOCodeAndComment", "LOC_CODE_AND_COMMENT"); … … 56 72 attributeNameMap.put("d", "HALSTEAD_DIFFICULTY"); 57 73 attributeNameMap.put("e", "HALSTEAD_EFFORT"); 58 attributeNameMap.put("b", "HALSTEAD_ERROR_EST"); // TODO not sure about this one74 attributeNameMap.put("b", "HALSTEAD_ERROR_EST"); 59 75 attributeNameMap.put("t", "HALSTEAD_PROG_TIME"); 60 76 attributeNameMap.put("branchCount", "BRANCH_COUNT"); 61 attributeNameMap.put("v(g)", 77 attributeNameMap.put("v(g)", "CYCLOMATIC_COMPLEXITY"); 62 78 attributeNameMap.put("iv(g)", "DESIGN_COMPLEXITY"); 63 64 attributeNameMap.put("defects", 79 80 attributeNameMap.put("defects", "bug"); 65 81 attributeNameMap.put("Defective", "bug"); 66 82 attributeNameMap.put("problems", "bug"); 67 83 attributeNameMap.put("label", "bug"); 68 84 69 85 // build list with normalized attribute order 70 86 attributeOrder = new LinkedList<>(); 71 87 72 88 attributeOrder.add("LOC_TOTAL"); 73 89 attributeOrder.add("LOC_EXECUTABLE"); … … 89 105 attributeOrder.add("bug"); 90 106 } 91 92 /** 93 * Loads the instances. 
94 * @param file handle to the file of the instances 95 * @return the instances 96 */ 107 108 /* 109 * (non-Javadoc) 110 * 111 * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File) 112 */ 113 @Override 97 114 public Instances load(File file) { 98 115 BufferedReader reader; … … 103 120 reader.close(); 104 121 } catch (IOException e) { 105 // TODO Auto-generated catch block 106 throw new RuntimeException(e); 107 } 108 109 //setting class attribute 122 throw new RuntimeException("Error reading data", e); 123 } 124 125 // setting class attribute 110 126 data.setClassIndex(data.numAttributes() - 1); 111 127 112 128 // normalize attribute names 113 for ( int i=0; i<data.numAttributes(); i++) {129 for (int i = 0; i < data.numAttributes(); i++) { 114 130 String mapValue = attributeNameMap.get(data.attribute(i).name()); 115 if ( mapValue!= null) {131 if (mapValue != null) { 116 132 data.renameAttribute(i, mapValue); 117 133 } 118 134 } 119 120 // determine new attribute order (unwanted attributes are implicitly removed 135 136 // determine new attribute order (unwanted attributes are implicitly 137 // removed 121 138 String orderString = ""; 122 for ( String attName : attributeOrder) {123 for ( int i=0; i<data.numAttributes(); i++) {124 if (attName.equals(data.attribute(i).name())) {125 orderString += (i +1) + ",";139 for (String attName : attributeOrder) { 140 for (int i = 0; i < data.numAttributes(); i++) { 141 if (attName.equals(data.attribute(i).name())) { 142 orderString += (i + 1) + ","; 126 143 } 127 144 } 128 145 } 129 orderString = orderString.substring(0, orderString.length() -1);130 146 orderString = orderString.substring(0, orderString.length() - 1); 147 131 148 String relationName = data.relationName(); 132 149 String[] options = new String[2]; … … 139 156 data = Filter.useFilter(data, reorder); 140 157 } catch (Exception e) { 141 // TODO Auto-generated catch block142 throw new RuntimeException();143 }144 if( 
data.numAttributes()!=attributeOrder.size() ) {145 throw new RuntimeException("Invalid number of attributes; filename: " + file.getName());146 } 147 158 throw new RuntimeException("Error while reordering the data", e); 159 } 160 if (data.numAttributes() != attributeOrder.size()) { 161 throw new RuntimeException( 162 "Invalid number of attributes; filename: " + file.getName()); 163 } 164 148 165 // normalize bug nominal values 149 166 Add add = new Add(); 150 167 add.setAttributeIndex("last"); 151 152 153 168 add.setNominalLabels("0,1"); 169 add.setAttributeName("bug-new"); 170 try { 154 171 add.setInputFormat(data); 155 172 data = Filter.useFilter(data, add); 156 173 } catch (Exception e) { 157 // TODO Auto-generated catch block 158 e.printStackTrace(); 159 } 160 data.setRelationName(relationName); 161 162 double classValue; 163 164 String firstValue = data.classAttribute().enumerateValues().nextElement().toString(); 165 if( firstValue.equals("Y") || firstValue.equals("yes") || firstValue.equals("true") ) { 166 classValue = 0.0; 167 } else { 168 classValue = 1.0; 169 } 170 171 for( int i=0 ; i<data.numInstances() ; i++ ) { 172 if( data.instance(i).classValue() == classValue ) { 173 data.instance(i).setValue(data.classIndex()+1, 1.0); 174 throw new RuntimeException( 175 "Error while normalizing the bug nonminal values", e); 176 } 177 data.setRelationName(relationName); 178 179 double classValue; 180 181 String firstValue = data.classAttribute().enumerateValues() 182 .nextElement().toString(); 183 if (firstValue.equals("Y") || firstValue.equals("yes") 184 || firstValue.equals("true")) { 185 classValue = 0.0; 186 } else { 187 classValue = 1.0; 188 } 189 190 for (int i = 0; i < data.numInstances(); i++) { 191 if (data.instance(i).classValue() == classValue) { 192 data.instance(i).setValue(data.classIndex() + 1, 1.0); 174 193 } else { 175 data.instance(i).setValue(data.classIndex() +1, 0.0);194 data.instance(i).setValue(data.classIndex() + 1, 0.0); 176 195 } 177 196 } 
178 197 179 198 int oldClassIndex = data.classIndex(); 180 data.setClassIndex(oldClassIndex +1);199 data.setClassIndex(oldClassIndex + 1); 181 200 data.deleteAttributeAt(oldClassIndex); 182 201 183 202 return data; 184 203 } 185 204 205 /* 206 * (non-Javadoc) 207 * 208 * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader# 209 * filenameFilter(java.lang.String) 210 */ 186 211 @Override 187 212 public boolean filenameFilter(String filename) { 188 213 return filename.endsWith(".arff"); 189 214 } 190 215 191 216 }
Note: See TracChangeset
for help on using the changeset viewer.