package de.ugoe.cs.cpdp.loader;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

import de.ugoe.cs.util.FileTools;

/**
 * Loads one version of a data set that is stored in two semicolon-separated
 * files: a metrics file (the file handed to {@link #load(File)}) and a bug
 * file that is located by replacing the tail of the metrics file path with
 * {@code repro.csv}.
 * <p>
 * The first line of the metrics file is treated as the header. The first three
 * columns (four, if the fourth header cell is {@code project_rev}) and the last
 * two columns are ignored; every column in between becomes a numeric attribute.
 * A nominal class attribute {@code bug} with the values {@code 0} and {@code 1}
 * is appended and filled from the bug file.
 *
 * @author sherbold
 */
class AUDIDataLoader implements SingleVersionLoader {

    /** Column separator used by both input files. */
    private static final String SEPARATOR = ";";

    /**
     * Number of trailing characters of the metrics file path that are replaced
     * to obtain the bug file path.
     * NOTE(review): presumably the length of a fixed file name suffix - confirm
     * against the naming scheme of the data files.
     */
    private static final int METRICS_SUFFIX_LENGTH = 14;

    /** Suffix appended to the truncated metrics path to obtain the bug file. */
    private static final String BUG_FILE_SUFFIX = "repro.csv";

    /** Header cell that signals a fourth leading column that must be skipped. */
    private static final String REVISION_COLUMN = "project_rev";

    /** Number of trailing columns of the metrics file that are ignored. */
    private static final int IGNORED_TRAILING_COLUMNS = 2;

    /**
     * Reads the metrics file and its companion bug file and builds the Weka
     * data set.
     * <p>
     * Row {@code i} of the metrics file is paired with row {@code i} of the bug
     * file. A row with an empty or missing metric value is skipped and reported
     * on standard output. The class value is {@code 0} if the label cell of the
     * bug file equals {@code "0"} and {@code 1} otherwise.
     *
     * @param file
     *            metrics file of the version to load
     * @return data set named after {@code file}, with {@code bug} as class
     *         attribute
     * @throws RuntimeException
     *             if one of the two files cannot be read
     * @throws IllegalArgumentException
     *             if the metrics file is empty, its header has fewer than four
     *             columns, or the bug file lacks the line or label column
     *             needed for a valid row
     * @throws NumberFormatException
     *             if a non-empty metric value is not a number
     * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load(java.io.File)
     */
    @Override
    public Instances load(File file) {
        final String metricsPath = file.getAbsolutePath();
        final String[] lines = readLines(metricsPath);

        // information about bugs are in another file
        final String bugPath =
            metricsPath.substring(0, metricsPath.length() - METRICS_SUFFIX_LENGTH) +
                BUG_FILE_SUFFIX;
        final String[] linesBug = readLines(bugPath);

        if (lines.length == 0) {
            throw new IllegalArgumentException("metrics file is empty: " + metricsPath);
        }
        final String[] header = lines[0].split(SEPARATOR);
        if (header.length < 4) {
            throw new IllegalArgumentException("header of " + metricsPath +
                " has only " + header.length + " column(s), at least 4 are required");
        }

        // ignore first three/four and last two columns
        final int offset = header[3].equals(REVISION_COLUMN) ? 4 : 3;
        // the label sits four columns behind the first metric column
        // (column 7 for offset 3, column 8 for offset 4)
        final int labelColumn = offset + 4;

        // configure Instances
        final ArrayList<Attribute> atts = new ArrayList<Attribute>();
        final int metricCount = header.length - (offset + IGNORED_TRAILING_COLUMNS);
        for (int j = 0; j < metricCount; j++) {
            atts.add(new Attribute(header[j + offset]));
        }
        final ArrayList<String> classAttVals = new ArrayList<String>();
        classAttVals.add("0");
        classAttVals.add("1");
        final Attribute classAtt = new Attribute("bug", classAttVals);
        atts.add(classAtt);

        final Instances data = new Instances(file.getName(), atts, 0);
        data.setClass(classAtt);

        // fetch data
        for (int i = 1; i < lines.length; i++) {
            // limit -1 keeps trailing empty cells, so a row that ends in empty
            // values is reported as invalid instead of causing an index error
            final String[] fields = lines[i].split(SEPARATOR, -1);
            final double[] values = new double[data.numAttributes()];

            boolean validInstance = true;
            for (int j = 0; validInstance && j < values.length - 1; j++) {
                final int column = j + offset;
                final String cell = column < fields.length ? fields[column].trim() : "";
                if (cell.isEmpty()) {
                    validInstance = false;
                }
                else {
                    values[j] = Double.parseDouble(cell);
                }
            }

            if (!validInstance) {
                System.out.println("instance " + i + " is invalid");
                continue;
            }

            // the label is only needed (and only validated) for rows that are kept
            if (i >= linesBug.length) {
                throw new IllegalArgumentException("bug file " + bugPath +
                    " has no line " + i + " matching the metrics file " + metricsPath);
            }
            final String[] bugFields = linesBug[i].split(SEPARATOR);
            if (labelColumn >= bugFields.length) {
                throw new IllegalArgumentException("line " + i + " of bug file " + bugPath +
                    " has no column " + labelColumn);
            }
            values[values.length - 1] = bugFields[labelColumn].equals("0") ? 0 : 1;

            data.add(new DenseInstance(1.0, values));
        }
        return data;
    }

    /**
     * Accepts the metrics files handled by this loader.
     *
     * @param filename
     *            name of the candidate file
     * @return {@code true} if {@code filename} ends with {@code src.csv}
     * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#filenameFilter(java.lang.String)
     */
    @Override
    public boolean filenameFilter(String filename) {
        return filename.endsWith("src.csv");
    }

    /**
     * Reads all lines of a file, converting the checked I/O failure into an
     * unchecked exception that keeps the original cause.
     *
     * @param path
     *            path of the file to read
     * @return lines of the file as returned by {@link FileTools}
     */
    private static String[] readLines(String path) {
        try {
            return FileTools.getLinesFromFile(path);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}