// Copyright 2015 Georg-August-Universität Göttingen, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.ugoe.cs.cpdp.loader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import de.ugoe.cs.util.FileTools;
/**
*
* Loads data from the automotive defect data set from Audi Electronic Ventures donated by Altinger
* et al. at the MSR 2015. The loaded data contains the changes per commit, i.e., it is meant for
* just-in-time (JIT) defect prediction.
*
*
* @author Steffen Herbold
*/
class AUDIChangeLoader implements SingleVersionLoader {

    /**
     * Separator between the cells of a line in both CSV files.
     */
    private static final String SEPARATOR = ";";

    /**
     * Number of characters that are cut from the end of the absolute path of the metrics file
     * before {@link #BUG_FILE_SUFFIX} is appended. NOTE(review): presumably the length of the
     * name suffix of the metrics files; confirm against the naming scheme of the data set.
     */
    private static final int METRICS_SUFFIX_LENGTH = 14;

    /**
     * Suffix of the path of the file that contains the bug information.
     */
    private static final String BUG_FILE_SUFFIX = "repro.csv";

    /**
     * Internal helper class. Immutable pair of an entity and a revision that is ordered by the
     * entity first and the revision second. It does not use the enclosing loader and is,
     * therefore, a static nested class.
     *
     * @author Steffen Herbold
     */
    private static final class EntityRevisionPair implements Comparable<EntityRevisionPair> {

        /**
         * string that defines an entity
         */
        private final String entity;

        /**
         * revision number of the entity
         */
        private final int revision;

        /**
         * Constructor. Creates a new EntityRevisionPair.
         *
         * @param entity
         *            the entity
         * @param revision
         *            the revision
         */
        public EntityRevisionPair(String entity, int revision) {
            this.entity = entity;
            this.revision = revision;
        }

        /**
         * Two pairs are equal if and only if {@link #compareTo(EntityRevisionPair)} yields 0,
         * i.e., if entity and revision are the same.
         *
         * @see java.lang.Object#equals(java.lang.Object)
         */
        @Override
        public boolean equals(Object other) {
            if (!(other instanceof EntityRevisionPair)) {
                return false;
            }
            return compareTo((EntityRevisionPair) other) == 0;
        }

        /**
         * Hash code computed from both fields, consistent with {@link #equals(Object)}.
         *
         * @see java.lang.Object#hashCode()
         */
        @Override
        public int hashCode() {
            return 31 * entity.hashCode() + revision;
        }

        /**
         * Orders by the entity string first and by the revision number second.
         *
         * @see java.lang.Comparable#compareTo(java.lang.Object)
         */
        @Override
        public int compareTo(EntityRevisionPair other) {
            final int strCmp = this.entity.compareTo(other.entity);
            if (strCmp != 0) {
                return strCmp;
            }
            return Integer.compare(revision, other.revision);
        }

        /**
         * @return the pair in the form entity@revision
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            return entity + "@" + revision;
        }
    }

    /**
     * Loads the change data. The metrics are read from the given file, the bug information from a
     * second file whose path is derived from the path of the given file. The lines are ordered by
     * entity and revision. For each pair of consecutive revisions of the same entity, an instance
     * is created if at least one metric increased. An instance contains the difference to the
     * previous revision for each metric (suffix "_delta"), the metric values of the revision
     * (suffix "_abs"), and the class "bug", which is 1 if the number of bugs increased and 0
     * otherwise. Lines that cannot be parsed are reported on System.err and ignored.
     *
     * @param file
     *            the file with the metric data
     * @return the loaded instances; the name of the relation is the name of the file
     * @throws RuntimeException
     *             if a file cannot be read or is empty, a required column is missing, or the
     *             metrics file has fewer lines than the bug file
     * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#load(java.io.File)
     */
    @Override
    public Instances load(File file) {
        final String[] lines = readLines(file.getAbsolutePath());
        // information about bugs is in another file
        final String[] linesBug = readLines(bugFilePath(file));

        final String[] bugHeader = headerOf(linesBug, "bug file");
        final int revisionIndex =
            requireColumn(bugHeader, "svnrev", "could not find SVN revisions");
        final int bugIndex =
            requireColumn(bugHeader, "num_bugs_trace", "could not find bug information");

        final String[] metricsHeader = headerOf(lines, "metrics file");
        final int metricsStartIndex = requireColumn(metricsHeader, "lm_LOC",
                                                    "could not find first metric, i.e., lm_LOC");
        final int metricsEndIndex =
            requireColumn(metricsHeader, "h_E", "could not find last metric, i.e., h_E");
        if (metricsEndIndex < metricsStartIndex) {
            throw new RuntimeException("last metric h_E is located before first metric lm_LOC");
        }
        final int numMetrics = metricsEndIndex - metricsStartIndex + 1;

        // every line of the bug file is looked up in the metrics file by its line number
        if (lines.length < linesBug.length) {
            throw new RuntimeException("metrics file has fewer lines (" + lines.length +
                ") than bug file (" + linesBug.length + ")");
        }

        // map all pairs of entity and revision to their line number; the map is sorted such that
        // the revisions of an entity follow each other in ascending order
        final SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>();
        for (int i = 1; i < linesBug.length; i++) {
            try {
                final String[] bugCells = linesBug[i].split(SEPARATOR);
                final int revision = Integer.parseInt(cellAt(bugCells, revisionIndex));
                entityRevisionPairs.put(new EntityRevisionPair(cellAt(bugCells, 0), revision), i);
            }
            catch (IllegalArgumentException e) {
                reportIgnoredLine(i, e.getMessage(), lines[i], linesBug[i]);
            }
        }

        // prepare weka instances
        final ArrayList<Attribute> atts = new ArrayList<>();
        for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
            atts.add(new Attribute(metricsHeader[j] + "_delta"));
        }
        for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
            atts.add(new Attribute(metricsHeader[j] + "_abs"));
        }
        final Attribute classAtt = createBugAttribute();
        atts.add(classAtt);
        final Instances data = new Instances(file.getName(), atts, 0);
        data.setClass(classAtt);

        // create data; the "last" variables hold the previous successfully parsed line
        String lastFile = null;
        double[] lastValues = null;
        int lastNumBugs = 0;
        for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) {
            final int lineNumber = entry.getValue();
            final String entity = entry.getKey().entity;
            try {
                // first get values
                final String[] metricCells = lines[lineNumber].split(SEPARATOR);
                final String[] bugCells = linesBug[lineNumber].split(SEPARATOR);
                final double[] values = new double[numMetrics];
                for (int j = 0; j < numMetrics; j++) {
                    values[j] = Double.parseDouble(cellAt(metricCells, metricsStartIndex + j));
                }
                final int numBugs = Integer.parseInt(cellAt(bugCells, bugIndex));

                // then check if an instance must be created, i.e., if there is a previous
                // revision of the same entity
                if (entity.equals(lastFile)) {
                    final double[] instanceValues = new double[2 * numMetrics + 1];
                    // NOTE(review): only increases (delta > 0) count as change, a revision that
                    // only decreases metrics yields no instance; confirm that this is intended
                    boolean changeOccured = false;
                    for (int j = 0; j < numMetrics; j++) {
                        instanceValues[j] = values[j] - lastValues[j];
                        instanceValues[j + numMetrics] = values[j];
                        changeOccured |= instanceValues[j] > 0;
                    }
                    if (changeOccured) {
                        instanceValues[instanceValues.length - 1] =
                            numBugs <= lastNumBugs ? 0 : 1;
                        data.add(new DenseInstance(1.0, instanceValues));
                    }
                }
                lastFile = entity;
                lastValues = values;
                lastNumBugs = numBugs;
            }
            catch (IllegalArgumentException e) {
                // covers unparsable numbers (NumberFormatException) and missing cells; the
                // "last" variables are not updated for such lines
                reportIgnoredLine(lineNumber, e.getMessage(), lines[lineNumber],
                                  linesBug[lineNumber]);
            }
        }
        return data;
    }

    /**
     * Variant of the loading that creates one instance per line of the metrics file instead of
     * one instance per change. The first three columns (four, if the fourth column is
     * "project_rev") and the last two columns of the metrics file are ignored, all columns in
     * between become attributes. The class "bug" is 0 if the respective cell of the same line of
     * the bug file is "0" and 1 otherwise. Lines with an empty or missing metric cell are
     * reported on System.out and skipped. NOTE(review): this overload is not marked as override
     * and looks like a legacy variant; confirm whether it is still required.
     *
     * @param file
     *            the file with the metric data
     * @param dummy
     *            not used
     * @return the loaded instances; the name of the relation is the name of the file
     * @throws RuntimeException
     *             if a file cannot be read, the metrics file is empty or its header has less than
     *             four columns, the bug file has fewer lines than the metrics file, or the bug
     *             information of a valid line is missing
     */
    public Instances load(File file, String dummy) {
        final String[] lines = readLines(file.getAbsolutePath());
        // information about bugs is in another file
        final String[] linesBug = readLines(bugFilePath(file));

        // configure Instances
        final String[] header = headerOf(lines, "metrics file");
        if (header.length < 4) {
            throw new RuntimeException("header of the metrics file has less than four columns");
        }
        // ignore first three/four and last two columns
        final int offset = header[3].equals("project_rev") ? 4 : 3;
        // the bug file is read at column 7 or 8, depending on the offset
        final int bugColumn = offset + 4;

        // the bug label of each line is looked up in the bug file by the line number
        if (linesBug.length < lines.length) {
            throw new RuntimeException("bug file has fewer lines (" + linesBug.length +
                ") than metrics file (" + lines.length + ")");
        }

        final ArrayList<Attribute> atts = new ArrayList<>();
        for (int j = offset; j < header.length - 2; j++) {
            atts.add(new Attribute(header[j]));
        }
        final Attribute classAtt = createBugAttribute();
        atts.add(classAtt);
        final Instances data = new Instances(file.getName(), atts, 0);
        data.setClass(classAtt);

        // fetch data
        final int numMetrics = data.numAttributes() - 1;
        for (int i = 1; i < lines.length; i++) {
            final String[] metricCells = lines[i].split(SEPARATOR);
            final double[] values = new double[numMetrics + 1];
            boolean validInstance = true;
            for (int j = 0; validInstance && j < numMetrics; j++) {
                final int column = j + offset;
                // String.split drops trailing empty cells, hence, a missing cell is an empty cell
                final String cell =
                    column < metricCells.length ? metricCells[column].trim() : "";
                if (cell.isEmpty()) {
                    validInstance = false;
                }
                else {
                    values[j] = Double.parseDouble(cell);
                }
            }
            if (validInstance) {
                final String[] bugCells = linesBug[i].split(SEPARATOR);
                if (bugColumn >= bugCells.length) {
                    throw new RuntimeException("bug information missing in line " + i +
                        " of the bug file");
                }
                values[numMetrics] = bugCells[bugColumn].equals("0") ? 0 : 1;
                data.add(new DenseInstance(1.0, values));
            }
            else {
                System.out.println("instance " + i + " is invalid");
            }
        }
        return data;
    }

    /**
     * Accepts the files that contain the metric data.
     *
     * @param filename
     *            name of the file
     * @return true if the name ends with "src.csv", false otherwise
     * @see de.ugoe.cs.cpdp.loader.SingleVersionLoader#filenameFilter(java.lang.String)
     */
    @Override
    public boolean filenameFilter(String filename) {
        return filename.endsWith("src.csv");
    }

    /**
     * Reads all lines of a file and converts a failure into an unchecked exception.
     *
     * @param path
     *            path of the file
     * @return lines of the file
     * @throws RuntimeException
     *             if reading the file fails; the IOException is the cause
     */
    private static String[] readLines(String path) {
        try {
            return FileTools.getLinesFromFile(path);
        }
        catch (IOException e) {
            throw new RuntimeException("could not read " + path, e);
        }
    }

    /**
     * Derives the path of the file with the bug information from the metrics file.
     *
     * @param metricsFile
     *            the file with the metric data
     * @return absolute path of the metrics file with its last {@link #METRICS_SUFFIX_LENGTH}
     *         characters replaced by {@link #BUG_FILE_SUFFIX}
     */
    private static String bugFilePath(File metricsFile) {
        final String path = metricsFile.getAbsolutePath();
        return path.substring(0, path.length() - METRICS_SUFFIX_LENGTH) + BUG_FILE_SUFFIX;
    }

    /**
     * Splits the first line of a file into its cells.
     *
     * @param lines
     *            lines of the file
     * @param description
     *            description of the file for the error message
     * @return cells of the first line
     * @throws RuntimeException
     *             if there is no first line
     */
    private static String[] headerOf(String[] lines, String description) {
        if (lines.length == 0) {
            throw new RuntimeException(description + " is empty");
        }
        return lines[0].split(SEPARATOR);
    }

    /**
     * Determines the index of a column. If the name occurs multiple times, the last occurrence
     * is used.
     *
     * @param header
     *            cells of the header line
     * @param name
     *            name of the column
     * @param errorMessage
     *            message of the exception if the column does not exist
     * @return index of the column
     * @throws RuntimeException
     *             if the header does not contain the column
     */
    private static int requireColumn(String[] header, String name, String errorMessage) {
        int index = -1;
        for (int j = 0; j < header.length; j++) {
            if (header[j].equals(name)) {
                index = j;
            }
        }
        if (index < 0) {
            throw new RuntimeException(errorMessage);
        }
        return index;
    }

    /**
     * Fetches a cell of a split line.
     *
     * @param cells
     *            cells of the line
     * @param index
     *            index of the cell
     * @return content of the cell
     * @throws IllegalArgumentException
     *             if the line has no cell with the index
     */
    private static String cellAt(String[] cells, int index) {
        if (index >= cells.length) {
            throw new IllegalArgumentException("missing column " + index);
        }
        return cells[index];
    }

    /**
     * Creates the nominal class attribute "bug" with the values "0" and "1".
     *
     * @return the class attribute
     */
    private static Attribute createBugAttribute() {
        final ArrayList<String> classAttVals = new ArrayList<>();
        classAttVals.add("0");
        classAttVals.add("1");
        return new Attribute("bug", classAttVals);
    }

    /**
     * Reports on System.err that a line is ignored.
     *
     * @param lineNumber
     *            number of the line in both files
     * @param reason
     *            reason why the line is ignored
     * @param metricsLine
     *            the line of the metrics file
     * @param bugsLine
     *            the line of the bug file
     */
    private static void reportIgnoredLine(int lineNumber,
                                          String reason,
                                          String metricsLine,
                                          String bugsLine)
    {
        System.err.println("error in line " + lineNumber + ": " + reason);
        System.err.println("metrics line: " + metricsLine);
        System.err.println("bugs line: " + bugsLine);
        System.err.println("line is ignored");
    }
}