1 | package de.ugoe.cs.cpdp.loader;
|
---|
2 |
|
---|
3 | import java.io.File;
|
---|
4 | import java.io.IOException;
|
---|
5 | import java.util.ArrayList;
|
---|
6 | import java.util.Map.Entry;
|
---|
7 | import java.util.SortedMap;
|
---|
8 | import java.util.TreeMap;
|
---|
9 |
|
---|
10 | import weka.core.Attribute;
|
---|
11 | import weka.core.DenseInstance;
|
---|
12 | import weka.core.Instances;
|
---|
13 | import de.ugoe.cs.util.FileTools;
|
---|
14 |
|
---|
15 | /**
|
---|
16 | * TODO
|
---|
17 | * @author sherbold
|
---|
18 | *
|
---|
19 | */
|
---|
20 | class AUDIChangeLoader implements SingleVersionLoader {
|
---|
21 |
|
---|
22 | private class EntityRevisionPair implements Comparable<EntityRevisionPair> {
|
---|
23 | private final String entity;
|
---|
24 | private final int revision;
|
---|
25 |
|
---|
26 | public EntityRevisionPair(String entity, int revision) {
|
---|
27 | this.entity = entity;
|
---|
28 | this.revision = revision;
|
---|
29 | }
|
---|
30 |
|
---|
31 | @Override
|
---|
32 | public boolean equals(Object other) {
|
---|
33 | if( !(other instanceof EntityRevisionPair) ) {
|
---|
34 | return false;
|
---|
35 | } else {
|
---|
36 | return compareTo((EntityRevisionPair) other)==0;
|
---|
37 | }
|
---|
38 | }
|
---|
39 |
|
---|
40 | @Override
|
---|
41 | public int hashCode() {
|
---|
42 | return entity.hashCode()+revision;
|
---|
43 | }
|
---|
44 |
|
---|
45 | @Override
|
---|
46 | public int compareTo(EntityRevisionPair other) {
|
---|
47 | int strCmp = this.entity.compareTo(other.entity);
|
---|
48 | if( strCmp!=0 ) {
|
---|
49 | return strCmp;
|
---|
50 | }
|
---|
51 | return Integer.compare(revision, other.revision);
|
---|
52 | }
|
---|
53 |
|
---|
54 | @Override
|
---|
55 | public String toString() {
|
---|
56 | return entity+"@"+revision;
|
---|
57 | }
|
---|
58 | }
|
---|
59 |
|
---|
60 | @Override
|
---|
61 | public Instances load(File file) {
|
---|
62 | final String[] lines;
|
---|
63 | String[] lineSplit;
|
---|
64 | String[] lineSplitBug;
|
---|
65 |
|
---|
66 | try {
|
---|
67 | lines = FileTools.getLinesFromFile(file.getAbsolutePath());
|
---|
68 | } catch (IOException e) {
|
---|
69 | throw new RuntimeException(e);
|
---|
70 | }
|
---|
71 |
|
---|
72 | // information about bugs are in another file
|
---|
73 | String path = file.getAbsolutePath();
|
---|
74 | path = path.substring(0, path.length()-14) + "repro.csv";
|
---|
75 | final String[] linesBug;
|
---|
76 | try {
|
---|
77 | linesBug = FileTools.getLinesFromFile(path);
|
---|
78 | } catch (IOException e) {
|
---|
79 | throw new RuntimeException(e);
|
---|
80 | }
|
---|
81 |
|
---|
82 | int revisionIndex=-1;
|
---|
83 | int bugIndex=-1;
|
---|
84 | lineSplitBug = linesBug[0].split(";");
|
---|
85 | for( int j=0; j<lineSplitBug.length ; j++ ) {
|
---|
86 | if( lineSplitBug[j].equals("svnrev") ) {
|
---|
87 | revisionIndex=j;
|
---|
88 | }
|
---|
89 | if( lineSplitBug[j].equals("num_bugs_trace") ) {
|
---|
90 | bugIndex=j;
|
---|
91 | }
|
---|
92 | }
|
---|
93 | if( revisionIndex<0 ) {
|
---|
94 | throw new RuntimeException("could not find SVN revisions");
|
---|
95 | }
|
---|
96 | if( bugIndex<0 ) {
|
---|
97 | throw new RuntimeException("could not find bug information");
|
---|
98 | }
|
---|
99 |
|
---|
100 | int metricsStartIndex=-1;
|
---|
101 | int metricsEndIndex=-1;
|
---|
102 | lineSplit = lines[0].split(";");
|
---|
103 | for( int j=0; j<lineSplit.length ; j++ ) {
|
---|
104 | if( lineSplit[j].equals("lm_LOC") ) {
|
---|
105 | metricsStartIndex=j;
|
---|
106 | }
|
---|
107 | if( lineSplit[j].equals("h_E") ) {
|
---|
108 | metricsEndIndex=j;
|
---|
109 | }
|
---|
110 | }
|
---|
111 | if( metricsStartIndex<0 ) {
|
---|
112 | throw new RuntimeException("could not find first metric, i.e., lm_LOC");
|
---|
113 | }
|
---|
114 | if( metricsEndIndex<0 ) {
|
---|
115 | throw new RuntimeException("could not find last metric, i.e., h_E");
|
---|
116 | }
|
---|
117 | int numMetrics = metricsEndIndex-metricsStartIndex+1;
|
---|
118 |
|
---|
119 | // create sets of all filenames and revisions
|
---|
120 | SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>();
|
---|
121 | for( int i=1; i<linesBug.length ; i++ ) {
|
---|
122 | lineSplitBug = linesBug[i].split(";");
|
---|
123 | entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer.parseInt(lineSplitBug[revisionIndex])), i);
|
---|
124 | }
|
---|
125 |
|
---|
126 |
|
---|
127 | // prepare weka instances
|
---|
128 | final ArrayList<Attribute> atts = new ArrayList<Attribute>();
|
---|
129 | lineSplit = lines[0].split(";");
|
---|
130 | for (int j = metricsStartIndex; j<=metricsEndIndex; j++) {
|
---|
131 | atts.add(new Attribute(lineSplit[j]+"_delta"));
|
---|
132 | }
|
---|
133 | for (int j = metricsStartIndex; j<=metricsEndIndex; j++) {
|
---|
134 | atts.add(new Attribute(lineSplit[j]+"_abs"));
|
---|
135 | }
|
---|
136 | final ArrayList<String> classAttVals = new ArrayList<String>();
|
---|
137 | classAttVals.add("0");
|
---|
138 | classAttVals.add("1");
|
---|
139 | final Attribute classAtt = new Attribute("bug", classAttVals);
|
---|
140 | atts.add(classAtt);
|
---|
141 |
|
---|
142 | final Instances data = new Instances(file.getName(), atts, 0);
|
---|
143 | data.setClass(classAtt);
|
---|
144 |
|
---|
145 | // create data
|
---|
146 | String lastFile = null;
|
---|
147 | double[] lastValues = null;
|
---|
148 | int lastNumBugs = 0;
|
---|
149 | for( Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet() ) {
|
---|
150 | try {
|
---|
151 | // first get values
|
---|
152 | lineSplit = lines[entry.getValue()].split(";");
|
---|
153 | lineSplitBug = linesBug[entry.getValue()].split(";");
|
---|
154 | int i=0;
|
---|
155 | double[] values = new double[numMetrics];
|
---|
156 | for(int j=metricsStartIndex ; j<=metricsEndIndex ; j++ ) {
|
---|
157 | values[i] = Double.parseDouble(lineSplit[j]);
|
---|
158 | i++;
|
---|
159 | }
|
---|
160 | int numBugs = Integer.parseInt(lineSplitBug[bugIndex]);
|
---|
161 |
|
---|
162 | // then check if an entity must be created
|
---|
163 | if( entry.getKey().entity.equals(lastFile)) {
|
---|
164 | // create new instance
|
---|
165 | double[] instanceValues = new double[2*numMetrics+1];
|
---|
166 | for( int j=0; j<numMetrics; j++ ) {
|
---|
167 | instanceValues[j] = values[j]-lastValues[j];
|
---|
168 | instanceValues[j+numMetrics]= values[j];
|
---|
169 | }
|
---|
170 | // check if any value>0
|
---|
171 | boolean changeOccured = false;
|
---|
172 | for( int j=0; j<numMetrics; j++ ) {
|
---|
173 | if( instanceValues[j]>0 ) {
|
---|
174 | changeOccured = true;
|
---|
175 | }
|
---|
176 | }
|
---|
177 | if( changeOccured ) {
|
---|
178 | instanceValues[instanceValues.length-1] = numBugs<=lastNumBugs ? 0 : 1;
|
---|
179 | data.add(new DenseInstance(1.0, instanceValues));
|
---|
180 | }
|
---|
181 | }
|
---|
182 | lastFile = entry.getKey().entity;
|
---|
183 | lastValues = values;
|
---|
184 | lastNumBugs = numBugs;
|
---|
185 | } catch(IllegalArgumentException e) {
|
---|
186 | System.err.println("error in line " + entry.getValue() + ": " + e.getMessage());
|
---|
187 | System.err.println("metrics line: " + lines[entry.getValue()]);
|
---|
188 | System.err.println("bugs line: " + linesBug[entry.getValue()]);
|
---|
189 | System.err.println("line is ignored");
|
---|
190 | }
|
---|
191 | }
|
---|
192 |
|
---|
193 | return data;
|
---|
194 | }
|
---|
195 |
|
---|
196 | /*
|
---|
197 | * (non-Javadoc)
|
---|
198 | *
|
---|
199 | * @see
|
---|
200 | * de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load(
|
---|
201 | * java.io.File)
|
---|
202 | */
|
---|
203 |
|
---|
204 | public Instances load(File file, String dummy) {
|
---|
205 | final String[] lines;
|
---|
206 | try {
|
---|
207 | lines = FileTools.getLinesFromFile(file.getAbsolutePath());
|
---|
208 | } catch (IOException e) {
|
---|
209 | throw new RuntimeException(e);
|
---|
210 | }
|
---|
211 |
|
---|
212 | // information about bugs are in another file
|
---|
213 | String path = file.getAbsolutePath();
|
---|
214 | path = path.substring(0, path.length()-14) + "repro.csv";
|
---|
215 | final String[] linesBug;
|
---|
216 | try {
|
---|
217 | linesBug = FileTools.getLinesFromFile(path);
|
---|
218 | } catch (IOException e) {
|
---|
219 | throw new RuntimeException(e);
|
---|
220 | }
|
---|
221 |
|
---|
222 | // configure Instances
|
---|
223 | final ArrayList<Attribute> atts = new ArrayList<Attribute>();
|
---|
224 |
|
---|
225 | String[] lineSplit = lines[0].split(";");
|
---|
226 | // ignore first three/four and last two columns
|
---|
227 | int offset;
|
---|
228 | if( lineSplit[3].equals("project_rev") ) {
|
---|
229 | offset = 4;
|
---|
230 | } else {
|
---|
231 | offset = 3;
|
---|
232 | }
|
---|
233 | for (int j = 0; j < lineSplit.length - (offset+2); j++) {
|
---|
234 | atts.add(new Attribute(lineSplit[j + offset]));
|
---|
235 | }
|
---|
236 | final ArrayList<String> classAttVals = new ArrayList<String>();
|
---|
237 | classAttVals.add("0");
|
---|
238 | classAttVals.add("1");
|
---|
239 | final Attribute classAtt = new Attribute("bug", classAttVals);
|
---|
240 | atts.add(classAtt);
|
---|
241 |
|
---|
242 | final Instances data = new Instances(file.getName(), atts, 0);
|
---|
243 | data.setClass(classAtt);
|
---|
244 |
|
---|
245 | // fetch data
|
---|
246 | for (int i = 1; i < lines.length; i++) {
|
---|
247 | boolean validInstance = true;
|
---|
248 | lineSplit = lines[i].split(";");
|
---|
249 | String[] lineSplitBug = linesBug[i].split(";");
|
---|
250 | double[] values = new double[data.numAttributes()];
|
---|
251 | for (int j = 0; validInstance && j < values.length-1; j++) {
|
---|
252 | if( lineSplit[j + offset].trim().isEmpty() ) {
|
---|
253 | validInstance = false;
|
---|
254 | } else {
|
---|
255 | values[j] = Double.parseDouble(lineSplit[j + offset].trim());
|
---|
256 | }
|
---|
257 | }
|
---|
258 | if( offset==3 ) {
|
---|
259 | values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1;
|
---|
260 | } else {
|
---|
261 | values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1;
|
---|
262 | }
|
---|
263 |
|
---|
264 | if( validInstance ) {
|
---|
265 | data.add(new DenseInstance(1.0, values));
|
---|
266 | } else {
|
---|
267 | System.out.println("instance " + i + " is invalid");
|
---|
268 | }
|
---|
269 | }
|
---|
270 | return data;
|
---|
271 | }
|
---|
272 |
|
---|
273 | /*
|
---|
274 | * (non-Javadoc)
|
---|
275 | *
|
---|
276 | * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#
|
---|
277 | * filenameFilter(java.lang.String)
|
---|
278 | */
|
---|
279 | @Override
|
---|
280 | public boolean filenameFilter(String filename) {
|
---|
281 | return filename.endsWith("src.csv");
|
---|
282 | }
|
---|
283 |
|
---|
284 | }
|
---|