// Copyright 2015 Georg-August-Universität Göttingen, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package de.ugoe.cs.cpdp.loader;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import de.ugoe.cs.util.FileTools;

29 | /**
|
---|
30 | * TODO
|
---|
31 | *
|
---|
32 | * @author sherbold
|
---|
33 | *
|
---|
34 | */
|
---|
35 | class AUDIChangeLoader implements SingleVersionLoader {
|
---|
36 |
|
---|
37 | private class EntityRevisionPair implements Comparable<EntityRevisionPair> {
|
---|
38 | private final String entity;
|
---|
39 | private final int revision;
|
---|
40 |
|
---|
41 | public EntityRevisionPair(String entity, int revision) {
|
---|
42 | this.entity = entity;
|
---|
43 | this.revision = revision;
|
---|
44 | }
|
---|
45 |
|
---|
46 | @Override
|
---|
47 | public boolean equals(Object other) {
|
---|
48 | if (!(other instanceof EntityRevisionPair)) {
|
---|
49 | return false;
|
---|
50 | }
|
---|
51 | else {
|
---|
52 | return compareTo((EntityRevisionPair) other) == 0;
|
---|
53 | }
|
---|
54 | }
|
---|
55 |
|
---|
56 | @Override
|
---|
57 | public int hashCode() {
|
---|
58 | return entity.hashCode() + revision;
|
---|
59 | }
|
---|
60 |
|
---|
61 | @Override
|
---|
62 | public int compareTo(EntityRevisionPair other) {
|
---|
63 | int strCmp = this.entity.compareTo(other.entity);
|
---|
64 | if (strCmp != 0) {
|
---|
65 | return strCmp;
|
---|
66 | }
|
---|
67 | return Integer.compare(revision, other.revision);
|
---|
68 | }
|
---|
69 |
|
---|
70 | @Override
|
---|
71 | public String toString() {
|
---|
72 | return entity + "@" + revision;
|
---|
73 | }
|
---|
74 | }
|
---|
75 |
|
---|
76 | @Override
|
---|
77 | public Instances load(File file) {
|
---|
78 | final String[] lines;
|
---|
79 | String[] lineSplit;
|
---|
80 | String[] lineSplitBug;
|
---|
81 |
|
---|
82 | try {
|
---|
83 | lines = FileTools.getLinesFromFile(file.getAbsolutePath());
|
---|
84 | }
|
---|
85 | catch (IOException e) {
|
---|
86 | throw new RuntimeException(e);
|
---|
87 | }
|
---|
88 |
|
---|
89 | // information about bugs are in another file
|
---|
90 | String path = file.getAbsolutePath();
|
---|
91 | path = path.substring(0, path.length() - 14) + "repro.csv";
|
---|
92 | final String[] linesBug;
|
---|
93 | try {
|
---|
94 | linesBug = FileTools.getLinesFromFile(path);
|
---|
95 | }
|
---|
96 | catch (IOException e) {
|
---|
97 | throw new RuntimeException(e);
|
---|
98 | }
|
---|
99 |
|
---|
100 | int revisionIndex = -1;
|
---|
101 | int bugIndex = -1;
|
---|
102 | lineSplitBug = linesBug[0].split(";");
|
---|
103 | for (int j = 0; j < lineSplitBug.length; j++) {
|
---|
104 | if (lineSplitBug[j].equals("svnrev")) {
|
---|
105 | revisionIndex = j;
|
---|
106 | }
|
---|
107 | if (lineSplitBug[j].equals("num_bugs_trace")) {
|
---|
108 | bugIndex = j;
|
---|
109 | }
|
---|
110 | }
|
---|
111 | if (revisionIndex < 0) {
|
---|
112 | throw new RuntimeException("could not find SVN revisions");
|
---|
113 | }
|
---|
114 | if (bugIndex < 0) {
|
---|
115 | throw new RuntimeException("could not find bug information");
|
---|
116 | }
|
---|
117 |
|
---|
118 | int metricsStartIndex = -1;
|
---|
119 | int metricsEndIndex = -1;
|
---|
120 | lineSplit = lines[0].split(";");
|
---|
121 | for (int j = 0; j < lineSplit.length; j++) {
|
---|
122 | if (lineSplit[j].equals("lm_LOC")) {
|
---|
123 | metricsStartIndex = j;
|
---|
124 | }
|
---|
125 | if (lineSplit[j].equals("h_E")) {
|
---|
126 | metricsEndIndex = j;
|
---|
127 | }
|
---|
128 | }
|
---|
129 | if (metricsStartIndex < 0) {
|
---|
130 | throw new RuntimeException("could not find first metric, i.e., lm_LOC");
|
---|
131 | }
|
---|
132 | if (metricsEndIndex < 0) {
|
---|
133 | throw new RuntimeException("could not find last metric, i.e., h_E");
|
---|
134 | }
|
---|
135 | int numMetrics = metricsEndIndex - metricsStartIndex + 1;
|
---|
136 |
|
---|
137 | // create sets of all filenames and revisions
|
---|
138 | SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>();
|
---|
139 | for (int i = 1; i < linesBug.length; i++) {
|
---|
140 | lineSplitBug = linesBug[i].split(";");
|
---|
141 | entityRevisionPairs.put(new EntityRevisionPair(lineSplitBug[0], Integer
|
---|
142 | .parseInt(lineSplitBug[revisionIndex])), i);
|
---|
143 | }
|
---|
144 |
|
---|
145 | // prepare weka instances
|
---|
146 | final ArrayList<Attribute> atts = new ArrayList<Attribute>();
|
---|
147 | lineSplit = lines[0].split(";");
|
---|
148 | for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
|
---|
149 | atts.add(new Attribute(lineSplit[j] + "_delta"));
|
---|
150 | }
|
---|
151 | for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
|
---|
152 | atts.add(new Attribute(lineSplit[j] + "_abs"));
|
---|
153 | }
|
---|
154 | final ArrayList<String> classAttVals = new ArrayList<String>();
|
---|
155 | classAttVals.add("0");
|
---|
156 | classAttVals.add("1");
|
---|
157 | final Attribute classAtt = new Attribute("bug", classAttVals);
|
---|
158 | atts.add(classAtt);
|
---|
159 |
|
---|
160 | final Instances data = new Instances(file.getName(), atts, 0);
|
---|
161 | data.setClass(classAtt);
|
---|
162 |
|
---|
163 | // create data
|
---|
164 | String lastFile = null;
|
---|
165 | double[] lastValues = null;
|
---|
166 | int lastNumBugs = 0;
|
---|
167 | for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) {
|
---|
168 | try {
|
---|
169 | // first get values
|
---|
170 | lineSplit = lines[entry.getValue()].split(";");
|
---|
171 | lineSplitBug = linesBug[entry.getValue()].split(";");
|
---|
172 | int i = 0;
|
---|
173 | double[] values = new double[numMetrics];
|
---|
174 | for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
|
---|
175 | values[i] = Double.parseDouble(lineSplit[j]);
|
---|
176 | i++;
|
---|
177 | }
|
---|
178 | int numBugs = Integer.parseInt(lineSplitBug[bugIndex]);
|
---|
179 |
|
---|
180 | // then check if an entity must be created
|
---|
181 | if (entry.getKey().entity.equals(lastFile)) {
|
---|
182 | // create new instance
|
---|
183 | double[] instanceValues = new double[2 * numMetrics + 1];
|
---|
184 | for (int j = 0; j < numMetrics; j++) {
|
---|
185 | instanceValues[j] = values[j] - lastValues[j];
|
---|
186 | instanceValues[j + numMetrics] = values[j];
|
---|
187 | }
|
---|
188 | // check if any value>0
|
---|
189 | boolean changeOccured = false;
|
---|
190 | for (int j = 0; j < numMetrics; j++) {
|
---|
191 | if (instanceValues[j] > 0) {
|
---|
192 | changeOccured = true;
|
---|
193 | }
|
---|
194 | }
|
---|
195 | if (changeOccured) {
|
---|
196 | instanceValues[instanceValues.length - 1] = numBugs <= lastNumBugs ? 0 : 1;
|
---|
197 | data.add(new DenseInstance(1.0, instanceValues));
|
---|
198 | }
|
---|
199 | }
|
---|
200 | lastFile = entry.getKey().entity;
|
---|
201 | lastValues = values;
|
---|
202 | lastNumBugs = numBugs;
|
---|
203 | }
|
---|
204 | catch (IllegalArgumentException e) {
|
---|
205 | System.err.println("error in line " + entry.getValue() + ": " + e.getMessage());
|
---|
206 | System.err.println("metrics line: " + lines[entry.getValue()]);
|
---|
207 | System.err.println("bugs line: " + linesBug[entry.getValue()]);
|
---|
208 | System.err.println("line is ignored");
|
---|
209 | }
|
---|
210 | }
|
---|
211 |
|
---|
212 | return data;
|
---|
213 | }
|
---|
214 |
|
---|
215 | /*
|
---|
216 | * (non-Javadoc)
|
---|
217 | *
|
---|
218 | * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( java.io.File)
|
---|
219 | */
|
---|
220 |
|
---|
221 | public Instances load(File file, String dummy) {
|
---|
222 | final String[] lines;
|
---|
223 | try {
|
---|
224 | lines = FileTools.getLinesFromFile(file.getAbsolutePath());
|
---|
225 | }
|
---|
226 | catch (IOException e) {
|
---|
227 | throw new RuntimeException(e);
|
---|
228 | }
|
---|
229 |
|
---|
230 | // information about bugs are in another file
|
---|
231 | String path = file.getAbsolutePath();
|
---|
232 | path = path.substring(0, path.length() - 14) + "repro.csv";
|
---|
233 | final String[] linesBug;
|
---|
234 | try {
|
---|
235 | linesBug = FileTools.getLinesFromFile(path);
|
---|
236 | }
|
---|
237 | catch (IOException e) {
|
---|
238 | throw new RuntimeException(e);
|
---|
239 | }
|
---|
240 |
|
---|
241 | // configure Instances
|
---|
242 | final ArrayList<Attribute> atts = new ArrayList<Attribute>();
|
---|
243 |
|
---|
244 | String[] lineSplit = lines[0].split(";");
|
---|
245 | // ignore first three/four and last two columns
|
---|
246 | int offset;
|
---|
247 | if (lineSplit[3].equals("project_rev")) {
|
---|
248 | offset = 4;
|
---|
249 | }
|
---|
250 | else {
|
---|
251 | offset = 3;
|
---|
252 | }
|
---|
253 | for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
|
---|
254 | atts.add(new Attribute(lineSplit[j + offset]));
|
---|
255 | }
|
---|
256 | final ArrayList<String> classAttVals = new ArrayList<String>();
|
---|
257 | classAttVals.add("0");
|
---|
258 | classAttVals.add("1");
|
---|
259 | final Attribute classAtt = new Attribute("bug", classAttVals);
|
---|
260 | atts.add(classAtt);
|
---|
261 |
|
---|
262 | final Instances data = new Instances(file.getName(), atts, 0);
|
---|
263 | data.setClass(classAtt);
|
---|
264 |
|
---|
265 | // fetch data
|
---|
266 | for (int i = 1; i < lines.length; i++) {
|
---|
267 | boolean validInstance = true;
|
---|
268 | lineSplit = lines[i].split(";");
|
---|
269 | String[] lineSplitBug = linesBug[i].split(";");
|
---|
270 | double[] values = new double[data.numAttributes()];
|
---|
271 | for (int j = 0; validInstance && j < values.length - 1; j++) {
|
---|
272 | if (lineSplit[j + offset].trim().isEmpty()) {
|
---|
273 | validInstance = false;
|
---|
274 | }
|
---|
275 | else {
|
---|
276 | values[j] = Double.parseDouble(lineSplit[j + offset].trim());
|
---|
277 | }
|
---|
278 | }
|
---|
279 | if (offset == 3) {
|
---|
280 | values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1;
|
---|
281 | }
|
---|
282 | else {
|
---|
283 | values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1;
|
---|
284 | }
|
---|
285 |
|
---|
286 | if (validInstance) {
|
---|
287 | data.add(new DenseInstance(1.0, values));
|
---|
288 | }
|
---|
289 | else {
|
---|
290 | System.out.println("instance " + i + " is invalid");
|
---|
291 | }
|
---|
292 | }
|
---|
293 | return data;
|
---|
294 | }
|
---|
295 |
|
---|
296 | /*
|
---|
297 | * (non-Javadoc)
|
---|
298 | *
|
---|
299 | * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#
|
---|
300 | * filenameFilter(java.lang.String)
|
---|
301 | */
|
---|
302 | @Override
|
---|
303 | public boolean filenameFilter(String filename) {
|
---|
304 | return filename.endsWith("src.csv");
|
---|
305 | }
|
---|
306 |
|
---|
307 | }
|
---|