1 | // Copyright 2015 Georg-August-Universität Göttingen, Germany |
---|
2 | // |
---|
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
---|
4 | // you may not use this file except in compliance with the License. |
---|
5 | // You may obtain a copy of the License at |
---|
6 | // |
---|
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
---|
8 | // |
---|
9 | // Unless required by applicable law or agreed to in writing, software |
---|
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
---|
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
12 | // See the License for the specific language governing permissions and |
---|
13 | // limitations under the License. |
---|
14 | |
---|
15 | package de.ugoe.cs.cpdp.loader; |
---|
16 | |
---|
17 | import java.io.File; |
---|
18 | import java.io.IOException; |
---|
19 | import java.util.ArrayList; |
---|
20 | |
---|
21 | import weka.core.Attribute; |
---|
22 | import weka.core.DenseInstance; |
---|
23 | import weka.core.Instances; |
---|
24 | import de.ugoe.cs.util.FileTools; |
---|
25 | |
---|
26 | class CSVMockusDataLoader implements SingleVersionLoader { |
---|
27 | |
---|
28 | @Override |
---|
29 | public Instances load(File file) { |
---|
30 | final String[] lines; |
---|
31 | try { |
---|
32 | |
---|
33 | lines = FileTools.getLinesFromFile(file.getAbsolutePath()); |
---|
34 | } |
---|
35 | catch (IOException e) { |
---|
36 | throw new RuntimeException(e); |
---|
37 | } |
---|
38 | |
---|
39 | // configure Instances |
---|
40 | final ArrayList<Attribute> atts = new ArrayList<Attribute>(); |
---|
41 | |
---|
42 | String[] lineSplit = lines[0].split(","); |
---|
43 | for (int j = 0; j < lineSplit.length - 3; j++) { |
---|
44 | atts.add(new Attribute(lineSplit[j + 2])); |
---|
45 | } |
---|
46 | |
---|
47 | final ArrayList<String> classAttVals = new ArrayList<String>(); |
---|
48 | classAttVals.add("0"); |
---|
49 | classAttVals.add("1"); |
---|
50 | final Attribute classAtt = new Attribute("bug", classAttVals); |
---|
51 | atts.add(classAtt); |
---|
52 | |
---|
53 | final Instances data = new Instances(file.getName(), atts, 0); |
---|
54 | data.setClass(classAtt); |
---|
55 | |
---|
56 | // fetch data |
---|
57 | for (int i = 1; i < lines.length; i++) { |
---|
58 | lineSplit = lines[i].split(","); |
---|
59 | double[] values = new double[lineSplit.length - 2]; |
---|
60 | for (int j = 0; j < values.length - 1; j++) { |
---|
61 | values[j] = Double.parseDouble(lineSplit[j + 2].trim()); |
---|
62 | } |
---|
63 | values[values.length - 1] = lineSplit[lineSplit.length - 1].trim().equals("0") ? 0 : 1; |
---|
64 | data.add(new DenseInstance(1.0, values)); |
---|
65 | } |
---|
66 | |
---|
67 | return data; |
---|
68 | } |
---|
69 | |
---|
70 | @Override |
---|
71 | public boolean filenameFilter(String filename) { |
---|
72 | return filename.endsWith(".csv"); |
---|
73 | } |
---|
74 | |
---|
75 | } |
---|