source: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIDataLoader.java @ 135

Last change on this file since 135 was 135, checked in by sherbold, 8 years ago
  • code documentation and formatting
  • Property svn:mime-type set to text/plain
File size: 4.2 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.loader;
16
17import java.io.File;
18import java.io.IOException;
19import java.util.ArrayList;
20
21import weka.core.Attribute;
22import weka.core.DenseInstance;
23import weka.core.Instances;
24import de.ugoe.cs.util.FileTools;
25
26/**
27 * Loads data from the automative defect data set from Audi Electronic Ventures donated by Altinger
28 * et al. at the MSR 2015. This loader creates overall defect labels, for the final revision.
29 *
30 * @author Steffen Herbold
31 *
32 */
33class AUDIDataLoader implements SingleVersionLoader {
34
35    /*
36     * (non-Javadoc)
37     *
38     * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( java.io.File)
39     */
40    @Override
41    public Instances load(File file) {
42        final String[] lines;
43        try {
44            lines = FileTools.getLinesFromFile(file.getAbsolutePath());
45        }
46        catch (IOException e) {
47            throw new RuntimeException(e);
48        }
49
50        // information about bugs are in another file
51        String path = file.getAbsolutePath();
52        path = path.substring(0, path.length() - 14) + "repro.csv";
53        final String[] linesBug;
54        try {
55            linesBug = FileTools.getLinesFromFile(path);
56        }
57        catch (IOException e) {
58            throw new RuntimeException(e);
59        }
60
61        // configure Instances
62        final ArrayList<Attribute> atts = new ArrayList<Attribute>();
63
64        String[] lineSplit = lines[0].split(";");
65        // ignore first three/four and last two columns
66        int offset;
67        if (lineSplit[3].equals("project_rev")) {
68            offset = 4;
69        }
70        else {
71            offset = 3;
72        }
73        for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
74            atts.add(new Attribute(lineSplit[j + offset]));
75        }
76        final ArrayList<String> classAttVals = new ArrayList<String>();
77        classAttVals.add("0");
78        classAttVals.add("1");
79        final Attribute classAtt = new Attribute("bug", classAttVals);
80        atts.add(classAtt);
81
82        final Instances data = new Instances(file.getName(), atts, 0);
83        data.setClass(classAtt);
84
85        // fetch data
86        for (int i = 1; i < lines.length; i++) {
87            boolean validInstance = true;
88            lineSplit = lines[i].split(";");
89            String[] lineSplitBug = linesBug[i].split(";");
90            double[] values = new double[data.numAttributes()];
91            for (int j = 0; validInstance && j < values.length - 1; j++) {
92                if (lineSplit[j + offset].trim().isEmpty()) {
93                    validInstance = false;
94                }
95                else {
96                    values[j] = Double.parseDouble(lineSplit[j + offset].trim());
97                }
98            }
99            if (offset == 3) {
100                values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1;
101            }
102            else {
103                values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1;
104            }
105
106            if (validInstance) {
107                data.add(new DenseInstance(1.0, values));
108            }
109            else {
110                System.out.println("instance " + i + " is invalid");
111            }
112        }
113        return data;
114    }
115
116    /*
117     * (non-Javadoc)
118     *
119     * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#
120     * filenameFilter(java.lang.String)
121     */
122    @Override
123    public boolean filenameFilter(String filename) {
124        return filename.endsWith("src.csv");
125    }
126
127}
Note: See TracBrowser for help on using the repository browser.