source: trunk/CrossPare/src/de/ugoe/cs/cpdp/loader/AUDIDataLoader.java @ 89

Last change on this file since 89 was 86, checked in by sherbold, 9 years ago
  • switched workspace encoding to UTF-8 and fixed broken characters
  • Property svn:mime-type set to text/plain
File size: 4.0 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.loader;
16
17import java.io.File;
18import java.io.IOException;
19import java.util.ArrayList;
20
21import weka.core.Attribute;
22import weka.core.DenseInstance;
23import weka.core.Instances;
24import de.ugoe.cs.util.FileTools;
25
26/**
27 * TODO
28 *
29 * @author sherbold
30 *
31 */
32class AUDIDataLoader implements SingleVersionLoader {
33
34    /*
35     * (non-Javadoc)
36     *
37     * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#load( java.io.File)
38     */
39    @Override
40    public Instances load(File file) {
41        final String[] lines;
42        try {
43            lines = FileTools.getLinesFromFile(file.getAbsolutePath());
44        }
45        catch (IOException e) {
46            throw new RuntimeException(e);
47        }
48
49        // information about bugs are in another file
50        String path = file.getAbsolutePath();
51        path = path.substring(0, path.length() - 14) + "repro.csv";
52        final String[] linesBug;
53        try {
54            linesBug = FileTools.getLinesFromFile(path);
55        }
56        catch (IOException e) {
57            throw new RuntimeException(e);
58        }
59
60        // configure Instances
61        final ArrayList<Attribute> atts = new ArrayList<Attribute>();
62
63        String[] lineSplit = lines[0].split(";");
64        // ignore first three/four and last two columns
65        int offset;
66        if (lineSplit[3].equals("project_rev")) {
67            offset = 4;
68        }
69        else {
70            offset = 3;
71        }
72        for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
73            atts.add(new Attribute(lineSplit[j + offset]));
74        }
75        final ArrayList<String> classAttVals = new ArrayList<String>();
76        classAttVals.add("0");
77        classAttVals.add("1");
78        final Attribute classAtt = new Attribute("bug", classAttVals);
79        atts.add(classAtt);
80
81        final Instances data = new Instances(file.getName(), atts, 0);
82        data.setClass(classAtt);
83
84        // fetch data
85        for (int i = 1; i < lines.length; i++) {
86            boolean validInstance = true;
87            lineSplit = lines[i].split(";");
88            String[] lineSplitBug = linesBug[i].split(";");
89            double[] values = new double[data.numAttributes()];
90            for (int j = 0; validInstance && j < values.length - 1; j++) {
91                if (lineSplit[j + offset].trim().isEmpty()) {
92                    validInstance = false;
93                }
94                else {
95                    values[j] = Double.parseDouble(lineSplit[j + offset].trim());
96                }
97            }
98            if (offset == 3) {
99                values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1;
100            }
101            else {
102                values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1;
103            }
104
105            if (validInstance) {
106                data.add(new DenseInstance(1.0, values));
107            }
108            else {
109                System.out.println("instance " + i + " is invalid");
110            }
111        }
112        return data;
113    }
114
115    /*
116     * (non-Javadoc)
117     *
118     * @see de.ugoe.cs.cpdp.loader.AbstractFolderLoader.SingleVersionLoader#
119     * filenameFilter(java.lang.String)
120     */
121    @Override
122    public boolean filenameFilter(String filename) {
123        return filename.endsWith("src.csv");
124    }
125
126}
Note: See TracBrowser for help on using the repository browser.