| 1 | // Copyright 2015 Georg-August-Universität Göttingen, Germany |
|---|
| 2 | // |
|---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
|---|
| 4 | // you may not use this file except in compliance with the License. |
|---|
| 5 | // You may obtain a copy of the License at |
|---|
| 6 | // |
|---|
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 8 | // |
|---|
| 9 | // Unless required by applicable law or agreed to in writing, software |
|---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 12 | // See the License for the specific language governing permissions and |
|---|
| 13 | // limitations under the License. |
|---|
| 14 | |
|---|
| 15 | package de.ugoe.cs.cpdp.loader; |
|---|
| 16 | |
|---|
| 17 | import java.io.File; |
|---|
| 18 | import java.io.IOException; |
|---|
| 19 | import java.util.ArrayList; |
|---|
| 20 | |
|---|
| 21 | import weka.core.Attribute; |
|---|
| 22 | import weka.core.DenseInstance; |
|---|
| 23 | import weka.core.Instances; |
|---|
| 24 | import de.ugoe.cs.util.FileTools; |
|---|
| 25 | |
|---|
| 26 | class CSVMockusDataLoader implements SingleVersionLoader { |
|---|
| 27 | |
|---|
| 28 | @Override |
|---|
| 29 | public Instances load(File file) { |
|---|
| 30 | final String[] lines; |
|---|
| 31 | try { |
|---|
| 32 | |
|---|
| 33 | lines = FileTools.getLinesFromFile(file.getAbsolutePath()); |
|---|
| 34 | } |
|---|
| 35 | catch (IOException e) { |
|---|
| 36 | throw new RuntimeException(e); |
|---|
| 37 | } |
|---|
| 38 | |
|---|
| 39 | // configure Instances |
|---|
| 40 | final ArrayList<Attribute> atts = new ArrayList<Attribute>(); |
|---|
| 41 | |
|---|
| 42 | String[] lineSplit = lines[0].split(","); |
|---|
| 43 | for (int j = 0; j < lineSplit.length - 3; j++) { |
|---|
| 44 | atts.add(new Attribute(lineSplit[j + 2])); |
|---|
| 45 | } |
|---|
| 46 | |
|---|
| 47 | final ArrayList<String> classAttVals = new ArrayList<String>(); |
|---|
| 48 | classAttVals.add("0"); |
|---|
| 49 | classAttVals.add("1"); |
|---|
| 50 | final Attribute classAtt = new Attribute("bug", classAttVals); |
|---|
| 51 | atts.add(classAtt); |
|---|
| 52 | |
|---|
| 53 | final Instances data = new Instances(file.getName(), atts, 0); |
|---|
| 54 | data.setClass(classAtt); |
|---|
| 55 | |
|---|
| 56 | // fetch data |
|---|
| 57 | for (int i = 1; i < lines.length; i++) { |
|---|
| 58 | lineSplit = lines[i].split(","); |
|---|
| 59 | double[] values = new double[lineSplit.length - 2]; |
|---|
| 60 | for (int j = 0; j < values.length - 1; j++) { |
|---|
| 61 | values[j] = Double.parseDouble(lineSplit[j + 2].trim()); |
|---|
| 62 | } |
|---|
| 63 | values[values.length - 1] = lineSplit[lineSplit.length - 1].trim().equals("0") ? 0 : 1; |
|---|
| 64 | data.add(new DenseInstance(1.0, values)); |
|---|
| 65 | } |
|---|
| 66 | |
|---|
| 67 | return data; |
|---|
| 68 | } |
|---|
| 69 | |
|---|
| 70 | @Override |
|---|
| 71 | public boolean filenameFilter(String filename) { |
|---|
| 72 | return filename.endsWith(".csv"); |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | } |
|---|