source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/BiasedWeights.java @ 47

Last change on this file since 47 was 41, checked in by sherbold, 9 years ago
  • formatted code and added copyrights
  • Property svn:mime-type set to text/plain
File size: 3.3 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import org.apache.commons.collections4.list.SetUniqueList;
18
19import weka.core.Instance;
20import weka.core.Instances;
21
22/**
23 * Sets the bias of the weights of the training data. By using a bias of 0.5 (default value) the
24 * total weight of the positive instances (i.e. fault-prone) is equal to the total weight of the
25 * negative instances (i.e. non-fault-prone). Otherwise the weights between the two will be
26 * distributed according to the bias, where <0.5 means in favor of the negative instances and
27 * >0.5 in favor of the positive instances. equal to the total weight of the test
28 *
29 * @author Steffen Herbold
30 */
31public class BiasedWeights implements IProcessesingStrategy, ISetWiseProcessingStrategy {
32
33    /**
34     * bias used for the weighting
35     */
36    private double bias = 0.5;
37
38    /**
39     * Sets the bias to be used for weighting.
40     *
41     * @param parameters
42     *            string with the bias
43     */
44    @Override
45    public void setParameter(String parameters) {
46        bias = Double.parseDouble(parameters);
47    }
48
49    /**
50     * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
51     *      weka.core.Instances)
52     */
53    @Override
54    public void apply(Instances testdata, Instances traindata) {
55        // setBiasedWeights(testdata);
56        setBiasedWeights(traindata);
57    }
58
59    /**
60     * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
61     *      org.apache.commons.collections4.list.SetUniqueList)
62     */
63    @Override
64    public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
65        for (Instances traindata : traindataSet) {
66            setBiasedWeights(traindata);
67        }
68    }
69
70    /**
71     * Helper method that sets the weights for a given data set.
72     *
73     * @param data
74     *            data set whose weights are set
75     */
76    private void setBiasedWeights(Instances data) {
77        final int classIndex = data.classIndex();
78
79        final int[] counts = data.attributeStats(classIndex).nominalCounts;
80
81        final double weightNegatives = ((1 - bias) * data.numInstances()) / counts[0];
82        final double weightPositives = (bias * data.numInstances()) / counts[1];
83
84        for (int i = 0; i < data.numInstances(); i++) {
85            Instance instance = data.instance(i);
86            if (instance.value(classIndex) == 0) {
87                instance.setWeight(weightNegatives);
88            }
89            if (instance.value(classIndex) == 1) {
90                instance.setWeight(weightPositives);
91            }
92        }
93    }
94
95}
Note: See TracBrowser for help on using the repository browser.