source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/LogarithmTransform.java @ 117

Last change on this file since 117 was 86, checked in by sherbold, 9 years ago
  • switched workspace encoding to UTF-8 and fixed broken characters
  • Property svn:mime-type set to text/plain
File size: 4.8 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import org.apache.commons.collections4.list.SetUniqueList;
18
19import weka.core.Attribute;
20import weka.core.Instance;
21import weka.core.Instances;
22
23/**
24 * Logarithm transformation after Carmargo Cruz and Ochimizu: Towards Logistic Regression Models for
25 * Predicting Fault-prone Code across Software Projects. <br>
26 * <br>
27 * Transform each attribute value x into log(x+1).
28 *
29 * @author Steffen Herbold
30 */
31public class LogarithmTransform implements ISetWiseProcessingStrategy, IProcessesingStrategy {
32
33    /**
34     * Does not have parameters. String is ignored.
35     *
36     * @param parameters
37     *            ignored
38     */
39    @Override
40    public void setParameter(String parameters) {
41        // dummy
42    }
43
44    /**
45     * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
46     *      org.apache.commons.collections4.list.SetUniqueList)
47     */
48    @Override
49    public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
50        final Attribute classAttribute = testdata.classAttribute();
51
52        // preprocess testdata
53        for (int i = 0; i < testdata.numInstances(); i++) {
54            Instance instance = testdata.instance(i);
55            for (int j = 0; j < testdata.numAttributes(); j++) {
56                if (testdata.attribute(j) != classAttribute && testdata.attribute(j).isNumeric()) {
57                    if (instance.value(j) < 0) {
58                        instance.setValue(j, (-1 * (Math.log(-1 * instance.value(j)))));
59                    }
60                    else {
61                        instance.setValue(j, Math.log(1 + instance.value(j)));
62                    }
63                }
64            }
65        }
66
67        // preprocess training data
68        for (Instances traindata : traindataSet) {
69            for (int i = 0; i < traindata.numInstances(); i++) {
70                Instance instance = traindata.instance(i);
71                for (int j = 0; j < testdata.numAttributes(); j++) {
72                    if (traindata.attribute(j) != classAttribute &&
73                        traindata.attribute(j).isNumeric())
74                    {
75                        if (instance.value(j) < 0) {
76                            instance.setValue(j, (-1 * (Math.log(-1 * instance.value(j)))));
77                        }
78                        else {
79                            instance.setValue(j, Math.log(1 + instance.value(j)));
80                        }
81                    }
82                }
83            }
84        }
85    }
86
87    /**
88     * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
89     *      weka.core.Instances)
90     */
91    @Override
92    public void apply(Instances testdata, Instances traindata) {
93        final Attribute classAttribute = testdata.classAttribute();
94
95        // preprocess testdata
96        for (int i = 0; i < testdata.numInstances(); i++) {
97            Instance instance = testdata.instance(i);
98            for (int j = 0; j < testdata.numAttributes(); j++) {
99                if (testdata.attribute(j) != classAttribute && testdata.attribute(j).isNumeric()) {
100                    if (instance.value(j) < 0) {
101                        instance.setValue(j, (-1 * (Math.log(-1 * instance.value(j)))));
102                    }
103                    else {
104                        instance.setValue(j, Math.log(1 + instance.value(j)));
105                    }
106                }
107            }
108        }
109
110        // preprocess training data
111        for (int i = 0; i < traindata.numInstances(); i++) {
112            Instance instance = traindata.instance(i);
113            for (int j = 0; j < testdata.numAttributes(); j++) {
114                if (traindata.attribute(j) != classAttribute && traindata.attribute(j).isNumeric())
115                {
116                    if (instance.value(j) < 0) {
117                        instance.setValue(j, (-1 * (Math.log(-1 * instance.value(j)))));
118                    }
119                    else {
120                        instance.setValue(j, Math.log(1 + instance.value(j)));
121                    }
122                }
123            }
124        }
125    }
126}
Note: See TracBrowser for help on using the repository browser.