source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/AverageStandardization.java @ 83

Last change on this file since 83 was 41, checked in by sherbold, 9 years ago
  • formatted code and added copyrights
  • Property svn:mime-type set to text/plain
File size: 4.2 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import org.apache.commons.collections4.list.SetUniqueList;
18
19import weka.core.Attribute;
20import weka.core.Instance;
21import weka.core.Instances;
22
23/**
24 * Standardization procedure after Watanabe et al.: Adapting a Fault Prediction Model to Allow Inter
25 * Language Reuse. <br>
26 * <br>
27 * In comparison to Watanabe et al., we transform training data instead of the test data. Otherwise,
28 * this approach would not be feasible with multiple projects.
29 *
30 * @author Steffen Herbold
31 */
32public class AverageStandardization implements ISetWiseProcessingStrategy, IProcessesingStrategy {
33
34    /**
35     * Does not have parameters. String is ignored.
36     *
37     * @param parameters
38     *            ignored
39     */
40    @Override
41    public void setParameter(String parameters) {
42        // dummy
43    }
44
45    /**
46     * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
47     *      org.apache.commons.collections4.list.SetUniqueList)
48     */
49    @Override
50    public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
51        final Attribute classAttribute = testdata.classAttribute();
52
53        final double[] meanTest = new double[testdata.numAttributes()];
54
55        // get means of testdata
56        for (int j = 0; j < testdata.numAttributes(); j++) {
57            if (testdata.attribute(j) != classAttribute) {
58                meanTest[j] = testdata.meanOrMode(j);
59            }
60        }
61
62        // preprocess training data
63        for (Instances traindata : traindataSet) {
64            double[] meanTrain = new double[testdata.numAttributes()];
65            for (int j = 0; j < testdata.numAttributes(); j++) {
66                if (testdata.attribute(j) != classAttribute) {
67                    meanTrain[j] = traindata.meanOrMode(j);
68                }
69            }
70
71            for (int i = 0; i < traindata.numInstances(); i++) {
72                Instance instance = traindata.instance(i);
73                for (int j = 0; j < testdata.numAttributes(); j++) {
74                    if (testdata.attribute(j) != classAttribute) {
75                        instance.setValue(j, instance.value(j) * meanTest[j] / meanTrain[j]);
76                    }
77                }
78            }
79        }
80    }
81
82    /**
83     * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
84     *      weka.core.Instances)
85     */
86    @Override
87    public void apply(Instances testdata, Instances traindata) {
88        final Attribute classAttribute = testdata.classAttribute();
89
90        final double[] meanTest = new double[testdata.numAttributes()];
91
92        // get means of testdata
93        for (int j = 0; j < testdata.numAttributes(); j++) {
94            if (testdata.attribute(j) != classAttribute) {
95                meanTest[j] = testdata.meanOrMode(j);
96            }
97        }
98
99        // preprocess training data
100        final double[] meanTrain = new double[testdata.numAttributes()];
101        for (int j = 0; j < testdata.numAttributes(); j++) {
102            if (testdata.attribute(j) != classAttribute) {
103                meanTrain[j] = traindata.meanOrMode(j);
104            }
105        }
106
107        for (int i = 0; i < traindata.numInstances(); i++) {
108            Instance instance = traindata.instance(i);
109            for (int j = 0; j < testdata.numAttributes(); j++) {
110                if (testdata.attribute(j) != classAttribute) {
111                    instance.setValue(j, instance.value(j) * meanTest[j] / meanTrain[j]);
112                }
113            }
114        }
115    }
116
117}
Note: See TracBrowser for help on using the repository browser.