source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Normalization.java @ 47

Last change on this file since 47 was 41, checked in by sherbold, 9 years ago
  • formatted code and added copyrights
  • Property svn:mime-type set to text/plain
File size: 4.5 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import org.apache.commons.collections4.list.SetUniqueList;
18
19import weka.core.Attribute;
20import weka.core.Instance;
21import weka.core.Instances;
22import weka.experiment.Stats;
23
24/**
25 * Normalizes each attribute of each data set separately.
26 *
27 * @author Steffen Herbold
28 */
29public class Normalization implements ISetWiseProcessingStrategy, IProcessesingStrategy {
30
31    /**
32     * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
33     *      org.apache.commons.collections4.list.SetUniqueList)
34     */
35    @Override
36    public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
37        final Attribute classAtt = testdata.classAttribute();
38
39        for (int i = 0; i < testdata.numAttributes(); i++) {
40            if (!testdata.attribute(i).equals(classAtt)) {
41                Stats teststats = testdata.attributeStats(i).numericStats;
42
43                double minVal = teststats.min;
44                double maxVal = teststats.max;
45
46                for (Instances traindata : traindataSet) {
47                    Stats trainstats = traindata.attributeStats(i).numericStats;
48                    if (minVal > trainstats.min) {
49                        minVal = trainstats.min;
50                    }
51                    if (maxVal < trainstats.max) {
52                        maxVal = trainstats.max;
53                    }
54                }
55
56                for (int j = 0; j < testdata.numInstances(); j++) {
57                    Instance inst = testdata.instance(j);
58                    double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
59                    inst.setValue(i, newValue);
60                }
61
62                for (Instances traindata : traindataSet) {
63                    for (int j = 0; j < traindata.numInstances(); j++) {
64                        Instance inst = traindata.instance(j);
65                        double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
66                        inst.setValue(i, newValue);
67                    }
68                }
69            }
70        }
71
72    }
73
74    /**
75     * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
76     *      weka.core.Instances)
77     */
78    @Override
79    public void apply(Instances testdata, Instances traindata) {
80        final Attribute classAtt = testdata.classAttribute();
81
82        for (int i = 0; i < testdata.numAttributes(); i++) {
83            if (!testdata.attribute(i).equals(classAtt)) {
84                Stats teststats = testdata.attributeStats(i).numericStats;
85
86                double minVal = teststats.min;
87                double maxVal = teststats.max;
88
89                Stats trainstats = traindata.attributeStats(i).numericStats;
90                if (minVal > trainstats.min) {
91                    minVal = trainstats.min;
92                }
93                if (maxVal < trainstats.max) {
94                    maxVal = trainstats.max;
95                }
96
97                for (int j = 0; j < testdata.numInstances(); j++) {
98                    Instance inst = testdata.instance(j);
99                    double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
100                    inst.setValue(i, newValue);
101                }
102
103                for (int j = 0; j < traindata.numInstances(); j++) {
104                    Instance inst = traindata.instance(j);
105                    double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
106                    inst.setValue(i, newValue);
107                }
108            }
109        }
110    }
111
112    /**
113     * Does not have parameters. String is ignored.
114     *
115     * @param parameters
116     *            ignored
117     */
118    @Override
119    public void setParameter(String parameters) {
120        // no parameters
121    }
122}
Note: See TracBrowser for help on using the repository browser.