source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SynonymOutlierRemoval.java @ 135

Last change on this file since 135 was 135, checked in by sherbold, 8 years ago
  • code documentation and formatting
  • Property svn:mime-type set to text/plain
File size: 3.4 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataselection;
16
17import weka.core.Instances;
18
19/**
20 * <p>
21 * Synonym outlier removal after Amasaki et al. (2015).
22 * </p>
23 *
24 * @author Steffen Herbold
25 */
26public class SynonymOutlierRemoval implements IPointWiseDataselectionStrategy {
27
28    /*
29     * (non-Javadoc)
30     *
31     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
32     */
33    @Override
34    public void setParameter(String parameters) {
35        // do nothing
36    }
37
38    /*
39     * (non-Javadoc)
40     *
41     * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances,
42     * weka.core.Instances)
43     */
44    @Override
45    public Instances apply(Instances testdata, Instances traindata) {
46        applySynonymRemoval(traindata);
47        return traindata;
48    }
49
50    /**
51     * <p>
52     * Applies the synonym outlier removal.
53     * </p>
54     *
55     * @param traindata
56     *            data from which the outliers are removed.
57     */
58    public void applySynonymRemoval(Instances traindata) {
59        double minDistance[][] = new double[traindata.size()][traindata.numAttributes() - 1];
60        double minDistanceAttribute[] = new double[traindata.numAttributes() - 1];
61        double distance;
62        for (int j = 0; j < minDistanceAttribute.length; j++) {
63            minDistanceAttribute[j] = Double.MAX_VALUE;
64        }
65        for (int i1 = traindata.size() - 1; i1 < traindata.size(); i1++) {
66            int k = 0;
67            for (int j = 0; j < traindata.numAttributes(); j++) {
68                if (j != traindata.classIndex()) {
69                    minDistance[i1][k] = Double.MAX_VALUE;
70                    for (int i2 = 0; i2 < traindata.size(); i2++) {
71                        if (i1 != i2) {
72                            distance =
73                                Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j));
74                            if (distance < minDistance[i1][k]) {
75                                minDistance[i1][k] = distance;
76                            }
77                            if (distance < minDistanceAttribute[k]) {
78                                minDistanceAttribute[k] = distance;
79                            }
80                        }
81                    }
82                    k++;
83                }
84            }
85        }
86        for (int i = traindata.size() - 1; i >= 0; i--) {
87            boolean hasClosest = false;
88            for (int j = 0; !hasClosest && j < traindata.numAttributes(); j++) {
89                hasClosest = minDistance[i][j] <= minDistanceAttribute[j];
90            }
91            if (!hasClosest) {
92                traindata.delete(i);
93            }
94        }
95    }
96}
Note: See TracBrowser for help on using the repository browser.