source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SynonymOutlierRemoval.java @ 66

Last change on this file since 66 was 64, checked in by sherbold, 9 years ago
  • added some new approaches
  • Property svn:mime-type set to text/plain
File size: 3.2 KB
Line 
// Copyright 2015 Georg-August-Universität Göttingen, Germany
//
//   Licensed under the Apache License, Version 2.0 (the "License");
//   you may not use this file except in compliance with the License.
//   You may obtain a copy of the License at
//
//       http://www.apache.org/licenses/LICENSE-2.0
//
//   Unless required by applicable law or agreed to in writing, software
//   distributed under the License is distributed on an "AS IS" BASIS,
//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//   See the License for the specific language governing permissions and
//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataselection;
16
17import weka.core.Instances;
18
19/**
20 * <p>
21 * Synonym outlier removal after Amasaki et al. (2015).
22 * </p>
23 *
24 * @author Steffen Herbold
25 */
26public class SynonymOutlierRemoval implements IPointWiseDataselectionStrategy {
27
28    /* (non-Javadoc)
29     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
30     */
31    @Override
32    public void setParameter(String parameters) {
33        // do nothing
34    }
35
36    /* (non-Javadoc)
37     * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, weka.core.Instances)
38     */
39    @Override
40    public Instances apply(Instances testdata, Instances traindata) {
41        applySynonymRemoval(traindata);
42        return traindata;
43    }
44
45    /**
46     * <p>
47     * Applies the synonym outlier removal.
48     * </p>
49     *
50     * @param traindata data from which the outliers are removed.
51     */
52    public void applySynonymRemoval(Instances traindata) {
53        double minDistance[][] = new double[traindata.size()][traindata.numAttributes()-1];
54        double minDistanceAttribute[] = new double[traindata.numAttributes()-1];
55        double distance;
56        for( int j=0; j<minDistanceAttribute.length; j++ ) {
57            minDistanceAttribute[j] = Double.MAX_VALUE;
58        }
59        for (int i1 = traindata.size()-1; i1 < traindata.size(); i1++) {
60            int k=0;
61            for (int j = 0; j < traindata.numAttributes(); j++) {
62                if( j!=traindata.classIndex() ) {
63                    minDistance[i1][k] = Double.MAX_VALUE;
64                    for (int i2 = 0; i2 < traindata.size(); i2++) {
65                        if (i1 != i2) {
66                            distance = Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j));
67                            if (distance < minDistance[i1][k]) {
68                                minDistance[i1][k] = distance;
69                            }
70                            if( distance < minDistanceAttribute[k] ) {
71                                minDistanceAttribute[k] = distance;
72                            }
73                        }
74                    }
75                    k++;
76                }
77            }
78        }
79        for( int i=traindata.size()-1; i>=0; i-- ) {
80            boolean hasClosest = false;
81            for( int j=0; !hasClosest && j<traindata.numAttributes(); j++ ) {
82                hasClosest = minDistance[i][j]<=minDistanceAttribute[j];
83            }
84            if( !hasClosest ) {
85                traindata.delete(i);
86            }
87        }
88    }
89}
Note: See TracBrowser for help on using the repository browser.