source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/SynonymAttributePruning.java @ 87

Last change on this file since 87 was 86, checked in by sherbold, 9 years ago
  • switched workspace encoding to UTF-8 and fixed broken characters
  • Property svn:mime-type set to text/plain
File size: 3.2 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import weka.core.Instances;
18
19/**
20 * <p>
21 * Synonym pruning after Amasaki et al. (2015). The selection of the attributes for pruning happens
22 * only on the training data. The attributes are deleted from both the training and test data.
23 * </p>
24 *
25 * @author Steffen Herbold
26 */
27public class SynonymAttributePruning implements IProcessesingStrategy {
28
29    /*
30     * (non-Javadoc)
31     *
32     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
33     */
34    @Override
35    public void setParameter(String parameters) {
36
37    }
38
39    /**
40     * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
41     *      weka.core.Instances)
42     */
43    @Override
44    public void apply(Instances testdata, Instances traindata) {
45        applySynonymPruning(testdata, traindata);
46    }
47
48    /**
49     * <p>
50     * Applies the synonym pruning based on the training data.
51     * </p>
52     *
53     * @param testdata
54     *            the test data
55     * @param traindata
56     *            the training data
57     */
58    private void applySynonymPruning(Instances testdata, Instances traindata) {
59        double distance;
60        for (int j = traindata.numAttributes() - 1; j >= 0; j--) {
61            if( j!=traindata.classIndex() ) {
62                boolean hasClosest = false;
63                for (int i1 = 0; !hasClosest && i1 < traindata.size(); i1++) {
64                    for (int i2 = 0; !hasClosest && i2 < traindata.size(); i2++) {
65                        if (i1 != i2) {
66                            double minVal = Double.MAX_VALUE;
67                            double distanceJ = Double.MAX_VALUE;
68                            for (int k = 0; k < traindata.numAttributes(); k++) {
69                                distance = Math.abs(traindata.get(i1).value(k) - traindata.get(i2).value(k));
70                                if (distance < minVal) {
71                                    minVal = distance;
72                                }
73                                if (k == j) {
74                                    distanceJ = distance;
75                                }
76                            }
77                            hasClosest = distanceJ <= minVal;
78                        }
79                    }
80                }
81                if (!hasClosest) {
82                    testdata.deleteAttributeAt(j);
83                    traindata.deleteAttributeAt(j);
84                }
85            }
86        }
87    }
88}
Note: See TracBrowser for help on using the repository browser.