source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAProcessor.java @ 87

Last change on this file since 87 was 86, checked in by sherbold, 9 years ago
  • switched workspace encoding to UTF-8 and fixed broken characters
  • Property svn:mime-type set to text/plain
File size: 3.7 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import java.util.Arrays;
18
19import org.apache.commons.math3.stat.descriptive.rank.Median;
20
21import weka.core.Instance;
22import weka.core.Instances;
23
24/**
25 * <p>
26 * This processor implements the CLA strategy from the CLAMI paper at ASE 2014 be Nam et al. With
27 * CLA, the original classification of the data is removed and instead a new classification is
28 * created based on metric values that are higher than the median of the metric.
29 * </p>
30 * <p>
31 * This can also be done for the test data (i.e., TestAsTraining data selection), as the original
32 * classification is completely ignored. Hence, CLA is an approach for unsupervised learning.
33 * </p>
34 *
35 * @author Steffen Herbold
36 */
37public class CLAProcessor implements IProcessesingStrategy {
38
39    /*
40     * (non-Javadoc)
41     *
42     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
43     */
44    @Override
45    public void setParameter(String parameters) {
46        // TODO Auto-generated method stub
47
48    }
49
50    /*
51     * (non-Javadoc)
52     *
53     * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core. Instances,
54     * weka.core.Instances)
55     */
56    @Override
57    public void apply(Instances testdata, Instances traindata) {
58        applyCLA(traindata);
59    }
60
61    /**
62     * Applies the CLA processor the the data.
63     *
64     * @param data
65     *            data to which the processor is applied
66     */
67    public void applyCLA(Instances data) {
68        // first determine medians
69        double[] medians = new double[data.numAttributes()];
70        // get medians
71        for (int j = 0; j < data.numAttributes(); j++) {
72            if (j != data.classIndex()) {
73                medians[j] = data.kthSmallestValue(j, (data.numInstances() + 1) >> 1);
74            }
75        }
76        // now determine cluster number for each instance
77        double[] clusterNumber = new double[data.numInstances()];
78        for (int i = 0; i < data.numInstances(); i++) {
79            int countHighValues = 0;
80            Instance currentInstance = data.get(i);
81            for (int j = 0; j < data.numAttributes(); j++) {
82                if (j != data.classIndex()) {
83                    if (currentInstance.value(j) > medians[j]) {
84                        countHighValues++;
85                    }
86                }
87            }
88            clusterNumber[i] = countHighValues;
89        }
90
91        // determine median of cluster number
92        Median m = new Median();
93        double medianClusterNumber = m.evaluate(Arrays.stream(clusterNumber).distinct().toArray());
94
95        // finally modify the instances
96        // drop the unclean instances
97        for (int i = data.numInstances() - 1; i >= 0; i--) {
98            // set the classification
99            if (clusterNumber[i] > medianClusterNumber) {
100                data.get(i).setClassValue(1.0d);
101            }
102            else {
103                data.get(i).setClassValue(0.0d);
104            }
105        }
106    }
107
108}
Note: See TracBrowser for help on using the repository browser.