source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAProcessor.java

Last change on this file was 135, checked in by sherbold, 8 years ago
  • code documentation and formatting
  • Property svn:mime-type set to text/plain
File size: 3.6 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import java.util.Arrays;
18
19import org.apache.commons.math3.stat.descriptive.rank.Median;
20
21import weka.core.Instance;
22import weka.core.Instances;
23
24/**
25 * <p>
26 * This processor implements the CLA strategy from the CLAMI paper at ASE 2014 by Nam et al. With
27 * CLA, the original classification of the data is removed and instead a new classification is
28 * created based on metric values that are higher than the median of the metric.
29 * </p>
30 * <p>
31 * This can also be done for the test data (i.e., TestAsTraining data selection), as the original
32 * classification is completely ignored. Hence, CLA is an approach for unsupervised learning.
33 * </p>
34 *
35 * @author Steffen Herbold
36 */
37public class CLAProcessor implements IProcessesingStrategy {
38
39    /*
40     * (non-Javadoc)
41     *
42     * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
43     */
44    @Override
45    public void setParameter(String parameters) {
46        // dummy, parameters not used
47    }
48
49    /*
50     * (non-Javadoc)
51     *
52     * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core. Instances,
53     * weka.core.Instances)
54     */
55    @Override
56    public void apply(Instances testdata, Instances traindata) {
57        applyCLA(traindata);
58    }
59
60    /**
61     * Applies the CLA processor the the data.
62     *
63     * @param data
64     *            data to which the processor is applied
65     */
66    public void applyCLA(Instances data) {
67        // first determine medians
68        double[] medians = new double[data.numAttributes()];
69        // get medians
70        for (int j = 0; j < data.numAttributes(); j++) {
71            if (j != data.classIndex()) {
72                medians[j] = data.kthSmallestValue(j, (data.numInstances() + 1) >> 1);
73            }
74        }
75        // now determine cluster number for each instance
76        double[] clusterNumber = new double[data.numInstances()];
77        for (int i = 0; i < data.numInstances(); i++) {
78            int countHighValues = 0;
79            Instance currentInstance = data.get(i);
80            for (int j = 0; j < data.numAttributes(); j++) {
81                if (j != data.classIndex()) {
82                    if (currentInstance.value(j) > medians[j]) {
83                        countHighValues++;
84                    }
85                }
86            }
87            clusterNumber[i] = countHighValues;
88        }
89
90        // determine median of cluster number
91        Median m = new Median();
92        double medianClusterNumber = m.evaluate(Arrays.stream(clusterNumber).distinct().toArray());
93
94        // finally modify the instances
95        // drop the unclean instances
96        for (int i = data.numInstances() - 1; i >= 0; i--) {
97            // set the classification
98            if (clusterNumber[i] > medianClusterNumber) {
99                data.get(i).setClassValue(1.0d);
100            }
101            else {
102                data.get(i).setClassValue(0.0d);
103            }
104        }
105    }
106
107}
Note: See TracBrowser for help on using the repository browser.