Context Navigation

CLAProcessor.java @ 74

Last change on this file since 74 was 57, checked in by sherbold, 9 years ago
CLA now working with the median of the distinct cluster numbers (credits to JC for pointing this out)
Property svn:mime-type set to `text/plain`
File size: 3.7 KB

Line
1	// Copyright 2015 Georg-August-Universität Göttingen, Germany
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	package de.ugoe.cs.cpdp.dataprocessing;
16
17	import java.util.Arrays;
18
19	import org.apache.commons.math3.stat.descriptive.rank.Median;
20
21	import weka.core.Instance;
22	import weka.core.Instances;
23
24	/**
25	* <p>
26	* This processor implements the CLA strategy from the CLAMI paper at ASE 2014 be Nam et al. With
27	* CLA, the original classification of the data is removed and instead a new classification is
28	* created based on metric values that are higher than the median of the metric.
29	* </p>
30	* <p>
31	* This can also be done for the test data (i.e., TestAsTraining data selection), as the original
32	* classification is completely ignored. Hence, CLA is an approach for unsupervised learning.
33	* </p>
34	*
35	* @author Steffen Herbold
36	*/
37	public class CLAProcessor implements IProcessesingStrategy {
38
39	/*
40	* (non-Javadoc)
41	*
42	* @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String)
43	*/
44	@Override
45	public void setParameter(String parameters) {
46	// TODO Auto-generated method stub
47
48	}
49
50	/*
51	* (non-Javadoc)
52	*
53	* @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core. Instances,
54	* weka.core.Instances)
55	*/
56	@Override
57	public void apply(Instances testdata, Instances traindata) {
58	applyCLA(traindata);
59	}
60
61	/**
62	* Applies the CLA processor the the data.
63	*
64	* @param data
65	* data to which the processor is applied
66	*/
67	public void applyCLA(Instances data) {
68	// first determine medians
69	double[] medians = new double[data.numAttributes()];
70	// get medians
71	for (int j = 0; j < data.numAttributes(); j++) {
72	if (j != data.classIndex()) {
73	medians[j] = data.kthSmallestValue(j, (data.numInstances() + 1) >> 1);
74	}
75	}
76	// now determine cluster number for each instance
77	double[] clusterNumber = new double[data.numInstances()];
78	for (int i = 0; i < data.numInstances(); i++) {
79	int countHighValues = 0;
80	Instance currentInstance = data.get(i);
81	for (int j = 0; j < data.numAttributes(); j++) {
82	if (j != data.classIndex()) {
83	if (currentInstance.value(j) > medians[j]) {
84	countHighValues++;
85	}
86	}
87	}
88	clusterNumber[i] = countHighValues;
89	}
90
91	// determine median of cluster number
92	Median m = new Median();
93	double medianClusterNumber = m.evaluate(Arrays.stream(clusterNumber).distinct().toArray());
94
95	// finally modify the instances
96	// drop the unclean instances
97	for (int i = data.numInstances() - 1; i >= 0; i--) {
98	// set the classification
99	if (clusterNumber[i] > medianClusterNumber) {
100	data.get(i).setClassValue(1.0d);
101	}
102	else {
103	data.get(i).setClassValue(0.0d);
104	}
105	}
106	}
107
108	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format