source: trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/AbstractCODEP.java @ 146

Last change on this file since 146 was 129, checked in by sherbold, 8 years ago
  • Added the same workaround for the problem with Discretize to the TopMetricFilter. We slightly refactored the implementation within AbstractCODEP by moving the rescaling of sets into WekaUtils to facilitate better reuse.
  • Property svn:mime-type set to text/plain
File size: 8.6 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.wekaclassifier;
16
17import java.util.ArrayList;
18import java.util.HashMap;
19import java.util.List;
20import java.util.Map;
21import java.util.logging.Level;
22import java.util.regex.Matcher;
23import java.util.regex.Pattern;
24
25import de.ugoe.cs.cpdp.util.WekaUtils;
26import de.ugoe.cs.util.console.Console;
27import weka.classifiers.AbstractClassifier;
28import weka.classifiers.Classifier;
29import weka.classifiers.bayes.BayesNet;
30import weka.classifiers.functions.Logistic;
31import weka.classifiers.functions.MultilayerPerceptron;
32import weka.classifiers.functions.RBFNetwork;
33import weka.classifiers.rules.DecisionTable;
34import weka.classifiers.trees.ADTree;
35import weka.core.Attribute;
36import weka.core.DenseInstance;
37import weka.core.Instance;
38import weka.core.Instances;
39
40/**
41 * <p>
42 * Implements CODEP proposed by Panichella et al. (2014).
43 * </p>
44 *
45 * @author Steffen Herbold
46 */
47public abstract class AbstractCODEP extends AbstractClassifier {
48
49    /**
50     * Default serialization ID.
51     */
52    private static final long serialVersionUID = 1L;
53
54    /**
55     * List of classifiers that is internally used.
56     */
57    private List<Classifier> internalClassifiers = null;
58
59    /**
60     * List of attributes that is internally used.
61     */
62    private ArrayList<Attribute> internalAttributes = null;
63
64    /**
65     * Trained CODEP classifier.
66     */
67    private Classifier codepClassifier = null;
68
69    /**
70     * Map that store attributes for upscaling for each classifier
71     */
72    private Map<Integer, Integer> upscaleIndex = null;
73
74    /*
75     * (non-Javadoc)
76     *
77     * @see weka.classifiers.AbstractClassifier#classifyInstance(weka.core.Instance)
78     */
79    @Override
80    public double classifyInstance(Instance instance) throws Exception {
81        if (codepClassifier == null) {
82            throw new RuntimeException("classifier must be trained first, call to buildClassifier missing");
83        }
84        Instances tmp = new Instances("tmp", internalAttributes, 1);
85        tmp.setClass(internalAttributes.get(internalAttributes.size() - 1));
86        tmp.add(createInternalInstance(instance));
87        return codepClassifier.classifyInstance(tmp.firstInstance());
88    }
89
90    /*
91     * (non-Javadoc)
92     *
93     * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
94     */
95    @Override
96    public void buildClassifier(Instances traindata) throws Exception {
97        setupInternalClassifiers();
98        setupInternalAttributes();
99        upscaleIndex = new HashMap<>();
100
101        int classifierIndex = 0;
102        boolean secondAttempt = false;
103        Instances traindataCopy = null;
104        for (Classifier classifier : internalClassifiers) {
105            boolean trainingSuccessfull = false;
106            do {
107                Console.traceln(Level.FINE,
108                                "internally training " + classifier.getClass().getName());
109                try {
110                    if (secondAttempt) {
111                        classifier.buildClassifier(traindataCopy);
112                        trainingSuccessfull = true;
113                    }
114                    else {
115                        classifier.buildClassifier(traindata);
116                        trainingSuccessfull = true;
117                    }
118                }
119                catch (IllegalArgumentException e) {
120                    String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
121                    Pattern p = Pattern.compile(regex);
122                    Matcher m = p.matcher(e.getMessage());
123                    if (!m.find()) {
124                        // cannot treat problem, rethrow exception
125                        throw e;
126                    }
127                    String attributeName = m.group(1);
128                    int attrIndex = traindata.attribute(attributeName).index();
129                    if (secondAttempt) {
130                        throw new RuntimeException("cannot be handled correctly yet, because upscaleIndex is a Map");
131                        // traindataCopy = upscaleAttribute(traindataCopy, attrIndex);
132                    }
133                    else {
134                        traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex);
135                    }
136
137                    upscaleIndex.put(classifierIndex, attrIndex);
138                    Console
139                        .traceln(Level.FINE,
140                                 "upscaled attribute " + attributeName + "; restarting training");
141                    secondAttempt = true;
142                    continue;
143                }
144            }
145            while (!trainingSuccessfull); // dummy loop for internal continue
146            classifierIndex++;
147            secondAttempt = false;
148        }
149
150        Instances internalTraindata =
151            new Instances("internal instances", internalAttributes, traindata.size());
152        internalTraindata.setClass(internalAttributes.get(internalAttributes.size() - 1));
153
154        for (Instance instance : traindata) {
155            internalTraindata.add(createInternalInstance(instance));
156        }
157
158        codepClassifier = getCodepClassifier();
159        codepClassifier.buildClassifier(internalTraindata);
160    }
161
162    /**
163     * <p>
164     * Creates a CODEP instance using the classifications of the internal classifiers.
165     * </p>
166     *
167     * @param instance
168     *            instance for which the CODEP instance is created
169     * @return CODEP instance
170     * @throws Exception
171     *             thrown if an exception occurs during classification with an internal classifier
172     */
173    private Instance createInternalInstance(Instance instance) throws Exception {
174        double[] values = new double[internalAttributes.size()];
175        Instances traindataCopy;
176        for (int j = 0; j < internalClassifiers.size(); j++) {
177            if (upscaleIndex.containsKey(j)) {
178                // instance value must be upscaled
179                int attrIndex = upscaleIndex.get(j);
180                double upscaledVal = instance.value(attrIndex) * WekaUtils.SCALER;
181                traindataCopy = new Instances(instance.dataset());
182                instance = new DenseInstance(instance.weight(), instance.toDoubleArray());
183                instance.setValue(attrIndex, upscaledVal);
184                traindataCopy.add(instance);
185                instance.setDataset(traindataCopy);
186            }
187            values[j] = internalClassifiers.get(j).classifyInstance(instance);
188        }
189        values[internalAttributes.size() - 1] = instance.classValue();
190        return new DenseInstance(1.0, values);
191    }
192
193    /**
194     * <p>
195     * Sets up the attributes array.
196     * </p>
197     */
198    private void setupInternalAttributes() {
199        internalAttributes = new ArrayList<>();
200        for (Classifier classifier : internalClassifiers) {
201            internalAttributes.add(new Attribute(classifier.getClass().getName()));
202        }
203        final ArrayList<String> classAttVals = new ArrayList<String>();
204        classAttVals.add("0");
205        classAttVals.add("1");
206        final Attribute classAtt = new Attribute("bug", classAttVals);
207        internalAttributes.add(classAtt);
208    }
209
210    /**
211     * <p>
212     * Sets up the classifier array.
213     * </p>
214     */
215    private void setupInternalClassifiers() {
216        internalClassifiers = new ArrayList<>(6);
217        // create training data with prediction labels
218
219        internalClassifiers.add(new ADTree());
220        internalClassifiers.add(new BayesNet());
221        internalClassifiers.add(new DecisionTable());
222        internalClassifiers.add(new Logistic());
223        internalClassifiers.add(new MultilayerPerceptron());
224        internalClassifiers.add(new RBFNetwork());
225    }
226
227    /**
228     * <p>
229     * Abstract method through which implementing classes define which classifier is used for the
230     * CODEP.
231     * </p>
232     *
233     * @return classifier for CODEP
234     */
235    abstract protected Classifier getCodepClassifier();
236}
Note: See TracBrowser for help on using the repository browser.