source: trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/DecisionTableWrapper.java @ 146

Last change on this file since 146 was 130, checked in by sherbold, 8 years ago
  • added wrapper classes for BayesNet and DecisionTable training that can upscale attributes in case Discretize fails due to differences between buckets being smaller than 0.000001
  • Property svn:mime-type set to text/plain
File size: 4.2 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.wekaclassifier;
16
17import java.util.HashSet;
18import java.util.Set;
19import java.util.logging.Level;
20import java.util.regex.Matcher;
21import java.util.regex.Pattern;
22
23import de.ugoe.cs.cpdp.util.WekaUtils;
24import de.ugoe.cs.util.console.Console;
25import weka.classifiers.rules.DecisionTable;
26import weka.core.DenseInstance;
27import weka.core.Instance;
28import weka.core.Instances;
29
30/**
31 * <p>
32 * Wrapper to max DecisionTable to deal with a problem with Discretize
33 * </p>
34 *
35 * @author Steffen Herbold
36 */
37public class DecisionTableWrapper extends DecisionTable {
38
39    /**
40     * generated ID
41     */
42    private static final long serialVersionUID = -483513461292145615L;
43
44    /**
45     * Map that store attributes for upscaling for each classifier
46     */
47    private Set<Integer> upscaleIndex = new HashSet<>();
48
49    /*
50     * (non-Javadoc)
51     *
52     * @see weka.classifiers.rules.DecisionTable#buildClassifier(weka.core.Instances)
53     */
54    @Override
55    public void buildClassifier(Instances traindata) throws Exception {
56        boolean trainingSuccessfull = false;
57        boolean secondAttempt = false;
58        Instances traindataCopy = null;
59        do {
60            try {
61                if (secondAttempt) {
62                    super.buildClassifier(traindataCopy);
63                    trainingSuccessfull = true;
64                }
65                else {
66                    super.buildClassifier(traindata);
67                    trainingSuccessfull = true;
68                }
69            }
70            catch (IllegalArgumentException e) {
71                String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
72                Pattern p = Pattern.compile(regex);
73                Matcher m = p.matcher(e.getMessage());
74                if (!m.find()) {
75                    // cannot treat problem, rethrow exception
76                    throw e;
77                }
78                String attributeName = m.group(1);
79                int attrIndex = traindata.attribute(attributeName).index();
80                if (secondAttempt) {
81                    throw new RuntimeException("cannot be handled correctly yet, because upscaleIndex is a Map");
82                    // traindataCopy = upscaleAttribute(traindataCopy, attrIndex);
83                }
84                else {
85                    traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex);
86                }
87
88                upscaleIndex.add(attrIndex);
89                Console.traceln(Level.FINE, "upscaled attribute " + attributeName +
90                    "; restarting training of DecisionTable");
91                secondAttempt = true;
92                continue;
93            }
94        }
95        while (!trainingSuccessfull); // dummy loop for internal continue
96    }
97
98    /*
99     * (non-Javadoc)
100     *
101     * @see weka.classifiers.rules.DecisionTable#distributionForInstance(weka.core.Instance)
102     */
103    @Override
104    public double[] distributionForInstance(Instance instance) throws Exception {
105        Instances traindataCopy;
106        for (int attrIndex : upscaleIndex) {
107            // instance value must be upscaled
108            double upscaledVal = instance.value(attrIndex) * WekaUtils.SCALER;
109            traindataCopy = new Instances(instance.dataset());
110            instance = new DenseInstance(instance.weight(), instance.toDoubleArray());
111            instance.setValue(attrIndex, upscaledVal);
112            traindataCopy.add(instance);
113            instance.setDataset(traindataCopy);
114        }
115        return super.distributionForInstance(instance);
116    }
117}
Note: See TracBrowser for help on using the repository browser.