[130] | 1 | // Copyright 2015 Georg-August-Universität Göttingen, Germany
|
---|
| 2 | //
|
---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License");
|
---|
| 4 | // you may not use this file except in compliance with the License.
|
---|
| 5 | // You may obtain a copy of the License at
|
---|
| 6 | //
|
---|
| 7 | // http://www.apache.org/licenses/LICENSE-2.0
|
---|
| 8 | //
|
---|
| 9 | // Unless required by applicable law or agreed to in writing, software
|
---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS,
|
---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
| 12 | // See the License for the specific language governing permissions and
|
---|
| 13 | // limitations under the License.
|
---|
| 14 |
|
---|
| 15 | package de.ugoe.cs.cpdp.wekaclassifier;
|
---|
| 16 |
|
---|
| 17 | import java.util.HashSet;
|
---|
| 18 | import java.util.Set;
|
---|
| 19 | import java.util.logging.Level;
|
---|
| 20 | import java.util.regex.Matcher;
|
---|
| 21 | import java.util.regex.Pattern;
|
---|
| 22 |
|
---|
| 23 | import de.ugoe.cs.cpdp.util.WekaUtils;
|
---|
| 24 | import de.ugoe.cs.util.console.Console;
|
---|
| 25 | import weka.classifiers.bayes.BayesNet;
|
---|
| 26 | import weka.core.DenseInstance;
|
---|
| 27 | import weka.core.Instance;
|
---|
| 28 | import weka.core.Instances;
|
---|
| 29 |
|
---|
| 30 | /**
|
---|
| 31 | * <p>
|
---|
| 32 | * Wrapper to max BayesNet to deal with a problem with Discretize
|
---|
| 33 | * </p>
|
---|
| 34 | *
|
---|
| 35 | * @author Steffen Herbold
|
---|
| 36 | */
|
---|
| 37 | public class BayesNetWrapper extends BayesNet {
|
---|
| 38 |
|
---|
| 39 | /**
|
---|
| 40 | * generated ID
|
---|
| 41 | */
|
---|
| 42 | private static final long serialVersionUID = -4835134612921456157L;
|
---|
| 43 |
|
---|
| 44 | /**
|
---|
| 45 | * Map that store attributes for upscaling for each classifier
|
---|
| 46 | */
|
---|
| 47 | private Set<Integer> upscaleIndex = new HashSet<>();
|
---|
| 48 |
|
---|
| 49 | /*
|
---|
| 50 | * (non-Javadoc)
|
---|
| 51 | *
|
---|
| 52 | * @see weka.classifiers.bayes.BayesNet#buildClassifier(weka.core.Instances)
|
---|
| 53 | */
|
---|
| 54 | @Override
|
---|
| 55 | public void buildClassifier(Instances traindata) throws Exception {
|
---|
| 56 | boolean trainingSuccessfull = false;
|
---|
| 57 | boolean secondAttempt = false;
|
---|
| 58 | Instances traindataCopy = null;
|
---|
| 59 | do {
|
---|
| 60 | try {
|
---|
| 61 | if (secondAttempt) {
|
---|
| 62 | super.buildClassifier(traindataCopy);
|
---|
| 63 | trainingSuccessfull = true;
|
---|
| 64 | }
|
---|
| 65 | else {
|
---|
| 66 | super.buildClassifier(traindata);
|
---|
| 67 | trainingSuccessfull = true;
|
---|
| 68 | }
|
---|
| 69 | }
|
---|
| 70 | catch (IllegalArgumentException e) {
|
---|
| 71 | String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
|
---|
| 72 | Pattern p = Pattern.compile(regex);
|
---|
| 73 | Matcher m = p.matcher(e.getMessage());
|
---|
| 74 | if (!m.find()) {
|
---|
| 75 | // cannot treat problem, rethrow exception
|
---|
| 76 | throw e;
|
---|
| 77 | }
|
---|
| 78 | String attributeName = m.group(1);
|
---|
| 79 | int attrIndex = traindata.attribute(attributeName).index();
|
---|
| 80 | if (secondAttempt) {
|
---|
| 81 | throw new RuntimeException("cannot be handled correctly yet, because upscaleIndex is a Map");
|
---|
| 82 | // traindataCopy = upscaleAttribute(traindataCopy, attrIndex);
|
---|
| 83 | }
|
---|
| 84 | else {
|
---|
| 85 | traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex);
|
---|
| 86 | }
|
---|
| 87 |
|
---|
| 88 | upscaleIndex.add(attrIndex);
|
---|
| 89 | Console.traceln(Level.FINE, "upscaled attribute " + attributeName +
|
---|
| 90 | "; restarting training of BayesNet");
|
---|
| 91 | secondAttempt = true;
|
---|
| 92 | continue;
|
---|
| 93 | }
|
---|
| 94 | }
|
---|
| 95 | while (!trainingSuccessfull); // dummy loop for internal continue
|
---|
| 96 | }
|
---|
| 97 |
|
---|
| 98 | /*
|
---|
| 99 | * (non-Javadoc)
|
---|
| 100 | *
|
---|
| 101 | * @see weka.classifiers.bayes.BayesNet#distributionForInstance(weka.core.Instance)
|
---|
| 102 | */
|
---|
| 103 | @Override
|
---|
| 104 | public double[] distributionForInstance(Instance instance) throws Exception {
|
---|
| 105 | Instances traindataCopy;
|
---|
| 106 | for (int attrIndex : upscaleIndex) {
|
---|
| 107 | // instance value must be upscaled
|
---|
| 108 | double upscaledVal = instance.value(attrIndex) * WekaUtils.SCALER;
|
---|
| 109 | traindataCopy = new Instances(instance.dataset());
|
---|
| 110 | instance = new DenseInstance(instance.weight(), instance.toDoubleArray());
|
---|
| 111 | instance.setValue(attrIndex, upscaledVal);
|
---|
| 112 | traindataCopy.add(instance);
|
---|
| 113 | instance.setDataset(traindataCopy);
|
---|
| 114 | }
|
---|
| 115 | return super.distributionForInstance(instance);
|
---|
| 116 | }
|
---|
| 117 | }
|
---|