/*
 * Decompiled with CFR 0.152.
 */
package weka.classifiers.trees;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.UpdateableClassifier;
import weka.classifiers.trees.ht.ActiveHNode;
import weka.classifiers.trees.ht.GiniSplitMetric;
import weka.classifiers.trees.ht.HNode;
import weka.classifiers.trees.ht.InactiveHNode;
import weka.classifiers.trees.ht.InfoGainSplitMetric;
import weka.classifiers.trees.ht.LeafNode;
import weka.classifiers.trees.ht.LearningNode;
import weka.classifiers.trees.ht.NBNode;
import weka.classifiers.trees.ht.NBNodeAdaptive;
import weka.classifiers.trees.ht.SplitCandidate;
import weka.classifiers.trees.ht.SplitMetric;
import weka.classifiers.trees.ht.SplitNode;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.CapabilitiesHandler;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

public class HoeffdingTree
extends AbstractClassifier
implements UpdateableClassifier,
WeightedInstancesHandler,
OptionHandler,
CapabilitiesHandler,
RevisionHandler,
TechnicalInformationHandler,
Drawable,
Serializable {
    private static final long serialVersionUID = 7117521775722396251L;
    protected Instances m_header;
    protected HNode m_root;
    protected double m_gracePeriod = 200.0;
    protected double m_splitConfidence = 1.0E-7;
    protected double m_hoeffdingTieThreshold = 0.05;
    protected double m_minFracWeightForTwoBranchesGain = 0.01;
    protected int m_selectedSplitMetric = 1;
    protected SplitMetric m_splitMetric = new InfoGainSplitMetric(this.m_minFracWeightForTwoBranchesGain);
    protected int m_leafStrategy = 2;
    protected double m_nbThreshold = 0.0;
    protected int m_activeLeafCount;
    protected int m_inactiveLeafCount;
    protected int m_decisionNodeCount;
    public static final int GINI_SPLIT = 0;
    public static final int INFO_GAIN_SPLIT = 1;
    public static final Tag[] TAGS_SELECTION = new Tag[]{new Tag(0, "Gini split"), new Tag(1, "Info gain split")};
    public static final int LEAF_MAJ_CLASS = 0;
    public static final int LEAF_NB = 1;
    public static final int LEAF_NB_ADAPTIVE = 2;
    public static final Tag[] TAGS_SELECTION2 = new Tag[]{new Tag(0, "Majority class"), new Tag(1, "Naive Bayes"), new Tag(2, "Naive Bayes adaptive")};
    protected boolean m_printLeafModels;

    public String globalInfo() {
        return "A Hoeffding tree (VFDT) is an incremental, anytime decision tree induction algorithm that is capable of learning from massive data streams, assuming that the distribution generating examples does not change over time. Hoeffding trees exploit the fact that a small sample can often be enough to choose an optimal splitting attribute. This idea is supported mathematically by the Hoeffding bound, which quantifies the number of observations (in our case, examples) needed to estimate some statistics within a prescribed precision (in our case, the goodness of an attribute).\n\nA theoretically appealing feature  of Hoeffding Trees not shared by otherincremental decision tree learners is that  it has sound guarantees of performance. Using the Hoeffding bound one can show that  its output is asymptotically nearly identical to that of a non-incremental learner  using infinitely many examples. For more information see: \n\n" + this.getTechnicalInformation().toString();
    }

    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Geoff Hulten and Laurie Spencer and Pedro Domingos");
        result.setValue(TechnicalInformation.Field.TITLE, "Mining time-changing data streams");
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining");
        result.setValue(TechnicalInformation.Field.YEAR, "2001");
        result.setValue(TechnicalInformation.Field.PAGES, "97-106");
        result.setValue(TechnicalInformation.Field.PUBLISHER, "ACM Press");
        return result;
    }

    protected void reset() {
        this.m_root = null;
        this.m_activeLeafCount = 0;
        this.m_inactiveLeafCount = 0;
        this.m_decisionNodeCount = 0;
    }

    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enable(Capabilities.Capability.NOMINAL_ATTRIBUTES);
        result.enable(Capabilities.Capability.DATE_ATTRIBUTES);
        result.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enable(Capabilities.Capability.NOMINAL_CLASS);
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        result.setMinimumNumberInstances(0);
        return result;
    }

    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> newVector = new Vector<Option>();
        newVector.add(new Option("\tThe leaf prediction strategy to use. 0 = majority class, 1 = naive Bayes, 2 = naive Bayes adaptive.\n\t(default = 2)", "L", 1, "-L"));
        newVector.add(new Option("\tThe splitting criterion to use. 0 = Gini, 1 = Info gain\n\t(default = 1)", "S", 1, "-S"));
        newVector.add(new Option("\tThe allowable error in a split decision - values closer to zero will take longer to decide\n\t(default = 1e-7)", "E", 1, "-E"));
        newVector.add(new Option("\tThreshold below which a split will be forced to break ties\n\t(default = 0.05)", "H", 1, "-H"));
        newVector.add(new Option("\tMinimum fraction of weight required down at least two branches for info gain splitting\n\t(default = 0.01)", "M", 1, "-M"));
        newVector.add(new Option("\tGrace period - the number of instances a leaf should observe between split attempts\n\t(default = 200)", "G", 1, "-G"));
        newVector.add(new Option("\tThe number of instances (weight) a leaf should observe before allowing naive Bayes to make predictions (NB or NB adaptive only)\n\t(default = 0)", "N", 1, "-N"));
        newVector.add(new Option("\tPrint leaf models when using naive Bayes at the leaves.", "P", 0, "-P"));
        return newVector.elements();
    }

    @Override
    public void setOptions(String[] options) throws Exception {
        this.reset();
        super.setOptions(options);
        String opt = Utils.getOption('L', options);
        if (opt.length() > 0) {
            this.setLeafPredictionStrategy(new SelectedTag(Integer.parseInt(opt), TAGS_SELECTION2));
        }
        if ((opt = Utils.getOption('S', options)).length() > 0) {
            this.setSplitCriterion(new SelectedTag(Integer.parseInt(opt), TAGS_SELECTION));
        }
        if ((opt = Utils.getOption('E', options)).length() > 0) {
            this.setSplitConfidence(Double.parseDouble(opt));
        }
        if ((opt = Utils.getOption('H', options)).length() > 0) {
            this.setHoeffdingTieThreshold(Double.parseDouble(opt));
        }
        if ((opt = Utils.getOption('M', options)).length() > 0) {
            this.setMinimumFractionOfWeightInfoGain(Double.parseDouble(opt));
        }
        if ((opt = Utils.getOption('G', options)).length() > 0) {
            this.setGracePeriod(Double.parseDouble(opt));
        }
        if ((opt = Utils.getOption('N', options)).length() > 0) {
            this.setNaiveBayesPredictionThreshold(Double.parseDouble(opt));
        }
        this.m_printLeafModels = Utils.getFlag('P', options);
    }

    @Override
    public String[] getOptions() {
        ArrayList<String> options = new ArrayList<String>();
        options.add("-L");
        options.add("" + this.getLeafPredictionStrategy().getSelectedTag().getID());
        options.add("-S");
        options.add("" + this.getSplitCriterion().getSelectedTag().getID());
        options.add("-E");
        options.add("" + this.getSplitConfidence());
        options.add("-H");
        options.add("" + this.getHoeffdingTieThreshold());
        options.add("-M");
        options.add("" + this.getMinimumFractionOfWeightInfoGain());
        options.add("-G");
        options.add("" + this.getGracePeriod());
        options.add("-N");
        options.add("" + this.getNaiveBayesPredictionThreshold());
        if (this.m_printLeafModels) {
            options.add("-P");
        }
        return options.toArray(new String[1]);
    }

    public String printLeafModelsTipText() {
        return "Print leaf models (naive bayes leaves only)";
    }

    public void setPrintLeafModels(boolean p) {
        this.m_printLeafModels = p;
    }

    public boolean getPrintLeafModels() {
        return this.m_printLeafModels;
    }

    public String minimumFractionOfWeightInfoGainTipText() {
        return "Minimum fraction of weight required down at least two branches for info gain splitting.";
    }

    public void setMinimumFractionOfWeightInfoGain(double m) {
        this.m_minFracWeightForTwoBranchesGain = m;
    }

    public double getMinimumFractionOfWeightInfoGain() {
        return this.m_minFracWeightForTwoBranchesGain;
    }

    public String gracePeriodTipText() {
        return "Number of instances (or total weight of instances) a leaf should observe between split attempts.";
    }

    public void setGracePeriod(double grace) {
        this.m_gracePeriod = grace;
    }

    public double getGracePeriod() {
        return this.m_gracePeriod;
    }

    public String hoeffdingTieThresholdTipText() {
        return "Theshold below which a split will be forced to break ties.";
    }

    public void setHoeffdingTieThreshold(double ht) {
        this.m_hoeffdingTieThreshold = ht;
    }

    public double getHoeffdingTieThreshold() {
        return this.m_hoeffdingTieThreshold;
    }

    public String splitConfidenceTipText() {
        return "The allowable error in a split decision. Values closer to zero will take longer to decide.";
    }

    public void setSplitConfidence(double sc) {
        this.m_splitConfidence = sc;
    }

    public double getSplitConfidence() {
        return this.m_splitConfidence;
    }

    public String splitCriterionTipText() {
        return "The splitting criterion to use";
    }

    public void setSplitCriterion(SelectedTag crit) {
        if (crit.getTags() == TAGS_SELECTION) {
            this.m_selectedSplitMetric = crit.getSelectedTag().getID();
        }
    }

    public SelectedTag getSplitCriterion() {
        return new SelectedTag(this.m_selectedSplitMetric, TAGS_SELECTION);
    }

    public String leafPredictionStrategyTipText() {
        return "The leaf prediction strategy to use";
    }

    public void setLeafPredictionStrategy(SelectedTag strat) {
        if (strat.getTags() == TAGS_SELECTION2) {
            this.m_leafStrategy = strat.getSelectedTag().getID();
        }
    }

    public SelectedTag getLeafPredictionStrategy() {
        return new SelectedTag(this.m_leafStrategy, TAGS_SELECTION2);
    }

    public String naiveBayesPredictionThresholdTipText() {
        return "The number of instances (weight) a leaf should observe before allowing naive Bayes (adaptive) to make predictions";
    }

    public void setNaiveBayesPredictionThreshold(double n) {
        this.m_nbThreshold = n;
    }

    public double getNaiveBayesPredictionThreshold() {
        return this.m_nbThreshold;
    }

    protected static double computeHoeffdingBound(double max, double confidence, double weight) {
        return Math.sqrt(max * max * Math.log(1.0 / confidence) / (2.0 * weight));
    }

    @Override
    public void buildClassifier(Instances data) throws Exception {
        this.reset();
        this.m_header = new Instances(data, 0);
        this.m_splitMetric = this.m_selectedSplitMetric == 0 ? new GiniSplitMetric() : new InfoGainSplitMetric(this.m_minFracWeightForTwoBranchesGain);
        data = new Instances(data);
        data.deleteWithMissingClass();
        int i = 0;
        while (i < data.numInstances()) {
            this.updateClassifier(data.instance(i));
            ++i;
        }
        this.getCapabilities().testWithFail(data);
    }

    @Override
    public void updateClassifier(Instance inst) throws Exception {
        if (inst.classIsMissing()) {
            return;
        }
        if (this.m_root == null) {
            this.m_root = this.newLearningNode();
        }
        LeafNode l = this.m_root.leafForInstance(inst, null, null);
        HNode actualNode = l.m_theNode;
        if (actualNode == null) {
            actualNode = new ActiveHNode();
            l.m_parentNode.setChild(l.m_parentBranch, actualNode);
        }
        if (actualNode instanceof LearningNode) {
            double totalWeight;
            actualNode.updateNode(inst);
            if (actualNode instanceof ActiveHNode && (totalWeight = actualNode.totalWeight()) - ((ActiveHNode)actualNode).m_weightSeenAtLastSplitEval > this.m_gracePeriod) {
                this.trySplit((ActiveHNode)actualNode, l.m_parentNode, l.m_parentBranch);
                ((ActiveHNode)actualNode).m_weightSeenAtLastSplitEval = totalWeight;
            }
        }
    }

    @Override
    public double[] distributionForInstance(Instance inst) throws Exception {
        Attribute classAtt = inst.classAttribute();
        double[] pred = new double[classAtt.numValues()];
        if (this.m_root != null) {
            LeafNode l = this.m_root.leafForInstance(inst, null, null);
            HNode actualNode = l.m_theNode;
            if (actualNode == null) {
                actualNode = l.m_parentNode;
            }
            pred = actualNode.getDistribution(inst, classAtt);
        } else {
            int i = 0;
            while (i < classAtt.numValues()) {
                pred[i] = 1.0;
                ++i;
            }
            Utils.normalize(pred);
        }
        return pred;
    }

    protected void deactivateNode(ActiveHNode toDeactivate, SplitNode parent, String parentBranch) {
        InactiveHNode leaf = new InactiveHNode(toDeactivate.m_classDistribution);
        if (parent == null) {
            this.m_root = leaf;
        } else {
            parent.setChild(parentBranch, leaf);
        }
        --this.m_activeLeafCount;
        ++this.m_inactiveLeafCount;
    }

    protected void activateNode(InactiveHNode toActivate, SplitNode parent, String parentBranch) {
        ActiveHNode leaf = new ActiveHNode();
        leaf.m_classDistribution = toActivate.m_classDistribution;
        if (parent == null) {
            this.m_root = leaf;
        } else {
            parent.setChild(parentBranch, leaf);
        }
        ++this.m_activeLeafCount;
        --this.m_inactiveLeafCount;
    }

    protected void trySplit(ActiveHNode node, SplitNode parent, String parentBranch) throws Exception {
        if (node.numEntriesInClassDistribution() > 1) {
            List<SplitCandidate> bestSplits = node.getPossibleSplits(this.m_splitMetric);
            Collections.sort(bestSplits);
            boolean doSplit = false;
            if (bestSplits.size() < 2) {
                doSplit = bestSplits.size() > 0;
            } else {
                double metricMax = this.m_splitMetric.getMetricRange(node.m_classDistribution);
                double hoeffdingBound = HoeffdingTree.computeHoeffdingBound(metricMax, this.m_splitConfidence, node.totalWeight());
                SplitCandidate best = bestSplits.get(bestSplits.size() - 1);
                SplitCandidate secondBest = bestSplits.get(bestSplits.size() - 2);
                if (best.m_splitMerit - secondBest.m_splitMerit > hoeffdingBound || hoeffdingBound < this.m_hoeffdingTieThreshold) {
                    doSplit = true;
                }
            }
            if (doSplit) {
                SplitCandidate best = bestSplits.get(bestSplits.size() - 1);
                if (best.m_splitTest == null) {
                    this.deactivateNode(node, parent, parentBranch);
                } else {
                    SplitNode newSplit = new SplitNode(node.m_classDistribution, best.m_splitTest);
                    int i = 0;
                    while (i < best.numSplits()) {
                        ActiveHNode newChild = this.newLearningNode();
                        newChild.m_classDistribution = best.m_postSplitClassDistributions.get(i);
                        newChild.m_weightSeenAtLastSplitEval = newChild.totalWeight();
                        String branchName = "";
                        if (this.m_header.attribute(best.m_splitTest.splitAttributes().get(0)).isNumeric()) {
                            branchName = i == 0 ? "left" : "right";
                        } else {
                            Attribute splitAtt = this.m_header.attribute(best.m_splitTest.splitAttributes().get(0));
                            branchName = splitAtt.value(i);
                        }
                        newSplit.setChild(branchName, newChild);
                        ++i;
                    }
                    --this.m_activeLeafCount;
                    ++this.m_decisionNodeCount;
                    this.m_activeLeafCount += best.numSplits();
                    if (parent == null) {
                        this.m_root = newSplit;
                    } else {
                        parent.setChild(parentBranch, newSplit);
                    }
                }
            }
        }
    }

    protected ActiveHNode newLearningNode() throws Exception {
        ActiveHNode newChild = this.m_leafStrategy == 0 ? new ActiveHNode() : (this.m_leafStrategy == 1 ? new NBNode(this.m_header, this.m_nbThreshold) : new NBNodeAdaptive(this.m_header, this.m_nbThreshold));
        return newChild;
    }

    public String toString() {
        if (this.m_root == null) {
            return "No model built yet!";
        }
        return this.m_root.toString(this.m_printLeafModels);
    }

    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 9766 $");
    }

    public static void main(String[] args) {
        HoeffdingTree.runClassifier(new HoeffdingTree(), args);
    }

    @Override
    public int graphType() {
        return 1;
    }

    @Override
    public String graph() throws Exception {
        if (this.m_root == null) {
            throw new Exception("No model built yet!");
        }
        this.m_root.installNodeNums(0);
        StringBuffer buff = new StringBuffer();
        buff.append("digraph HoeffdingTree {\n");
        this.m_root.graphTree(buff);
        buff.append("}\n");
        return buff.toString();
    }
}

