/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.supervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.ContingencyTables;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SpecialFunctions;
import weka.core.Statistics;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.SimpleBatchFilter;
import weka.filters.SupervisedFilter;

/**
 * Supervised batch filter that merges values of nominal attributes using the
 * merging steps of the CHAID method (Steps 1 and 2 in Kass, 1980).
 *
 * <p>For every selected nominal attribute other than the class, the pair of
 * values whose class distributions are least different according to a
 * chi-squared test is merged repeatedly until the least different pair is
 * significantly different at the configured significance level. The resulting
 * attribute is then checked with a Bonferroni-adjusted chi-squared test; if it
 * fails that test all of its values are merged into one.
 *
 * <p>Options handled by this class: {@code -L <double>} (significance level),
 * {@code -R <range>} (attributes to act on), {@code -V} (invert the range) and
 * {@code -O} (use short identifiers for merged values).
 */
public class MergeNominalValues
    extends SimpleBatchFilter
    implements SupervisedFilter, WeightedInstancesHandler, TechnicalInformationHandler {

    /** For serialization. */
    static final long serialVersionUID = 7447337831221353842L;

    /** Significance level used when no {@code -L} option is given. */
    private static final double DEFAULT_SIGNIFICANCE_LEVEL = 0.05;

    /** Attribute range used when no {@code -R} option is given. */
    private static final String DEFAULT_ATTRIBUTE_RANGE = "first-last";

    /** Significance level for the chi-squared tests. */
    protected double m_SigLevel = DEFAULT_SIGNIFICANCE_LEVEL;

    /** Range of attributes the filter acts on. */
    protected Range m_SelectCols = new Range(DEFAULT_ATTRIBUTE_RANGE);

    /** Indices selected by {@link #m_SelectCols}; set in {@link #determineOutputFormat(Instances)}. */
    protected int[] m_SelectedAttributes;

    /** Per attribute: {@code true} if at least one of its values was merged with another. */
    protected boolean[] m_AttToBeModified;

    /**
     * Per processed attribute: maps each original value index to the index of the
     * merged value; {@code null} for attributes that were not processed.
     */
    protected int[][] m_Indicators;

    /** Whether merged values are named by number instead of by the joined original labels. */
    protected boolean m_UseShortIdentifiers = false;

    /**
     * Returns a description of this filter, including the technical reference.
     *
     * @return the description text
     */
    @Override
    public String globalInfo() {
        return "Merges values of all nominal attributes among the specified attributes, "
            + "excluding the class attribute, using the CHAID method, but without "
            + "considering to re-split merged subsets. It implements Steps 1 and 2 "
            + "described by Kass (1980), see\n\n"
            + getTechnicalInformation().toString() + "\n\n"
            + "Once attribute values have been merged, a chi-squared test using the Bonferroni "
            + "correction is applied to check if the resulting attribute is a valid predictor, "
            + "based on the Bonferroni multiplier in Equation 3.2 in Kass (1980). If an attribute does "
            + "not pass this test, all remaining values (if any) are merged. Nevertheless, useless "
            + "predictors can slip through without being fully merged, e.g. identifier attributes.\n\n"
            + "The code applies the Yates correction when the chi-squared statistic is computed.\n\n"
            + "Note that the algorithm is quadratic in the number of attribute values for an attribute.";
    }

    /**
     * Returns the bibliographic reference for the method implemented here.
     *
     * @return the reference to Kass (1980)
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation info = new TechnicalInformation(TechnicalInformation.Type.ARTICLE);
        info.setValue(TechnicalInformation.Field.AUTHOR, "Gordon V. Kass");
        info.setValue(TechnicalInformation.Field.TITLE,
            "An Exploratory Technique for Investigating Large Quantities of Categorical Data");
        info.setValue(TechnicalInformation.Field.JOURNAL, "Applied Statistics");
        info.setValue(TechnicalInformation.Field.YEAR, "1980");
        info.setValue(TechnicalInformation.Field.VOLUME, "29");
        info.setValue(TechnicalInformation.Field.NUMBER, "2");
        info.setValue(TechnicalInformation.Field.PAGES, "119-127");
        return info;
    }

    /**
     * Lists the options of this filter followed by those of the superclass.
     *
     * @return an enumeration of all available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        // Option names carry no leading dash, matching the R/V/O entries below.
        options.addElement(new Option(
            "\tThe significance level (default: 0.05).\n",
            "L", 1, "-L <double>"));
        options.addElement(new Option(
            "\tSets list of attributes to act on (or its inverse). 'first and 'last' are accepted as well.'\n"
                + "\tE.g.: first-5,7,9,20-last\n"
                + "\t(default: first-last)",
            "R", 1, "-R <range>"));
        options.addElement(new Option(
            "\tInvert matching sense (i.e. act on all attributes not specified in list)",
            "V", 0, "-V"));
        options.addElement(new Option(
            "\tUse short identifiers for merged subsets.",
            "O", 0, "-O"));
        options.addAll(Collections.list(super.listOptions()));
        return options.elements();
    }

    /**
     * Returns the current settings as an option array: {@code -L}, {@code -R},
     * optionally {@code -V} and {@code -O}, then the superclass options.
     *
     * @return the option array
     */
    @Override
    public String[] getOptions() {
        ArrayList<String> options = new ArrayList<String>();
        options.add("-L");
        options.add(String.valueOf(getSignificanceLevel()));
        options.add("-R");
        options.add(getAttributeIndices());
        if (getInvertSelection()) {
            options.add("-V");
        }
        if (getUseShortIdentifiers()) {
            options.add("-O");
        }
        Collections.addAll(options, super.getOptions());
        return options.toArray(new String[options.size()]);
    }

    /**
     * Parses the given option array. Missing {@code -L} and {@code -R} options
     * reset the corresponding setting to its default (0.05 and "first-last").
     *
     * @param options the options to parse
     * @throws Exception if an option cannot be parsed or unknown options remain
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String sigLevel = Utils.getOption('L', options);
        setSignificanceLevel(
            sigLevel.length() != 0 ? Double.parseDouble(sigLevel) : DEFAULT_SIGNIFICANCE_LEVEL);

        String range = Utils.getOption('R', options);
        setAttributeIndices(range.length() != 0 ? range : DEFAULT_ATTRIBUTE_RANGE);

        setInvertSelection(Utils.getFlag('V', options));
        setUseShortIdentifiers(Utils.getFlag('O', options));

        super.setOptions(options);
        Utils.checkForRemainingOptions(options);
    }

    /**
     * Returns the tip text for the significance level property.
     *
     * @return the tip text
     */
    public String significanceLevelTipText() {
        return "The significance level for the chi-squared test used to decide when to stop merging.";
    }

    /**
     * Gets the significance level.
     *
     * @return the significance level
     */
    public double getSignificanceLevel() {
        return m_SigLevel;
    }

    /**
     * Sets the significance level.
     *
     * @param sF the significance level
     */
    public void setSignificanceLevel(double sF) {
        m_SigLevel = sF;
    }

    /**
     * Returns the tip text for the attribute indices property.
     *
     * @return the tip text
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on (or its inverse). This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
    }

    /**
     * Gets the range of attributes to act on, as a range string.
     *
     * @return the range string
     */
    public String getAttributeIndices() {
        return m_SelectCols.getRanges();
    }

    /**
     * Sets the range of attributes to act on.
     *
     * @param rangeList the range string, e.g. {@code "first-3,5,6-10,last"}
     */
    public void setAttributeIndices(String rangeList) {
        m_SelectCols.setRanges(rangeList);
    }

    /**
     * Sets the attributes to act on from an array of indices.
     *
     * @param attributes the attribute indices, converted with {@code Range.indicesToRangeList}
     */
    public void setAttributeIndicesArray(int[] attributes) {
        setAttributeIndices(Range.indicesToRangeList(attributes));
    }

    /**
     * Returns the tip text for the invert selection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "Determines whether selected attributes are to be acted on or all other attributes are used instead.";
    }

    /**
     * Gets whether the attribute selection is inverted.
     *
     * @return {@code true} if the selection is inverted
     */
    public boolean getInvertSelection() {
        return m_SelectCols.getInvert();
    }

    /**
     * Sets whether the attribute selection is inverted.
     *
     * @param invert {@code true} to invert the selection
     */
    public void setInvertSelection(boolean invert) {
        m_SelectCols.setInvert(invert);
    }

    /**
     * Returns the tip text for the short identifiers property.
     *
     * @return the tip text
     */
    public String useShortIdentifiersTipText() {
        return "Whether to use short identifiers for the merged values.";
    }

    /**
     * Gets whether short identifiers are used for merged values.
     *
     * @return {@code true} if short identifiers are used
     */
    public boolean getUseShortIdentifiers() {
        return m_UseShortIdentifiers;
    }

    /**
     * Sets whether short identifiers are used for merged values.
     *
     * @param b {@code true} to use short identifiers
     */
    public void setUseShortIdentifiers(boolean b) {
        m_UseShortIdentifiers = b;
    }

    /**
     * Requests the full input data when the output format is determined; the
     * class frequencies are counted from the instances passed to
     * {@link #determineOutputFormat(Instances)}.
     *
     * @return always {@code true}
     */
    @Override
    public boolean allowAccessToFullInputFormat() {
        return true;
    }

    /**
     * Determines the output format. Counts weighted class frequencies for each
     * selected nominal non-class attribute, merges values via
     * {@link #mergeValues(double[][])}, and replaces every attribute for which
     * at least one value was merged by a new nominal attribute named
     * {@code <name>_merged_values}. All other attributes are copied.
     *
     * <p>As a side effect this sets {@link #m_SelectedAttributes},
     * {@link #m_AttToBeModified} and {@link #m_Indicators}.
     *
     * @param inputFormat the input data (header and instances)
     * @return the header of the output data, with the same class index as the input
     */
    @Override
    protected Instances determineOutputFormat(Instances inputFormat) {
        int numAttributes = inputFormat.numAttributes();

        m_SelectCols.setUpper(numAttributes - 1);
        m_SelectedAttributes = m_SelectCols.getSelection();

        double[][][] freqs = countClassFrequencies(inputFormat);

        m_AttToBeModified = new boolean[numAttributes];
        m_Indicators = new int[numAttributes][];
        for (int current : m_SelectedAttributes) {
            if (!isMergeCandidate(inputFormat, current)) {
                continue;
            }
            if (m_Debug) {
                System.err.println(inputFormat.attribute(current));
            }
            int[] indicators = mergeValues(freqs[current]);
            m_Indicators[current] = indicators;
            if (m_Debug) {
                for (int indicator : indicators) {
                    System.err.print(" - " + indicator + " - ");
                }
                System.err.println();
            }
            // An identity mapping means nothing was merged for this attribute.
            for (int k = 0; k < indicators.length; k++) {
                if (indicators[k] != k) {
                    m_AttToBeModified[current] = true;
                }
            }
        }

        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        for (int i = 0; i < numAttributes; i++) {
            Attribute att = inputFormat.attribute(i);
            if (m_AttToBeModified[i]) {
                atts.add(buildMergedAttribute(att, m_Indicators[i]));
            } else {
                atts.add((Attribute) att.copy());
            }
        }

        Instances data = new Instances(inputFormat.relationName(), atts, 0);
        data.setClassIndex(inputFormat.classIndex());
        return data;
    }

    /**
     * Tells whether the attribute at the given index is processed by this
     * filter, i.e. it is nominal and not the class attribute.
     */
    private boolean isMergeCandidate(Instances data, int index) {
        return index != data.classIndex() && data.attribute(index).isNominal();
    }

    /**
     * Builds, for each selected merge candidate, a table of summed instance
     * weights indexed by [attribute value][class value]. Entries for all other
     * attributes stay {@code null}. Instances with a missing attribute value or
     * a missing class value do not contribute to that attribute's table.
     */
    private double[][][] countClassFrequencies(Instances data) {
        double[][][] freqs = new double[data.numAttributes()][][];
        for (int current : m_SelectedAttributes) {
            Attribute att = data.attribute(current);
            if (current != data.classIndex() && att.isNominal()) {
                freqs[current] = new double[att.numValues()][data.numClasses()];
            }
        }

        for (Instance inst : data) {
            for (int current : m_SelectedAttributes) {
                // A table was allocated above exactly for the merge candidates.
                double[][] table = freqs[current];
                if (table == null) {
                    continue;
                }
                if (!inst.isMissing(current) && !inst.classIsMissing()) {
                    table[(int) inst.value(current)][(int) inst.classValue()] += inst.weight();
                }
            }
        }
        return freqs;
    }

    /**
     * Creates the replacement attribute for an attribute whose values were
     * merged. With short identifiers each merged value is named by its
     * one-based index; otherwise it is named by the original labels mapped to
     * it, joined with {@code "_or_"} in original value order.
     */
    private Attribute buildMergedAttribute(Attribute att, int[] indicators) {
        int numValues = 0;
        for (int indicator : indicators) {
            numValues = Math.max(numValues, indicator + 1);
        }

        StringBuilder[] labels = new StringBuilder[numValues];
        for (int j = 0; j < indicators.length; j++) {
            int index = indicators[j];
            if (labels[index] == null) {
                labels[index] = new StringBuilder(
                    m_UseShortIdentifiers ? String.valueOf(index + 1) : att.value(j));
            } else if (!m_UseShortIdentifiers) {
                labels[index].append("_or_").append(att.value(j));
            }
        }

        ArrayList<String> values = new ArrayList<String>(numValues);
        for (StringBuilder label : labels) {
            values.add(label.toString());
        }
        return new Attribute(att.name() + "_merged_values", values);
    }

    /**
     * Computes the multiplier applied to the p-value of the merged attribute
     * (the Bonferroni multiplier of Equation 3.2 in Kass, 1980, according to
     * {@link #globalInfo()}). The value returned is the alternating sum over
     * {@code i = 0 .. r-1} of {@code exp(c * ln(r - i) - (lnFactorial(i) + lnFactorial(r - i)))}.
     *
     * @param c the number of original attribute values
     * @param r the number of values remaining after merging
     * @return the multiplier
     */
    protected double BFfactor(int c, int r) {
        double sum = 0.0;
        double sign = 1.0;
        for (int i = 0; i < r; i++) {
            double logTerm = (double) c * Math.log(r - i)
                - (SpecialFunctions.lnFactorial(i) + SpecialFunctions.lnFactorial(r - i));
            sum += sign * Math.exp(logTerm);
            sign = -sign;
        }
        return sum;
    }

    /**
     * Merges attribute values based on their class distributions.
     *
     * <p>While more than one value remains, the pair of rows with the smallest
     * chi-squared statistic is located. If that pair is significantly different
     * at the configured significance level, merging stops: the whole remaining
     * table is tested with a p-value multiplied by {@link #BFfactor(int, int)},
     * and if that adjusted p-value exceeds the significance level every value is
     * mapped to index 0. Otherwise the two rows are combined and the search
     * repeats.
     *
     * <p>The rows of {@code counts} are not modified.
     *
     * @param counts table of frequencies indexed by [attribute value][class value]
     * @return for each original value index, the index of the merged value it belongs to
     */
    protected int[] mergeValues(double[][] counts) {
        int[] indicators = new int[counts.length];
        for (int i = 0; i < indicators.length; i++) {
            indicators[i] = i;
        }

        while (counts.length > 1) {
            // Find the pair of rows that is least different.
            double[][] pair = new double[2][];
            double minVal = Double.MAX_VALUE;
            int toMergeOne = -1;
            int toMergeTwo = -1;
            for (int i = 0; i < counts.length; i++) {
                pair[0] = counts[i];
                for (int j = i + 1; j < counts.length; j++) {
                    pair[1] = counts[j];
                    double val = ContingencyTables.chiVal(pair, true);
                    if (val < minVal) {
                        minVal = val;
                        toMergeOne = i;
                        toMergeTwo = j;
                    }
                }
            }

            // A two-row table has (number of classes - 1) degrees of freedom.
            if (Statistics.chiSquaredProbability(minVal, counts[0].length - 1) <= m_SigLevel) {
                double val = ContingencyTables.chiVal(counts, true);
                int df = (counts[0].length - 1) * (counts.length - 1);
                double originalSig = Statistics.chiSquaredProbability(val, df);
                double adjustedSig = originalSig * BFfactor(indicators.length, counts.length);
                if (m_Debug) {
                    System.err.println("Original p-value: " + originalSig + "\tAdjusted p-value: " + adjustedSig);
                }
                if (adjustedSig > m_SigLevel) {
                    // Not a valid predictor after adjustment: collapse to a single value.
                    for (int i = 0; i < indicators.length; i++) {
                        indicators[i] = 0;
                    }
                }
                break;
            }

            // Drop row toMergeTwo and fold its counts into a copy of row toMergeOne
            // (toMergeOne < toMergeTwo, so its position is unchanged).
            double[][] newCounts = new double[counts.length - 1][];
            System.arraycopy(counts, 0, newCounts, 0, toMergeTwo);
            System.arraycopy(counts, toMergeTwo + 1, newCounts, toMergeTwo,
                counts.length - toMergeTwo - 1);
            double[] merged = counts[toMergeOne].clone();
            double[] absorbed = counts[toMergeTwo];
            for (int k = 0; k < absorbed.length; k++) {
                merged[k] += absorbed[k];
            }
            newCounts[toMergeOne] = merged;

            // Redirect the absorbed index and close the gap it leaves behind.
            for (int i = 0; i < indicators.length; i++) {
                if (indicators[i] == toMergeTwo) {
                    indicators[i] = toMergeOne;
                } else if (indicators[i] > toMergeTwo) {
                    indicators[i]--;
                }
            }

            counts = newCounts;
        }
        return indicators;
    }

    /**
     * Returns the capabilities of this filter: all attribute types and all
     * class types, with missing values allowed in both.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();

        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);

        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);

        return result;
    }

    /**
     * Applies the value mapping computed in
     * {@link #determineOutputFormat(Instances)} to the given data. Values of
     * modified attributes are replaced by their merged index; missing values and
     * values of unmodified attributes are passed through. Each output instance
     * keeps the weight of its input instance.
     *
     * @param instances the data to process
     * @return the processed data
     * @throws Exception declared by the overridden method; not thrown explicitly here
     */
    @Override
    protected Instances process(Instances instances) throws Exception {
        // Fetch the output header once rather than once per instance.
        Instances outputFormat = getOutputFormat();
        Instances result = new Instances(outputFormat, instances.numInstances());
        int numAttributes = instances.numAttributes();

        for (Instance inst : instances) {
            double[] newData = new double[numAttributes];
            for (int j = 0; j < numAttributes; j++) {
                if (m_AttToBeModified[j] && !inst.isMissing(j)) {
                    newData[j] = m_Indicators[j][(int) inst.value(j)];
                } else {
                    newData[j] = inst.value(j);
                }
            }

            // Carry the instance weight over instead of resetting it to 1.
            Instance instNew = new DenseInstance(inst.weight(), newData);
            instNew.setDataset(result);
            copyValues(instNew, false, inst.dataset(), outputFormat);
            result.add(instNew);
        }
        return result;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 10215 $");
    }

    /**
     * Runs the filter with the given command-line arguments.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        MergeNominalValues.runFilter(new MergeNominalValues(), args);
    }
}

