/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.Subspace;
import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.AbstractParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import java.util.Random;

@Title(value="DOC: Density-based Optimal projective Clustering")
@Reference(authors="C. M. Procopiuc, M. Jones, P. K. Agarwal, T. M. Murali", title="A Monte Carlo algorithm for fast projective clustering", booktitle="Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '02)", url="http://dx.doi.org/10.1145/564691.564739")
public class DOC<V extends NumberVector>
extends AbstractAlgorithm<Clustering<SubspaceModel>>
implements SubspaceClusteringAlgorithm<SubspaceModel> {
    private static final Logging LOG = Logging.getLogger(DOC.class);
    private double alpha;
    private double beta;
    private double w;
    private boolean heuristics;
    private int d_zero;
    private RandomFactory rnd;

    public DOC(double d, double d2, double d3, boolean bl, int n, RandomFactory randomFactory) {
        this.alpha = d;
        this.beta = d2;
        this.w = d3;
        this.heuristics = bl;
        this.d_zero = n;
        this.rnd = randomFactory;
    }

    public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
        Object object;
        IndefiniteProgress indefiniteProgress;
        int n = RelationUtil.dimensionality(relation);
        ArrayModifiableDBIDs arrayModifiableDBIDs = DBIDUtil.newArray(relation.getDBIDs());
        double d = Math.abs(Math.log(n + n) / Math.log(this.beta * 0.5));
        int n2 = (int)(2.0 / this.alpha);
        int n3 = (int)(Math.pow(2.0 / this.alpha, d) * Math.log(4.0));
        if (this.heuristics) {
            n3 = Math.min(n3, Math.min(1000000, n * n));
        }
        int n4 = (int)(this.alpha * (double)arrayModifiableDBIDs.size());
        Clustering<SubspaceModel> clustering = new Clustering<SubspaceModel>("DOC Clusters", "DOC");
        IndefiniteProgress indefiniteProgress2 = indefiniteProgress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
        while (arrayModifiableDBIDs.size() > n4 && (object = this.heuristics ? (Object)this.runFastDOC(database, relation, arrayModifiableDBIDs, n, n2, n3, (int)d) : (Object)this.runDOC(database, relation, arrayModifiableDBIDs, n, n2, n3, (int)d, n4)) != null) {
            clustering.addToplevelCluster((Cluster<SubspaceModel>)object);
            arrayModifiableDBIDs.removeDBIDs(((Cluster)object).getIDs());
            if (indefiniteProgress == null) continue;
            indefiniteProgress.setProcessed(clustering.getAllClusters().size(), LOG);
        }
        if (arrayModifiableDBIDs.size() > 0) {
            object = BitsUtil.ones(n);
            clustering.addToplevelCluster(new Cluster<SubspaceModel>(arrayModifiableDBIDs, true, new SubspaceModel(new Subspace((long[])object), Centroid.make(relation, arrayModifiableDBIDs))));
        }
        LOG.setCompleted(indefiniteProgress);
        return clustering;
    }

    private Cluster<SubspaceModel> runDOC(Database database, Relation<V> relation, ArrayModifiableDBIDs arrayModifiableDBIDs, int n, int n2, int n3, int n4, int n5) {
        ModifiableDBIDs modifiableDBIDs = null;
        long[] lArray = null;
        double d = Double.NEGATIVE_INFINITY;
        SubspaceMaximumDistanceFunction subspaceMaximumDistanceFunction = new SubspaceMaximumDistanceFunction(BitsUtil.zero(n));
        DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, subspaceMaximumDistanceFunction, new Object[0]);
        RangeQuery<NumberVector> rangeQuery = database.getRangeQuery(distanceQuery, new Object[0]);
        FiniteProgress finiteProgress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", n3 * n2, LOG) : null;
        Random random = this.rnd.getSingleThreadedRandom();
        DBIDArrayMIter dBIDArrayMIter = arrayModifiableDBIDs.iter();
        for (int i = 0; i < n2; ++i) {
            dBIDArrayMIter.seek(random.nextInt(arrayModifiableDBIDs.size()));
            for (int j = 0; j < n3; ++j) {
                ModifiableDBIDs modifiableDBIDs2 = DBIDUtil.randomSample((DBIDs)arrayModifiableDBIDs, n4, random);
                long[] lArray2 = BitsUtil.zero(n);
                for (int k = 0; k < n; ++k) {
                    if (!this.dimensionIsRelevant(k, relation, modifiableDBIDs2)) continue;
                    BitsUtil.setI(lArray2, k);
                }
                if (BitsUtil.cardinality(lArray2) > 0) {
                    subspaceMaximumDistanceFunction.setSelectedDimensions(lArray2);
                    ModifiableDBIDs modifiableDBIDs3 = DBIDUtil.intersection(arrayModifiableDBIDs, rangeQuery.getRangeForDBID(dBIDArrayMIter, this.w));
                    if (LOG.isDebuggingFiner()) {
                        LOG.finer("Testing a cluster candidate, |C| = " + modifiableDBIDs3.size() + ", |D| = " + BitsUtil.cardinality(lArray2));
                    }
                    if (modifiableDBIDs3.size() < n5) {
                        if (LOG.isDebuggingFiner()) {
                            LOG.finer("... but it's too small.");
                        }
                    } else {
                        double d2 = this.computeClusterQuality(modifiableDBIDs3.size(), BitsUtil.cardinality(lArray2));
                        if (d2 > d) {
                            if (LOG.isDebuggingFiner()) {
                                LOG.finer("... and it's the best so far: " + d2 + " vs. " + d);
                            }
                            modifiableDBIDs = modifiableDBIDs3;
                            lArray = lArray2;
                            d = d2;
                        } else if (LOG.isDebuggingFiner()) {
                            LOG.finer("... but we already have a better one.");
                        }
                    }
                }
                LOG.incrementProcessed(finiteProgress);
            }
        }
        LOG.ensureCompleted(finiteProgress);
        return modifiableDBIDs != null ? this.makeCluster(relation, modifiableDBIDs, lArray) : null;
    }

    private Cluster<SubspaceModel> runFastDOC(Database database, Relation<V> relation, ArrayModifiableDBIDs arrayModifiableDBIDs, int n, int n2, int n3, int n4) {
        Object object;
        RangeQuery<NumberVector> rangeQuery;
        long[] lArray = null;
        DBIDVar dBIDVar = DBIDUtil.newVar();
        FiniteProgress finiteProgress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", n3 * n2, LOG) : null;
        Random random = this.rnd.getSingleThreadedRandom();
        DBIDArrayMIter dBIDArrayMIter = arrayModifiableDBIDs.iter();
        block0: for (int i = 0; i < n2; ++i) {
            dBIDArrayMIter.seek(random.nextInt(arrayModifiableDBIDs.size()));
            for (int j = 0; j < n3; ++j) {
                rangeQuery = DBIDUtil.randomSample((DBIDs)arrayModifiableDBIDs, n4, random);
                object = BitsUtil.zero(n);
                for (int k = 0; k < n; ++k) {
                    if (!this.dimensionIsRelevant(k, relation, (DBIDs)((Object)rangeQuery))) continue;
                    BitsUtil.setI((long[])object, k);
                }
                if (lArray == null || BitsUtil.cardinality((long[])object) > BitsUtil.cardinality(lArray)) {
                    lArray = object;
                    dBIDVar.set(dBIDArrayMIter);
                    if (BitsUtil.cardinality(lArray) >= this.d_zero) {
                        if (finiteProgress == null) break block0;
                        finiteProgress.setProcessed(finiteProgress.getTotal(), LOG);
                        break block0;
                    }
                }
                LOG.incrementProcessed(finiteProgress);
            }
        }
        LOG.ensureCompleted(finiteProgress);
        if (lArray == null || BitsUtil.cardinality(lArray) == 0) {
            return null;
        }
        SubspaceMaximumDistanceFunction subspaceMaximumDistanceFunction = new SubspaceMaximumDistanceFunction(lArray);
        DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, subspaceMaximumDistanceFunction, new Object[0]);
        rangeQuery = database.getRangeQuery(distanceQuery, "single_query");
        object = DBIDUtil.intersection(arrayModifiableDBIDs, rangeQuery.getRangeForDBID(dBIDVar, this.w));
        return object.size() > 0 ? this.makeCluster(relation, (DBIDs)object, lArray) : null;
    }

    private boolean dimensionIsRelevant(int n, Relation<V> relation, DBIDs dBIDs) {
        double d = Double.POSITIVE_INFINITY;
        double d2 = Double.NEGATIVE_INFINITY;
        DBIDIter dBIDIter = dBIDs.iter();
        while (dBIDIter.valid()) {
            double d3 = ((NumberVector)relation.get(dBIDIter)).doubleValue(n);
            d = d3 < d ? d3 : d;
            double d4 = d2 = d3 > d2 ? d3 : d2;
            if (d2 - d > this.w) {
                return false;
            }
            dBIDIter.advance();
        }
        return true;
    }

    private Cluster<SubspaceModel> makeCluster(Relation<V> relation, DBIDs dBIDs, long[] lArray) {
        HashSetModifiableDBIDs hashSetModifiableDBIDs = DBIDUtil.newHashSet(dBIDs);
        Cluster<SubspaceModel> cluster = new Cluster<SubspaceModel>(hashSetModifiableDBIDs);
        cluster.setModel(new SubspaceModel(new Subspace(lArray), Centroid.make(relation, hashSetModifiableDBIDs)));
        return cluster;
    }

    private double computeClusterQuality(int n, int n2) {
        return (double)n * Math.pow(1.0 / this.beta, n2);
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractParameterizer {
        public static final OptionID ALPHA_ID = new OptionID("doc.alpha", "Minimum relative density for a set of points to be considered a cluster (|C|>=doc.alpha*|S|).");
        public static final OptionID BETA_ID = new OptionID("doc.beta", "Preference of cluster size versus number of relevant dimensions (higher value means higher priority on larger clusters).");
        public static final OptionID W_ID = new OptionID("doc.w", "Maximum extent of scattering of points along a single attribute for the attribute to be considered relevant.");
        public static final OptionID HEURISTICS_ID = new OptionID("doc.fastdoc", "Use heuristics as described, thus using the FastDOC algorithm (not yet implemented).");
        public static final OptionID D_ZERO_ID = new OptionID("doc.d0", "Parameter for FastDOC, setting the number of relevant attributes which, when found for a cluster, are deemed enough to stop iterating.");
        public static final OptionID RANDOM_ID = new OptionID("doc.random-seed", "Random seed, for reproducible experiments.");
        protected double alpha;
        protected double beta;
        protected double w;
        protected boolean heuristics;
        protected int d_zero;
        protected RandomFactory random = RandomFactory.DEFAULT;

        @Override
        protected void makeOptions(Parameterization parameterization) {
            super.makeOptions(parameterization);
            AbstractParameter abstractParameter = (DoubleParameter)((DoubleParameter)new DoubleParameter(ALPHA_ID, 0.2).addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE)).addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
            if (parameterization.grab(abstractParameter)) {
                this.alpha = (Double)abstractParameter.getValue();
            }
            if (parameterization.grab(abstractParameter = (DoubleParameter)((DoubleParameter)new DoubleParameter(BETA_ID, 0.8).addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE))) {
                this.beta = (Double)abstractParameter.getValue();
            }
            if (parameterization.grab(abstractParameter = (DoubleParameter)new DoubleParameter(W_ID, 0.05).addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE))) {
                this.w = (Double)abstractParameter.getValue();
            }
            if (parameterization.grab(abstractParameter = new Flag(HEURISTICS_ID))) {
                this.heuristics = (Boolean)abstractParameter.getValue();
            }
            if (this.heuristics && parameterization.grab(abstractParameter = (IntParameter)new IntParameter(D_ZERO_ID, 5).addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.d_zero = (Integer)abstractParameter.getValue();
            }
            if (parameterization.grab(abstractParameter = new RandomParameter(RANDOM_ID))) {
                this.random = (RandomFactory)abstractParameter.getValue();
            }
        }

        @Override
        protected DOC<V> makeInstance() {
            return new DOC(this.alpha, this.beta, this.w, this.heuristics, this.d_zero, this.random);
        }
    }
}

