/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering;

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.PrototypeModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import java.util.ArrayList;

/**
 * Canopy pre-clustering.
 *
 * <p>Repeatedly takes one object out of the pool of remaining objects and
 * builds a "canopy" around it: every remaining object whose distance to the
 * picked object is not greater than {@code t1} becomes a member of the canopy,
 * and every such object whose distance is additionally at most {@code t2} is
 * removed from the pool. Objects that lie within {@code t1} but beyond
 * {@code t2} stay in the pool, so they may show up in further canopies; the
 * resulting clusters can therefore overlap.
 *
 * @param <O> object type handled by the distance function
 */
@Reference(authors="A. McCallum, K. Nigam, L.H. Ungar", title="Efficient Clustering of High Dimensional Data Sets with Application to Reference Matching", booktitle="Proc. 6th ACM SIGKDD international conference on Knowledge discovery and data mining", url="http://dx.doi.org/10.1145%2F347090.347123")
public class CanopyPreClustering<O>
extends AbstractDistanceBasedAlgorithm<O, Clustering<PrototypeModel<O>>>
implements ClusteringAlgorithm<Clustering<PrototypeModel<O>>> {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(CanopyPreClustering.class);

    /** Inclusion threshold: objects farther away than this are not added to a canopy. */
    private final double t1;

    /** Removal threshold: canopy members at most this far away leave the candidate pool. */
    private final double t2;

    /**
     * Constructor.
     *
     * @param distanceFunction distance function used to compare objects
     * @param d inclusion threshold (t1)
     * @param d2 removal threshold (t2); {@link #run} rejects configurations where t1 is not at least t2
     */
    public CanopyPreClustering(DistanceFunction<? super O> distanceFunction, double d, double d2) {
        super(distanceFunction);
        this.t1 = d;
        this.t2 = d2;
    }

    /**
     * Runs the canopy pre-clustering on the given relation.
     *
     * @param database database used to obtain the distance query
     * @param relation relation holding the objects to cluster
     * @return clustering with one cluster per canopy; each cluster's prototype is the object the canopy was built around
     * @throws AbortException if {@code t1 >= t2} does not hold
     */
    public Clustering<PrototypeModel<O>> run(Database database, Relation<O> relation) {
        // Validate before any work is done. Written as a negated ">=" so that a
        // NaN threshold (for which every comparison is false) is rejected too.
        if (!(this.t1 >= this.t2)) {
            throw new AbortException("T1 must be at least as large as T2.");
        }
        DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, this.getDistanceFunction());
        // Pool of objects that may still start or join a canopy.
        HashSetModifiableDBIDs remaining = DBIDUtil.newHashSet(relation.getDBIDs());
        ArrayList<Cluster<PrototypeModel<O>>> clusters = new ArrayList<Cluster<PrototypeModel<O>>>();
        final int size = relation.size();
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Canopy clustering", size, LOG) : null;

        DBIDVar center = DBIDUtil.newVar();
        while (!remaining.isEmpty()) {
            // Take one object out of the pool; it becomes the first member of a new canopy.
            remaining.pop(center);
            ArrayModifiableDBIDs members = DBIDUtil.newArray();
            members.add(center);

            // Compare the picked object against everything still in the pool.
            for (DBIDMIter iter = remaining.iter(); iter.valid(); iter.advance()) {
                double dist = distanceQuery.distance(center, iter);
                // Inclusion threshold: too far away to belong to this canopy.
                if (dist > this.t1) {
                    continue;
                }
                members.add(iter);
                // Removal threshold: close enough that the object leaves the pool.
                if (dist <= this.t2) {
                    iter.remove();
                }
            }
            clusters.add(new Cluster<PrototypeModel<O>>(members, new PrototypeModel<O>(relation.get(center))));

            if (progress != null) {
                // Everything no longer in the pool counts as processed.
                progress.setProcessed(size - remaining.size(), LOG);
            }
        }
        LOG.ensureCompleted(progress);
        return new Clustering<PrototypeModel<O>>("Canopy clustering", "canopy-clustering", clusters);
    }

    /**
     * Returns the input type restriction, which is the one of the distance function.
     */
    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(this.getDistanceFunction().getInputTypeRestriction());
    }

    /**
     * Returns the class logger.
     */
    @Override
    protected Logging getLogger() {
        return LOG;
    }

    /**
     * Parameterization class: reads the two thresholds and builds the algorithm.
     *
     * @param <O> object type handled by the distance function
     */
    public static class Parameterizer<O>
    extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
        /** Option ID for the inclusion threshold t1. */
        public static final OptionID T1_ID = new OptionID("canopy.t1", "Inclusion threshold for canopy clustering. t1 >= t2!");

        /** Option ID for the removal threshold t2. */
        public static final OptionID T2_ID = new OptionID("canopy.t2", "Removal threshold for canopy clustering. t1 >= t2!");

        /** Inclusion threshold read from the parameterization. */
        private double t1;

        /** Removal threshold read from the parameterization. */
        private double t2;

        /**
         * Reads {@code canopy.t1} and {@code canopy.t2} and registers the
         * constraint that t2 must be less than or equal to t1.
         *
         * @param parameterization parameterization to read the options from
         */
        @Override
        protected void makeOptions(Parameterization parameterization) {
            super.makeOptions(parameterization);
            DoubleParameter t1Param = new DoubleParameter(T1_ID);
            if (parameterization.grab(t1Param)) {
                this.t1 = t1Param.doubleValue();
            }
            DoubleParameter t2Param = new DoubleParameter(T2_ID);
            if (parameterization.grab(t2Param)) {
                this.t2 = t2Param.doubleValue();
            }
            // Constraint arguments are (smaller, larger): t2 <= t1.
            parameterization.checkConstraint(new LessEqualGlobalConstraint<Double>(t2Param, t1Param));
        }

        /**
         * Builds the algorithm from the distance function and the two thresholds.
         */
        @Override
        protected CanopyPreClustering<O> makeInstance() {
            return new CanopyPreClustering<O>(this.distanceFunction, this.t1, this.t2);
        }
    }
}

