/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.NoSupportedDataTypeException;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Pseudo clustering algorithm that groups objects by their (pre-assigned)
 * label, e.g. to obtain a reference clustering for evaluation.
 *
 * Labels that end up with at most one object are not reported as clusters of
 * their own; their objects are merged into a single cluster named "Noise"
 * that is flagged as noise. Clusters whose label matches the optional noise
 * pattern are flagged as noise as well.
 */
@Title(value="Clustering by label")
@Description(value="Cluster points by a (pre-assigned!) label. For comparing results with a reference clustering.")
@Alias(value={"de.lmu.ifi.dbs.elki.algorithm.clustering.ByLabelClustering"})
public class ByLabelClustering
extends AbstractAlgorithm<Clustering<Model>>
implements ClusteringAlgorithm<Clustering<Model>> {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(ByLabelClustering.class);

    /** Whether a label string is split on blanks into several cluster labels. */
    private final boolean multiple;

    /** Pattern marking noise labels; {@code null} disables pattern-based noise detection. */
    private final Pattern noisepattern;

    /**
     * Constructor.
     *
     * @param bl {@code true} to allow multiple cluster assignments per object
     *        (labels separated by blanks)
     * @param pattern pattern to recognize noise labels, may be {@code null}
     */
    public ByLabelClustering(boolean bl, Pattern pattern) {
        this.multiple = bl;
        this.noisepattern = pattern;
    }

    /**
     * Constructor with default settings: single assignment, no noise pattern.
     */
    public ByLabelClustering() {
        this(false, null);
    }

    /**
     * Run on a database: prefer a class label relation and fall back to the
     * first input type restriction (guessed labels) when none is available.
     *
     * @param database database to process
     * @return clustering by label
     */
    @Override
    public Clustering<Model> run(Database database) {
        try {
            Relation<?> relation = database.getRelation(TypeUtil.CLASSLABEL);
            return this.run(relation);
        }
        catch (NoSupportedDataTypeException ignored) {
            // No class label relation available: fall back to guessed labels.
            Relation<?> relation = database.getRelation(this.getInputTypeRestriction()[0]);
            return this.run(relation);
        }
    }

    /**
     * Build the clustering from a relation of label objects.
     *
     * @param relation relation whose values are converted to labels via
     *        {@code toString()}
     * @return clustering by label
     */
    public Clustering<Model> run(Relation<?> relation) {
        Map<String, DBIDs> labelMap = this.multiple ? this.multipleAssignment(relation) : this.singleAssignment(relation);
        ArrayModifiableDBIDs noiseIds = DBIDUtil.newArray();
        Clustering<Model> clustering = new Clustering<Model>("By Label Clustering", "bylabel-clustering");
        for (Map.Entry<String, DBIDs> entry : labelMap.entrySet()) {
            String label = entry.getKey();
            DBIDs ids = entry.getValue();
            // Labels with at most one object are collected into the noise cluster.
            if (ids.size() <= 1) {
                noiseIds.addDBIDs(ids);
                continue;
            }
            Cluster<Model> cluster = new Cluster<Model>(label, ids, ClusterModel.CLUSTER);
            // The label is null for unlabeled objects; a null input must not be
            // handed to the matcher.
            if (this.noisepattern != null && label != null && this.noisepattern.matcher(label).find()) {
                cluster.setNoise(true);
            }
            clustering.addToplevelCluster(cluster);
        }
        if (noiseIds.size() > 0) {
            Cluster<Model> noise = new Cluster<Model>("Noise", noiseIds, ClusterModel.CLUSTER);
            noise.setNoise(true);
            clustering.addToplevelCluster(noise);
        }
        return clustering;
    }

    /**
     * Assign every object to exactly one label: the string form of its value,
     * or {@code null} when the relation holds no value for it.
     *
     * @param relation label relation
     * @return mapping from label to the ids carrying that label
     */
    private Map<String, DBIDs> singleAssignment(Relation<?> relation) {
        Map<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
        for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
            Object value = relation.get(iter);
            String label = value != null ? value.toString() : null;
            this.assign(labelMap, label, iter);
        }
        return labelMap;
    }

    /**
     * Assign every object to each of the blank-separated labels found in the
     * string form of its value. Objects without a value are assigned to the
     * {@code null} label, matching {@link #singleAssignment}.
     *
     * @param relation label relation
     * @return mapping from label to the ids carrying that label
     */
    private Map<String, DBIDs> multipleAssignment(Relation<?> relation) {
        Map<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
        for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
            Object value = relation.get(iter);
            if (value == null) {
                // Unlabeled object: avoid the NullPointerException from toString().
                this.assign(labelMap, null, iter);
                continue;
            }
            for (String label : value.toString().split(" ")) {
                this.assign(labelMap, label, iter);
            }
        }
        return labelMap;
    }

    /**
     * Add an id to the set stored for a label.
     *
     * The first id of a label is stored directly as a single {@link DBID}; on
     * the second assignment it is replaced by a hash set holding both ids.
     *
     * @param hashMap label map to update
     * @param string label, may be {@code null}
     * @param dBIDRef reference to the id to add
     */
    private void assign(Map<String, DBIDs> hashMap, String string, DBIDRef dBIDRef) {
        // Stored values are never null, so a null lookup means "label not seen yet".
        DBIDs existing = hashMap.get(string);
        if (existing == null) {
            hashMap.put(string, DBIDUtil.deref(dBIDRef));
            return;
        }
        if (existing instanceof DBID) {
            HashSetModifiableDBIDs ids = DBIDUtil.newHashSet();
            ids.add((DBID)existing);
            ids.add(dBIDRef);
            hashMap.put(string, ids);
            return;
        }
        assert (existing instanceof HashSetModifiableDBIDs);
        // No size check here: with multiple assignment a label repeated within
        // one object's label string yields a set that still has a single element.
        ((ModifiableDBIDs)existing).add(dBIDRef);
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.GUESSED_LABEL);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    /**
     * Parameterization class.
     */
    public static class Parameterizer
    extends AbstractParameterizer {
        /**
         * Flag enabling multiple cluster assignments per object; the labels are
         * then expected to be separated by blanks.
         */
        public static final OptionID MULTIPLE_ID = new OptionID("bylabelclustering.multiple", "Flag to indicate that multiple cluster assignment is possible. If an assignment to multiple clusters is desired, the labels indicating the clusters need to be separated by blanks.");

        /** Optional pattern used to recognize noise classes by their label. */
        public static final OptionID NOISE_ID = new OptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label.");

        /** Value of the multiple-assignment flag. */
        protected boolean multiple;

        /** Noise label pattern; stays {@code null} when the option is not given. */
        protected Pattern noisepat;

        @Override
        protected void makeOptions(Parameterization parameterization) {
            super.makeOptions(parameterization);
            Flag multipleFlag = new Flag(MULTIPLE_ID);
            if (parameterization.grab(multipleFlag)) {
                this.multiple = (Boolean)multipleFlag.getValue();
            }
            PatternParameter noiseParam = new PatternParameter(NOISE_ID);
            noiseParam.setOptional(true);
            if (parameterization.grab(noiseParam)) {
                this.noisepat = (Pattern)noiseParam.getValue();
            }
        }

        @Override
        protected ByLabelClustering makeInstance() {
            return new ByLabelClustering(this.multiple, this.noisepat);
        }
    }
}

