source: trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/MODEPClassifier.java @ 121

Last change on this file since 121 was 86, checked in by sherbold, 9 years ago
  • switched workspace encoding to UTF-8 and fixed broken characters
  • Property svn:mime-type set to text/plain
File size: 14.2 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.wekaclassifier;
16
17import java.util.ArrayList;
18import java.util.List;
19import java.util.logging.Level;
20
21import org.uma.jmetal.algorithm.Algorithm;
22import org.uma.jmetal.algorithm.multiobjective.nsgaii.NSGAIIBuilder;
23import org.uma.jmetal.operator.CrossoverOperator;
24import org.uma.jmetal.operator.MutationOperator;
25import org.uma.jmetal.operator.SelectionOperator;
26import org.uma.jmetal.operator.impl.crossover.SBXCrossover;
27import org.uma.jmetal.operator.impl.mutation.UniformMutation;
28import org.uma.jmetal.operator.impl.selection.BinaryTournamentSelection;
29import org.uma.jmetal.problem.ConstrainedProblem;
30import org.uma.jmetal.problem.Problem;
31import org.uma.jmetal.problem.impl.AbstractDoubleProblem;
32import org.uma.jmetal.runner.AbstractAlgorithmRunner;
33import org.uma.jmetal.solution.DoubleSolution;
34import org.uma.jmetal.util.AlgorithmRunner;
35import org.uma.jmetal.util.comparator.RankingAndCrowdingDistanceComparator;
36import org.uma.jmetal.util.solutionattribute.impl.NumberOfViolatedConstraints;
37import org.uma.jmetal.util.solutionattribute.impl.OverallConstraintViolation;
38
39import de.ugoe.cs.util.console.Console;
40import weka.classifiers.AbstractClassifier;
41import weka.core.Attribute;
42import weka.core.Instance;
43import weka.core.Instances;
44import weka.core.Utils;
45
46/**
47 * <p>
48 * Implements MODEP after Canfora et al., 2013. The class learns the family of MODEP classifiers
49 * with the NSGAII algorithm. Based on a defined threshold for the desired recall, we then pick the
50 * best of the results, i.e., the one that requires the fewest LOC and is still achieves the desired
51 * recall.
52 * </p>
53 * <p>
54 * Our implementation currently only allows a threshold for the desired recall, not for the desired
55 * number of LOC.
56 * </p>
57 *
58 * @author Steffen Herbold
59 */
60public class MODEPClassifier extends AbstractClassifier {
61
62    /** Default serial ID */
63    private static final long serialVersionUID = 1L;
64
65    /**
66     * Coefficients of the calculated solution
67     */
68    double[][] solutionCoefficients = null;
69
70    /**
71     * Threshold for the desired recall; default 0.7
72     */
73    double desiredRecall = 0.7;
74
75    /*
76     * (non-Javadoc)
77     *
78     * @see weka.classifiers.AbstractClassifier#setOptions(java.lang.String[])
79     */
80    @Override
81    public void setOptions(String[] options) throws Exception {
82        String desiredRecallString = Utils.getOption('R', options);
83        if (!desiredRecallString.isEmpty()) {
84            desiredRecall = Double.parseDouble(desiredRecallString);
85        }
86    }
87
88    /*
89     * (non-Javadoc)
90     *
91     * @see weka.classifiers.AbstractClassifier#distributionForInstance(weka.core.Instance)
92     */
93    @Override
94    public double[] distributionForInstance(Instance instance) throws Exception {
95        return distributionForInstance(solutionCoefficients, instance);
96    }
97
98    /*
99     * (non-Javadoc)
100     *
101     * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
102     */
103    @Override
104    public void buildClassifier(Instances traindata) throws Exception {
105        double desiredInstances =
106            desiredRecall * traindata.attributeStats(traindata.classIndex()).nominalCounts[1];
107        MyNSGAIIRunner nsgaIIrunner = new MyNSGAIIRunner(traindata, desiredInstances);
108        List<DoubleSolution> solutions = nsgaIIrunner.run();
109        DoubleSolution bestSolution = null;
110        for (DoubleSolution solution : solutions) {
111            if (solution.getObjective(0) >= desiredInstances) {
112                if (bestSolution == null) {
113                    bestSolution = solution;
114                }
115                else {
116                    if (bestSolution.getObjective(1) < solution.getObjective(1)) {
117                        bestSolution = solution;
118                    }
119                }
120            }
121        }
122
123        if (bestSolution == null) {
124            Console.trace(Level.WARNING,
125                          "no solution with desired recall found, pick best recall instead");
126            for (DoubleSolution solution : solutions) {
127                if (bestSolution == null) {
128                    bestSolution = solution;
129                }
130                if (solution.getObjective(0) > bestSolution.getObjective(0)) {
131                    bestSolution = solution;
132                }
133            }
134        }
135
136        solutionCoefficients = solutionToCoefficients(bestSolution);
137    }
138
139    /**
140     * <p>
141     * Internal helper method to get the distribution for an instance given the coefficients of the
142     * logistic regression
143     * </p>
144     *
145     * @param coefficients
146     *            the coefficients
147     * @param instance
148     *            the instance
149     * @return probability for each class
150     */
151    private static double[] distributionForInstance(double[][] coefficients, Instance instance) {
152        int numClasses = instance.classAttribute().numValues();
153        double[] results = new double[numClasses];
154
155        for (int i = 0; i < numClasses; i++) {
156            int k = 0;
157            for (int j = 0; j < instance.numAttributes(); j++) {
158                if (j != instance.classIndex()) {
159                    results[i] += coefficients[i][k] * instance.value(j);
160                    k++;
161                }
162            }
163            results[i] = sigmoid(results[i]);
164        }
165        return results;
166    }
167
168    /**
169     * <p>
170     * returns the classification of an instance with the logistic regression for the given
171     * coefficients
172     * </p>
173     *
174     * @param coefficients
175     *            the coefficients
176     * @param instance
177     *            the instance
178     * @return the binary classification
179     */
180    private static double logisticRegression(double[][] coefficients, Instance instance) {
181        double[] results = distributionForInstance(coefficients, instance);
182        double maxResult = Double.MIN_VALUE;
183        int maxIndex = 0;
184        for (int i = 0; i < results.length; i++) {
185            if (maxResult < results[i]) {
186                maxResult = results[i];
187                maxIndex = i;
188            }
189        }
190        return Double.parseDouble(instance.classAttribute().value(maxIndex));
191    }
192
193    /**
194     * <p>
195     * Sigmoid function, required for logistic regression
196     * </p>
197     *
198     * @param z
199     *            value for which the sigmoid is calcualted
200     * @return 1/(1+exp(-z))
201     */
202    private static double sigmoid(double z) {
203        return 1.0 / (1.0 + Math.exp(-z));
204    }
205
206    /**
207     * <p>
208     * Converts a {@link DoubleSolution} into a coefficient matrix
209     * </p>
210     *
211     * @param solution
212     *            solution the is converted
213     * @return coefficient matrix
214     */
215    public static double[][] solutionToCoefficients(DoubleSolution solution) {
216        int numberOfVariables = solution.getNumberOfVariables();
217        int numberOfCoefficients = numberOfVariables / 2;
218
219        double[][] coefficients = new double[2][numberOfCoefficients];
220
221        for (int i = 0; i < numberOfVariables; i++) {
222            if (i < numberOfCoefficients) {
223                coefficients[0][i] = solution.getVariableValue(i);
224            }
225            else {
226                coefficients[1][i - numberOfCoefficients] = solution.getVariableValue(i);
227            }
228        }
229        return coefficients;
230    }
231
232    /**
233     * <p>
234     * Executes the NSGAII algorithm with the MODEP problem.
235     * </p>
236     *
237     * @author Steffen Herbold
238     */
239    private class MyNSGAIIRunner extends AbstractAlgorithmRunner {
240
241        /**
242         * configured algorithm to solve the {@link MODEPProblem}.
243         */
244        final Algorithm<List<DoubleSolution>> algorithm;
245
246        /**
247         * <p>
248         * Constructor. Configures the algorithm object.
249         * </p>
250         *
251         * @param data
252         *            data for which the MODEP problem is optimized
253         * @param minEffectiveness
254         *            minimal desired effectiveness
255         */
256        private MyNSGAIIRunner(Instances data, double minEffectiveness) {
257            final Problem<DoubleSolution> problem = new MODEPProblem(data, minEffectiveness);
258            double crossoverProbability = 0.6;
259            double crossoverDistributionIndex = 20.0;
260            final CrossoverOperator<DoubleSolution> crossover =
261                new SBXCrossover(crossoverProbability, crossoverDistributionIndex);
262
263            double mutationProbability = 0.05;
264            double mutationPerturbation = 0.0;
265            final MutationOperator<DoubleSolution> mutation =
266                new UniformMutation(mutationProbability, mutationPerturbation);
267            final SelectionOperator<List<DoubleSolution>, DoubleSolution> selection =
268                new BinaryTournamentSelection<DoubleSolution>(new RankingAndCrowdingDistanceComparator<DoubleSolution>());
269
270            algorithm = new NSGAIIBuilder<DoubleSolution>(problem, crossover, mutation)
271                .setSelectionOperator(selection).setMaxIterations(400).setPopulationSize(100)
272                .build();
273        }
274
275        /**
276         * <p>
277         * Executes the NSGAII algorithm.
278         * </p>
279         *
280         * @return solutions of the problem; should be a Pareto front
281         */
282        public List<DoubleSolution> run() {
283            AlgorithmRunner algorithmRunner = new AlgorithmRunner.Executor(algorithm).execute();
284            List<DoubleSolution> population = algorithm.getResult();
285            Console.traceln(Level.FINEST,
286                            "genetic algorithm run time: " + algorithmRunner.getComputingTime());
287            return population;
288        }
289    }
290
291    /**
292     * <p>
293     * Problem definition of MODEP.
294     * </p>
295     *
296     * @author Steffen Herbold
297     */
298    private class MODEPProblem extends AbstractDoubleProblem
299        implements ConstrainedProblem<DoubleSolution>
300    {
301        /** Default serial ID */
302        private static final long serialVersionUID = 1L;
303
304        /**
305         * Data for which MODEP is defined
306         */
307        private final Instances data;
308
309        /**
310         * Minimal desired effectiveness.
311         */
312        private final double minEffectiveness;
313
314        /**
315         * Stores the contraint violations for all current solutions
316         */
317        public OverallConstraintViolation<DoubleSolution> overallConstraintViolationDegree;
318
319        /**
320         * Stores the number of violated constraints for all current solutions.
321         */
322        public NumberOfViolatedConstraints<DoubleSolution> numberOfViolatedConstraints;
323
324        /**
325         * <p>
326         * Constructor. Initializes the MODEP problem.
327         * </p>
328         *
329         * @param data
330         *            data for which MODEP is defined
331         * @param minEffectiveness
332         *            minimal desired effectiveness
333         */
334        public MODEPProblem(Instances data, double minEffectiveness) {
335            this.data = data;
336            this.minEffectiveness = minEffectiveness;
337            setNumberOfVariables(2 * (data.numAttributes() - 1));
338            setNumberOfObjectives(2);
339            setNumberOfConstraints(1);
340            setName("MODEPProblem");
341
342            List<Double> lowerLimit = new ArrayList<>(getNumberOfVariables());
343            List<Double> upperLimit = new ArrayList<>(getNumberOfVariables());
344
345            for (int i = 0; i < getNumberOfVariables(); i++) {
346                lowerLimit.add(-100.0);
347                upperLimit.add(100.0);
348            }
349
350            setLowerLimit(lowerLimit);
351            setUpperLimit(upperLimit);
352
353            overallConstraintViolationDegree = new OverallConstraintViolation<>();
354            numberOfViolatedConstraints = new NumberOfViolatedConstraints<>();
355        }
356
357        /*
358         * (non-Javadoc)
359         *
360         * @see org.uma.jmetal.problem.Problem#evaluate(org.uma.jmetal.solution.Solution)
361         */
362        @Override
363        public void evaluate(DoubleSolution solution) {
364            double[][] coefficients = solutionToCoefficients(solution);
365
366            final Attribute loc = data.attribute("loc");
367            double effectiveness = 0.0;
368            double cost = 0.0;
369            for (int i = 0; i < data.size(); i++) {
370                double currentClass = logisticRegression(coefficients, data.get(i));
371                if (currentClass == 1.0) {
372                    if (data.get(i).classValue() == 1.0) {
373                        effectiveness++;
374                    }
375                    cost -= data.get(i).value(loc);
376                }
377            }
378
379            solution.setObjective(0, effectiveness);
380            solution.setObjective(1, cost);
381        }
382
383        /*
384         * (non-Javadoc)
385         *
386         * @see
387         * org.uma.jmetal.problem.ConstrainedProblem#evaluateConstraints(org.uma.jmetal.solution.
388         * Solution)
389         */
390        @Override
391        public void evaluateConstraints(DoubleSolution solution) {
392            double constraintViolation = minEffectiveness - solution.getObjective(0);
393            if (constraintViolation > 0) {
394                overallConstraintViolationDegree.setAttribute(solution, constraintViolation);
395                numberOfViolatedConstraints.setAttribute(solution, 1);
396            }
397            else {
398                overallConstraintViolationDegree.setAttribute(solution, 0.0);
399                numberOfViolatedConstraints.setAttribute(solution, 0);
400            }
401        }
402    }
403}
Note: See TracBrowser for help on using the repository browser.