1 | // Copyright 2015 Georg-August-Universität Göttingen, Germany
|
---|
2 | //
|
---|
3 | // Licensed under the Apache License, Version 2.0 (the "License");
|
---|
4 | // you may not use this file except in compliance with the License.
|
---|
5 | // You may obtain a copy of the License at
|
---|
6 | //
|
---|
7 | // http://www.apache.org/licenses/LICENSE-2.0
|
---|
8 | //
|
---|
9 | // Unless required by applicable law or agreed to in writing, software
|
---|
10 | // distributed under the License is distributed on an "AS IS" BASIS,
|
---|
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
12 | // See the License for the specific language governing permissions and
|
---|
13 | // limitations under the License.
|
---|
14 |
|
---|
15 | package de.ugoe.cs.cpdp.wekaclassifier;
|
---|
16 |
|
---|
17 | import java.util.ArrayList;
|
---|
18 | import java.util.List;
|
---|
19 | import java.util.logging.Level;
|
---|
20 |
|
---|
21 | import org.uma.jmetal.algorithm.Algorithm;
|
---|
22 | import org.uma.jmetal.algorithm.multiobjective.nsgaii.NSGAIIBuilder;
|
---|
23 | import org.uma.jmetal.operator.CrossoverOperator;
|
---|
24 | import org.uma.jmetal.operator.MutationOperator;
|
---|
25 | import org.uma.jmetal.operator.SelectionOperator;
|
---|
26 | import org.uma.jmetal.operator.impl.crossover.SBXCrossover;
|
---|
27 | import org.uma.jmetal.operator.impl.mutation.UniformMutation;
|
---|
28 | import org.uma.jmetal.operator.impl.selection.BinaryTournamentSelection;
|
---|
29 | import org.uma.jmetal.problem.ConstrainedProblem;
|
---|
30 | import org.uma.jmetal.problem.Problem;
|
---|
31 | import org.uma.jmetal.problem.impl.AbstractDoubleProblem;
|
---|
32 | import org.uma.jmetal.runner.AbstractAlgorithmRunner;
|
---|
33 | import org.uma.jmetal.solution.DoubleSolution;
|
---|
34 | import org.uma.jmetal.util.AlgorithmRunner;
|
---|
35 | import org.uma.jmetal.util.comparator.RankingAndCrowdingDistanceComparator;
|
---|
36 | import org.uma.jmetal.util.solutionattribute.impl.NumberOfViolatedConstraints;
|
---|
37 | import org.uma.jmetal.util.solutionattribute.impl.OverallConstraintViolation;
|
---|
38 |
|
---|
39 | import de.ugoe.cs.util.console.Console;
|
---|
40 | import weka.classifiers.AbstractClassifier;
|
---|
41 | import weka.core.Attribute;
|
---|
42 | import weka.core.Instance;
|
---|
43 | import weka.core.Instances;
|
---|
44 | import weka.core.Utils;
|
---|
45 |
|
---|
46 | /**
|
---|
47 | * <p>
|
---|
48 | * Implements MODEP after Canfora et al., 2013. The class learns the family of MODEP classifiers
|
---|
49 | * with the NSGAII algorithm. Based on a defined threshold for the desired recall, we then pick the
|
---|
50 | * best of the results, i.e., the one that requires the fewest LOC and is still achieves the desired
|
---|
51 | * recall.
|
---|
52 | * </p>
|
---|
53 | * <p>
|
---|
54 | * Our implementation currently only allows a threshold for the desired recall, not for the desired
|
---|
55 | * number of LOC.
|
---|
56 | * </p>
|
---|
57 | *
|
---|
58 | * @author Steffen Herbold
|
---|
59 | */
|
---|
60 | public class MODEPClassifier extends AbstractClassifier {
|
---|
61 |
|
---|
62 | /** Default serial ID */
|
---|
63 | private static final long serialVersionUID = 1L;
|
---|
64 |
|
---|
65 | /**
|
---|
66 | * Coefficients of the calculated solution
|
---|
67 | */
|
---|
68 | double[][] solutionCoefficients = null;
|
---|
69 |
|
---|
70 | /**
|
---|
71 | * Threshold for the desired recall; default 0.7
|
---|
72 | */
|
---|
73 | double desiredRecall = 0.7;
|
---|
74 |
|
---|
75 | /*
|
---|
76 | * (non-Javadoc)
|
---|
77 | *
|
---|
78 | * @see weka.classifiers.AbstractClassifier#setOptions(java.lang.String[])
|
---|
79 | */
|
---|
80 | @Override
|
---|
81 | public void setOptions(String[] options) throws Exception {
|
---|
82 | String desiredRecallString = Utils.getOption('R', options);
|
---|
83 | if (!desiredRecallString.isEmpty()) {
|
---|
84 | desiredRecall = Double.parseDouble(desiredRecallString);
|
---|
85 | }
|
---|
86 | }
|
---|
87 |
|
---|
88 | /*
|
---|
89 | * (non-Javadoc)
|
---|
90 | *
|
---|
91 | * @see weka.classifiers.AbstractClassifier#distributionForInstance(weka.core.Instance)
|
---|
92 | */
|
---|
93 | @Override
|
---|
94 | public double[] distributionForInstance(Instance instance) throws Exception {
|
---|
95 | return distributionForInstance(solutionCoefficients, instance);
|
---|
96 | }
|
---|
97 |
|
---|
98 | /*
|
---|
99 | * (non-Javadoc)
|
---|
100 | *
|
---|
101 | * @see weka.classifiers.Classifier#buildClassifier(weka.core.Instances)
|
---|
102 | */
|
---|
103 | @Override
|
---|
104 | public void buildClassifier(Instances traindata) throws Exception {
|
---|
105 | double desiredInstances =
|
---|
106 | desiredRecall * traindata.attributeStats(traindata.classIndex()).nominalCounts[1];
|
---|
107 | MyNSGAIIRunner nsgaIIrunner = new MyNSGAIIRunner(traindata, desiredInstances);
|
---|
108 | List<DoubleSolution> solutions = nsgaIIrunner.run();
|
---|
109 | DoubleSolution bestSolution = null;
|
---|
110 | for (DoubleSolution solution : solutions) {
|
---|
111 | if (solution.getObjective(0) >= desiredInstances) {
|
---|
112 | if (bestSolution == null) {
|
---|
113 | bestSolution = solution;
|
---|
114 | }
|
---|
115 | else {
|
---|
116 | if (bestSolution.getObjective(1) < solution.getObjective(1)) {
|
---|
117 | bestSolution = solution;
|
---|
118 | }
|
---|
119 | }
|
---|
120 | }
|
---|
121 | }
|
---|
122 |
|
---|
123 | if (bestSolution == null) {
|
---|
124 | Console.trace(Level.WARNING,
|
---|
125 | "no solution with desired recall found, pick best recall instead");
|
---|
126 | for (DoubleSolution solution : solutions) {
|
---|
127 | if (bestSolution == null) {
|
---|
128 | bestSolution = solution;
|
---|
129 | }
|
---|
130 | if (solution.getObjective(0) > bestSolution.getObjective(0)) {
|
---|
131 | bestSolution = solution;
|
---|
132 | }
|
---|
133 | }
|
---|
134 | }
|
---|
135 |
|
---|
136 | solutionCoefficients = solutionToCoefficients(bestSolution);
|
---|
137 | }
|
---|
138 |
|
---|
139 | /**
|
---|
140 | * <p>
|
---|
141 | * Internal helper method to get the distribution for an instance given the coefficients of the
|
---|
142 | * logistic regression
|
---|
143 | * </p>
|
---|
144 | *
|
---|
145 | * @param coefficients
|
---|
146 | * the coefficients
|
---|
147 | * @param instance
|
---|
148 | * the instance
|
---|
149 | * @return probability for each class
|
---|
150 | */
|
---|
151 | private static double[] distributionForInstance(double[][] coefficients, Instance instance) {
|
---|
152 | int numClasses = instance.classAttribute().numValues();
|
---|
153 | double[] results = new double[numClasses];
|
---|
154 |
|
---|
155 | for (int i = 0; i < numClasses; i++) {
|
---|
156 | int k = 0;
|
---|
157 | for (int j = 0; j < instance.numAttributes(); j++) {
|
---|
158 | if (j != instance.classIndex()) {
|
---|
159 | results[i] += coefficients[i][k] * instance.value(j);
|
---|
160 | k++;
|
---|
161 | }
|
---|
162 | }
|
---|
163 | results[i] = sigmoid(results[i]);
|
---|
164 | }
|
---|
165 | return results;
|
---|
166 | }
|
---|
167 |
|
---|
168 | /**
|
---|
169 | * <p>
|
---|
170 | * returns the classification of an instance with the logistic regression for the given
|
---|
171 | * coefficients
|
---|
172 | * </p>
|
---|
173 | *
|
---|
174 | * @param coefficients
|
---|
175 | * the coefficients
|
---|
176 | * @param instance
|
---|
177 | * the instance
|
---|
178 | * @return the binary classification
|
---|
179 | */
|
---|
180 | private static double logisticRegression(double[][] coefficients, Instance instance) {
|
---|
181 | double[] results = distributionForInstance(coefficients, instance);
|
---|
182 | double maxResult = Double.MIN_VALUE;
|
---|
183 | int maxIndex = 0;
|
---|
184 | for (int i = 0; i < results.length; i++) {
|
---|
185 | if (maxResult < results[i]) {
|
---|
186 | maxResult = results[i];
|
---|
187 | maxIndex = i;
|
---|
188 | }
|
---|
189 | }
|
---|
190 | return Double.parseDouble(instance.classAttribute().value(maxIndex));
|
---|
191 | }
|
---|
192 |
|
---|
193 | /**
|
---|
194 | * <p>
|
---|
195 | * Sigmoid function, required for logistic regression
|
---|
196 | * </p>
|
---|
197 | *
|
---|
198 | * @param z
|
---|
199 | * value for which the sigmoid is calcualted
|
---|
200 | * @return 1/(1+exp(-z))
|
---|
201 | */
|
---|
202 | private static double sigmoid(double z) {
|
---|
203 | return 1.0 / (1.0 + Math.exp(-z));
|
---|
204 | }
|
---|
205 |
|
---|
206 | /**
|
---|
207 | * <p>
|
---|
208 | * Converts a {@link DoubleSolution} into a coefficient matrix
|
---|
209 | * </p>
|
---|
210 | *
|
---|
211 | * @param solution
|
---|
212 | * solution the is converted
|
---|
213 | * @return coefficient matrix
|
---|
214 | */
|
---|
215 | public static double[][] solutionToCoefficients(DoubleSolution solution) {
|
---|
216 | int numberOfVariables = solution.getNumberOfVariables();
|
---|
217 | int numberOfCoefficients = numberOfVariables / 2;
|
---|
218 |
|
---|
219 | double[][] coefficients = new double[2][numberOfCoefficients];
|
---|
220 |
|
---|
221 | for (int i = 0; i < numberOfVariables; i++) {
|
---|
222 | if (i < numberOfCoefficients) {
|
---|
223 | coefficients[0][i] = solution.getVariableValue(i);
|
---|
224 | }
|
---|
225 | else {
|
---|
226 | coefficients[1][i - numberOfCoefficients] = solution.getVariableValue(i);
|
---|
227 | }
|
---|
228 | }
|
---|
229 | return coefficients;
|
---|
230 | }
|
---|
231 |
|
---|
232 | /**
|
---|
233 | * <p>
|
---|
234 | * Executes the NSGAII algorithm with the MODEP problem.
|
---|
235 | * </p>
|
---|
236 | *
|
---|
237 | * @author Steffen Herbold
|
---|
238 | */
|
---|
239 | private class MyNSGAIIRunner extends AbstractAlgorithmRunner {
|
---|
240 |
|
---|
241 | /**
|
---|
242 | * configured algorithm to solve the {@link MODEPProblem}.
|
---|
243 | */
|
---|
244 | final Algorithm<List<DoubleSolution>> algorithm;
|
---|
245 |
|
---|
246 | /**
|
---|
247 | * <p>
|
---|
248 | * Constructor. Configures the algorithm object.
|
---|
249 | * </p>
|
---|
250 | *
|
---|
251 | * @param data
|
---|
252 | * data for which the MODEP problem is optimized
|
---|
253 | * @param minEffectiveness
|
---|
254 | * minimal desired effectiveness
|
---|
255 | */
|
---|
256 | private MyNSGAIIRunner(Instances data, double minEffectiveness) {
|
---|
257 | final Problem<DoubleSolution> problem = new MODEPProblem(data, minEffectiveness);
|
---|
258 | double crossoverProbability = 0.6;
|
---|
259 | double crossoverDistributionIndex = 20.0;
|
---|
260 | final CrossoverOperator<DoubleSolution> crossover =
|
---|
261 | new SBXCrossover(crossoverProbability, crossoverDistributionIndex);
|
---|
262 |
|
---|
263 | double mutationProbability = 0.05;
|
---|
264 | double mutationPerturbation = 0.0;
|
---|
265 | final MutationOperator<DoubleSolution> mutation =
|
---|
266 | new UniformMutation(mutationProbability, mutationPerturbation);
|
---|
267 | final SelectionOperator<List<DoubleSolution>, DoubleSolution> selection =
|
---|
268 | new BinaryTournamentSelection<DoubleSolution>(new RankingAndCrowdingDistanceComparator<DoubleSolution>());
|
---|
269 |
|
---|
270 | algorithm = new NSGAIIBuilder<DoubleSolution>(problem, crossover, mutation)
|
---|
271 | .setSelectionOperator(selection).setMaxIterations(400).setPopulationSize(100)
|
---|
272 | .build();
|
---|
273 | }
|
---|
274 |
|
---|
275 | /**
|
---|
276 | * <p>
|
---|
277 | * Executes the NSGAII algorithm.
|
---|
278 | * </p>
|
---|
279 | *
|
---|
280 | * @return solutions of the problem; should be a Pareto front
|
---|
281 | */
|
---|
282 | public List<DoubleSolution> run() {
|
---|
283 | AlgorithmRunner algorithmRunner = new AlgorithmRunner.Executor(algorithm).execute();
|
---|
284 | List<DoubleSolution> population = algorithm.getResult();
|
---|
285 | Console.traceln(Level.FINEST,
|
---|
286 | "genetic algorithm run time: " + algorithmRunner.getComputingTime());
|
---|
287 | return population;
|
---|
288 | }
|
---|
289 | }
|
---|
290 |
|
---|
291 | /**
|
---|
292 | * <p>
|
---|
293 | * Problem definition of MODEP.
|
---|
294 | * </p>
|
---|
295 | *
|
---|
296 | * @author Steffen Herbold
|
---|
297 | */
|
---|
298 | private class MODEPProblem extends AbstractDoubleProblem
|
---|
299 | implements ConstrainedProblem<DoubleSolution>
|
---|
300 | {
|
---|
301 | /** Default serial ID */
|
---|
302 | private static final long serialVersionUID = 1L;
|
---|
303 |
|
---|
304 | /**
|
---|
305 | * Data for which MODEP is defined
|
---|
306 | */
|
---|
307 | private final Instances data;
|
---|
308 |
|
---|
309 | /**
|
---|
310 | * Minimal desired effectiveness.
|
---|
311 | */
|
---|
312 | private final double minEffectiveness;
|
---|
313 |
|
---|
314 | /**
|
---|
315 | * Stores the contraint violations for all current solutions
|
---|
316 | */
|
---|
317 | public OverallConstraintViolation<DoubleSolution> overallConstraintViolationDegree;
|
---|
318 |
|
---|
319 | /**
|
---|
320 | * Stores the number of violated constraints for all current solutions.
|
---|
321 | */
|
---|
322 | public NumberOfViolatedConstraints<DoubleSolution> numberOfViolatedConstraints;
|
---|
323 |
|
---|
324 | /**
|
---|
325 | * <p>
|
---|
326 | * Constructor. Initializes the MODEP problem.
|
---|
327 | * </p>
|
---|
328 | *
|
---|
329 | * @param data
|
---|
330 | * data for which MODEP is defined
|
---|
331 | * @param minEffectiveness
|
---|
332 | * minimal desired effectiveness
|
---|
333 | */
|
---|
334 | public MODEPProblem(Instances data, double minEffectiveness) {
|
---|
335 | this.data = data;
|
---|
336 | this.minEffectiveness = minEffectiveness;
|
---|
337 | setNumberOfVariables(2 * (data.numAttributes() - 1));
|
---|
338 | setNumberOfObjectives(2);
|
---|
339 | setNumberOfConstraints(1);
|
---|
340 | setName("MODEPProblem");
|
---|
341 |
|
---|
342 | List<Double> lowerLimit = new ArrayList<>(getNumberOfVariables());
|
---|
343 | List<Double> upperLimit = new ArrayList<>(getNumberOfVariables());
|
---|
344 |
|
---|
345 | for (int i = 0; i < getNumberOfVariables(); i++) {
|
---|
346 | lowerLimit.add(-100.0);
|
---|
347 | upperLimit.add(100.0);
|
---|
348 | }
|
---|
349 |
|
---|
350 | setLowerLimit(lowerLimit);
|
---|
351 | setUpperLimit(upperLimit);
|
---|
352 |
|
---|
353 | overallConstraintViolationDegree = new OverallConstraintViolation<>();
|
---|
354 | numberOfViolatedConstraints = new NumberOfViolatedConstraints<>();
|
---|
355 | }
|
---|
356 |
|
---|
357 | /*
|
---|
358 | * (non-Javadoc)
|
---|
359 | *
|
---|
360 | * @see org.uma.jmetal.problem.Problem#evaluate(org.uma.jmetal.solution.Solution)
|
---|
361 | */
|
---|
362 | @Override
|
---|
363 | public void evaluate(DoubleSolution solution) {
|
---|
364 | double[][] coefficients = solutionToCoefficients(solution);
|
---|
365 |
|
---|
366 | final Attribute loc = data.attribute("loc");
|
---|
367 | double effectiveness = 0.0;
|
---|
368 | double cost = 0.0;
|
---|
369 | for (int i = 0; i < data.size(); i++) {
|
---|
370 | double currentClass = logisticRegression(coefficients, data.get(i));
|
---|
371 | if (currentClass == 1.0) {
|
---|
372 | if (data.get(i).classValue() == 1.0) {
|
---|
373 | effectiveness++;
|
---|
374 | }
|
---|
375 | cost -= data.get(i).value(loc);
|
---|
376 | }
|
---|
377 | }
|
---|
378 |
|
---|
379 | solution.setObjective(0, effectiveness);
|
---|
380 | solution.setObjective(1, cost);
|
---|
381 | }
|
---|
382 |
|
---|
383 | /*
|
---|
384 | * (non-Javadoc)
|
---|
385 | *
|
---|
386 | * @see
|
---|
387 | * org.uma.jmetal.problem.ConstrainedProblem#evaluateConstraints(org.uma.jmetal.solution.
|
---|
388 | * Solution)
|
---|
389 | */
|
---|
390 | @Override
|
---|
391 | public void evaluateConstraints(DoubleSolution solution) {
|
---|
392 | double constraintViolation = minEffectiveness - solution.getObjective(0);
|
---|
393 | if (constraintViolation > 0) {
|
---|
394 | overallConstraintViolationDegree.setAttribute(solution, constraintViolation);
|
---|
395 | numberOfViolatedConstraints.setAttribute(solution, 1);
|
---|
396 | }
|
---|
397 | else {
|
---|
398 | overallConstraintViolationDegree.setAttribute(solution, 0.0);
|
---|
399 | numberOfViolatedConstraints.setAttribute(solution, 0);
|
---|
400 | }
|
---|
401 | }
|
---|
402 | }
|
---|
403 | }
|
---|