1 | // Copyright 2015 Georg-August-Universität Göttingen, Germany |
---|
2 | // |
---|
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
---|
4 | // you may not use this file except in compliance with the License. |
---|
5 | // You may obtain a copy of the License at |
---|
6 | // |
---|
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
---|
8 | // |
---|
9 | // Unless required by applicable law or agreed to in writing, software |
---|
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
---|
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
12 | // See the License for the specific language governing permissions and |
---|
13 | // limitations under the License. |
---|
14 | |
---|
15 | package de.ugoe.cs.cpdp.training; |
---|
16 | |
---|
17 | import java.util.ArrayList; |
---|
18 | import java.util.Collections; |
---|
19 | import java.util.Comparator; |
---|
20 | import java.util.HashMap; |
---|
21 | |
---|
22 | import weka.core.Instance; |
---|
23 | import de.ugoe.cs.cpdp.training.WekaLocalFQTraining.QuadTreePayload; |
---|
24 | |
---|
25 | /** |
---|
26 | * QuadTree implementation |
---|
27 | * |
---|
28 | * QuadTree gets a list of instances and then recursively split them into 4 childs For this it uses |
---|
29 | * the median of the 2 values x,y |
---|
30 | */ |
---|
31 | public class QuadTree { |
---|
32 | |
---|
33 | /* 1 parent or null */ |
---|
34 | private QuadTree parent = null; |
---|
35 | |
---|
36 | /* 4 childs, 1 per quadrant */ |
---|
37 | private QuadTree child_nw; |
---|
38 | private QuadTree child_ne; |
---|
39 | private QuadTree child_se; |
---|
40 | private QuadTree child_sw; |
---|
41 | |
---|
42 | /* list (only helps with generation of list of childs!) */ |
---|
43 | private ArrayList<QuadTree> l = new ArrayList<QuadTree>(); |
---|
44 | |
---|
45 | /* level only used for debugging */ |
---|
46 | public int level = 0; |
---|
47 | |
---|
48 | /* size of the quadrant */ |
---|
49 | private double[] x; |
---|
50 | private double[] y; |
---|
51 | |
---|
52 | public static boolean verbose = false; |
---|
53 | public static int size = 0; |
---|
54 | public static double alpha = 0; |
---|
55 | |
---|
56 | /* cluster payloads */ |
---|
57 | public static ArrayList<ArrayList<QuadTreePayload<Instance>>> ccluster = |
---|
58 | new ArrayList<ArrayList<QuadTreePayload<Instance>>>(); |
---|
59 | |
---|
60 | /* cluster sizes (index is cluster number, arraylist is list of boxes (x0,y0,x1,y1) */ |
---|
61 | public static HashMap<Integer, ArrayList<Double[][]>> csize = |
---|
62 | new HashMap<Integer, ArrayList<Double[][]>>(); |
---|
63 | |
---|
64 | /* payload of this instance */ |
---|
65 | private ArrayList<QuadTreePayload<Instance>> payload; |
---|
66 | |
---|
67 | public QuadTree(QuadTree parent, ArrayList<QuadTreePayload<Instance>> payload) { |
---|
68 | this.parent = parent; |
---|
69 | this.payload = payload; |
---|
70 | } |
---|
71 | |
---|
72 | public String toString() { |
---|
73 | String n = ""; |
---|
74 | if (this.parent == null) { |
---|
75 | n += "rootnode "; |
---|
76 | } |
---|
77 | String level = new String(new char[this.level]).replace("\0", "-"); |
---|
78 | n += level + " instances: " + this.getNumbers(); |
---|
79 | return n; |
---|
80 | } |
---|
81 | |
---|
82 | /** |
---|
83 | * Returns the payload, used for clustering in the clustering list we only have children with |
---|
84 | * paylod |
---|
85 | * |
---|
86 | * @return payload |
---|
87 | */ |
---|
88 | public ArrayList<QuadTreePayload<Instance>> getPayload() { |
---|
89 | return this.payload; |
---|
90 | } |
---|
91 | |
---|
92 | /** |
---|
93 | * Calculate the density of this quadrant |
---|
94 | * |
---|
95 | * density = number of instances / global size (all instances) |
---|
96 | * |
---|
97 | * @return density |
---|
98 | */ |
---|
99 | public double getDensity() { |
---|
100 | double dens = 0; |
---|
101 | dens = (double) this.getNumbers() / QuadTree.size; |
---|
102 | return dens; |
---|
103 | } |
---|
104 | |
---|
105 | public void setSize(double[] x, double[] y) { |
---|
106 | this.x = x; |
---|
107 | this.y = y; |
---|
108 | } |
---|
109 | |
---|
110 | public double[][] getSize() { |
---|
111 | return new double[][] |
---|
112 | { this.x, this.y }; |
---|
113 | } |
---|
114 | |
---|
115 | public Double[][] getSizeDouble() { |
---|
116 | Double[] tmpX = new Double[2]; |
---|
117 | Double[] tmpY = new Double[2]; |
---|
118 | |
---|
119 | tmpX[0] = this.x[0]; |
---|
120 | tmpX[1] = this.x[1]; |
---|
121 | |
---|
122 | tmpY[0] = this.y[0]; |
---|
123 | tmpY[1] = this.y[1]; |
---|
124 | |
---|
125 | return new Double[][] |
---|
126 | { tmpX, tmpY }; |
---|
127 | } |
---|
128 | |
---|
129 | /** |
---|
130 | * TODO: DRY, median ist immer dasselbe |
---|
131 | * |
---|
132 | * @return median for x |
---|
133 | */ |
---|
134 | private double getMedianForX() { |
---|
135 | double med_x = 0; |
---|
136 | |
---|
137 | Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { |
---|
138 | @Override |
---|
139 | public int compare(QuadTreePayload<Instance> x1, QuadTreePayload<Instance> x2) { |
---|
140 | return Double.compare(x1.x, x2.x); |
---|
141 | } |
---|
142 | }); |
---|
143 | |
---|
144 | if (this.payload.size() % 2 == 0) { |
---|
145 | int mid = this.payload.size() / 2; |
---|
146 | med_x = (this.payload.get(mid).x + this.payload.get(mid + 1).x) / 2; |
---|
147 | } |
---|
148 | else { |
---|
149 | int mid = this.payload.size() / 2; |
---|
150 | med_x = this.payload.get(mid).x; |
---|
151 | } |
---|
152 | |
---|
153 | if (QuadTree.verbose) { |
---|
154 | System.out.println("sorted:"); |
---|
155 | for (int i = 0; i < this.payload.size(); i++) { |
---|
156 | System.out.print("" + this.payload.get(i).x + ","); |
---|
157 | } |
---|
158 | System.out.println("median x: " + med_x); |
---|
159 | } |
---|
160 | return med_x; |
---|
161 | } |
---|
162 | |
---|
163 | private double getMedianForY() { |
---|
164 | double med_y = 0; |
---|
165 | |
---|
166 | Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { |
---|
167 | @Override |
---|
168 | public int compare(QuadTreePayload<Instance> y1, QuadTreePayload<Instance> y2) { |
---|
169 | return Double.compare(y1.y, y2.y); |
---|
170 | } |
---|
171 | }); |
---|
172 | |
---|
173 | if (this.payload.size() % 2 == 0) { |
---|
174 | int mid = this.payload.size() / 2; |
---|
175 | med_y = (this.payload.get(mid).y + this.payload.get(mid + 1).y) / 2; |
---|
176 | } |
---|
177 | else { |
---|
178 | int mid = this.payload.size() / 2; |
---|
179 | med_y = this.payload.get(mid).y; |
---|
180 | } |
---|
181 | |
---|
182 | if (QuadTree.verbose) { |
---|
183 | System.out.println("sorted:"); |
---|
184 | for (int i = 0; i < this.payload.size(); i++) { |
---|
185 | System.out.print("" + this.payload.get(i).y + ","); |
---|
186 | } |
---|
187 | System.out.println("median y: " + med_y); |
---|
188 | } |
---|
189 | return med_y; |
---|
190 | } |
---|
191 | |
---|
192 | /** |
---|
193 | * Reurns the number of instances in the payload |
---|
194 | * |
---|
195 | * @return int number of instances |
---|
196 | */ |
---|
197 | public int getNumbers() { |
---|
198 | int number = 0; |
---|
199 | if (this.payload != null) { |
---|
200 | number = this.payload.size(); |
---|
201 | } |
---|
202 | return number; |
---|
203 | } |
---|
204 | |
---|
205 | /** |
---|
206 | * Calculate median values of payload for x, y and split into 4 sectors |
---|
207 | * |
---|
208 | * @return Array of QuadTree nodes (4 childs) |
---|
209 | * @throws Exception |
---|
210 | * if we would run into an recursive loop |
---|
211 | */ |
---|
212 | public QuadTree[] split() throws Exception { |
---|
213 | |
---|
214 | double medx = this.getMedianForX(); |
---|
215 | double medy = this.getMedianForY(); |
---|
216 | |
---|
217 | // Payload lists for each child |
---|
218 | ArrayList<QuadTreePayload<Instance>> nw = new ArrayList<QuadTreePayload<Instance>>(); |
---|
219 | ArrayList<QuadTreePayload<Instance>> sw = new ArrayList<QuadTreePayload<Instance>>(); |
---|
220 | ArrayList<QuadTreePayload<Instance>> ne = new ArrayList<QuadTreePayload<Instance>>(); |
---|
221 | ArrayList<QuadTreePayload<Instance>> se = new ArrayList<QuadTreePayload<Instance>>(); |
---|
222 | |
---|
223 | // sort the payloads to new payloads |
---|
224 | // here we have the problem that payloads with the same values are sorted |
---|
225 | // into the same slots and it could happen that medx and medy = size_x[1] and size_y[1] |
---|
226 | // in that case we would have an endless loop |
---|
227 | for (int i = 0; i < this.payload.size(); i++) { |
---|
228 | |
---|
229 | QuadTreePayload<Instance> item = this.payload.get(i); |
---|
230 | |
---|
231 | // north west |
---|
232 | if (item.x <= medx && item.y >= medy) { |
---|
233 | nw.add(item); |
---|
234 | } |
---|
235 | |
---|
236 | // south west |
---|
237 | else if (item.x <= medx && item.y <= medy) { |
---|
238 | sw.add(item); |
---|
239 | } |
---|
240 | |
---|
241 | // north east |
---|
242 | else if (item.x >= medx && item.y >= medy) { |
---|
243 | ne.add(item); |
---|
244 | } |
---|
245 | |
---|
246 | // south east |
---|
247 | else if (item.x >= medx && item.y <= medy) { |
---|
248 | se.add(item); |
---|
249 | } |
---|
250 | } |
---|
251 | |
---|
252 | // if we assign one child a payload equal to our own (see problem above) |
---|
253 | // we throw an exceptions which stops the recursion on this node |
---|
254 | if (nw.equals(this.payload)) { |
---|
255 | throw new Exception("payload equal"); |
---|
256 | } |
---|
257 | if (sw.equals(this.payload)) { |
---|
258 | throw new Exception("payload equal"); |
---|
259 | } |
---|
260 | if (ne.equals(this.payload)) { |
---|
261 | throw new Exception("payload equal"); |
---|
262 | } |
---|
263 | if (se.equals(this.payload)) { |
---|
264 | throw new Exception("payload equal"); |
---|
265 | } |
---|
266 | |
---|
267 | this.child_nw = new QuadTree(this, nw); |
---|
268 | this.child_nw.setSize(new double[] |
---|
269 | { this.x[0], medx }, new double[] |
---|
270 | { medy, this.y[1] }); |
---|
271 | this.child_nw.level = this.level + 1; |
---|
272 | |
---|
273 | this.child_sw = new QuadTree(this, sw); |
---|
274 | this.child_sw.setSize(new double[] |
---|
275 | { this.x[0], medx }, new double[] |
---|
276 | { this.y[0], medy }); |
---|
277 | this.child_sw.level = this.level + 1; |
---|
278 | |
---|
279 | this.child_ne = new QuadTree(this, ne); |
---|
280 | this.child_ne.setSize(new double[] |
---|
281 | { medx, this.x[1] }, new double[] |
---|
282 | { medy, this.y[1] }); |
---|
283 | this.child_ne.level = this.level + 1; |
---|
284 | |
---|
285 | this.child_se = new QuadTree(this, se); |
---|
286 | this.child_se.setSize(new double[] |
---|
287 | { medx, this.x[1] }, new double[] |
---|
288 | { this.y[0], medy }); |
---|
289 | this.child_se.level = this.level + 1; |
---|
290 | |
---|
291 | this.payload = null; |
---|
292 | return new QuadTree[] |
---|
293 | { this.child_nw, this.child_ne, this.child_se, this.child_sw }; |
---|
294 | } |
---|
295 | |
---|
296 | /** |
---|
297 | * TODO: static method |
---|
298 | * |
---|
299 | * @param q |
---|
300 | */ |
---|
301 | public void recursiveSplit(QuadTree q) { |
---|
302 | if (QuadTree.verbose) { |
---|
303 | System.out.println("splitting: " + q); |
---|
304 | } |
---|
305 | if (q.getNumbers() < QuadTree.alpha) { |
---|
306 | return; |
---|
307 | } |
---|
308 | else { |
---|
309 | // exception is thrown if we would run into an endless loop (see comments in split()) |
---|
310 | try { |
---|
311 | QuadTree[] childs = q.split(); |
---|
312 | this.recursiveSplit(childs[0]); |
---|
313 | this.recursiveSplit(childs[1]); |
---|
314 | this.recursiveSplit(childs[2]); |
---|
315 | this.recursiveSplit(childs[3]); |
---|
316 | } |
---|
317 | catch (Exception e) { |
---|
318 | return; |
---|
319 | } |
---|
320 | } |
---|
321 | } |
---|
322 | |
---|
323 | /** |
---|
324 | * returns an list of childs sorted by density |
---|
325 | * |
---|
326 | * @param q |
---|
327 | * QuadTree |
---|
328 | * @return list of QuadTrees |
---|
329 | */ |
---|
330 | private void generateList(QuadTree q) { |
---|
331 | |
---|
332 | // we only have all childs or none at all |
---|
333 | if (q.child_ne == null) { |
---|
334 | this.l.add(q); |
---|
335 | } |
---|
336 | |
---|
337 | if (q.child_ne != null) { |
---|
338 | this.generateList(q.child_ne); |
---|
339 | } |
---|
340 | if (q.child_nw != null) { |
---|
341 | this.generateList(q.child_nw); |
---|
342 | } |
---|
343 | if (q.child_se != null) { |
---|
344 | this.generateList(q.child_se); |
---|
345 | } |
---|
346 | if (q.child_sw != null) { |
---|
347 | this.generateList(q.child_sw); |
---|
348 | } |
---|
349 | } |
---|
350 | |
---|
351 | /** |
---|
352 | * Checks if passed QuadTree is neighboring to us |
---|
353 | * |
---|
354 | * @param q |
---|
355 | * QuadTree |
---|
356 | * @return true if passed QuadTree is a neighbor |
---|
357 | */ |
---|
358 | public boolean isNeighbour(QuadTree q) { |
---|
359 | boolean is_neighbour = false; |
---|
360 | |
---|
361 | double[][] our_size = this.getSize(); |
---|
362 | double[][] new_size = q.getSize(); |
---|
363 | |
---|
364 | // X is i=0, Y is i=1 |
---|
365 | for (int i = 0; i < 2; i++) { |
---|
366 | // we are smaller than q |
---|
367 | // -------------- q |
---|
368 | // ------- we |
---|
369 | if (our_size[i][0] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]) { |
---|
370 | is_neighbour = true; |
---|
371 | } |
---|
372 | // we overlap with q at some point |
---|
373 | // a) ---------------q |
---|
374 | // ----------- we |
---|
375 | // b) --------- q |
---|
376 | // --------- we |
---|
377 | if ((our_size[i][0] >= new_size[i][0] && our_size[i][0] <= new_size[i][1]) || |
---|
378 | (our_size[i][1] >= new_size[i][0] && our_size[i][1] <= new_size[i][1])) |
---|
379 | { |
---|
380 | is_neighbour = true; |
---|
381 | } |
---|
382 | // we are larger than q |
---|
383 | // ---- q |
---|
384 | // ---------- we |
---|
385 | if (our_size[i][1] >= new_size[i][1] && our_size[i][0] <= new_size[i][0]) { |
---|
386 | is_neighbour = true; |
---|
387 | } |
---|
388 | } |
---|
389 | |
---|
390 | if (is_neighbour && QuadTree.verbose) { |
---|
391 | System.out.println(this + " neighbour of: " + q); |
---|
392 | } |
---|
393 | |
---|
394 | return is_neighbour; |
---|
395 | } |
---|
396 | |
---|
397 | /** |
---|
398 | * Perform pruning and clustering of the quadtree |
---|
399 | * |
---|
400 | * Pruning according to: Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman, |
---|
401 | * Forrest Shull, Burak Turhan, Thomas Zimmermann, |
---|
402 | * "Local versus Global Lessons for Defect Prediction and Effort Estimation," IEEE Transactions |
---|
403 | * on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013 |
---|
404 | * |
---|
405 | * 1) get list of leaf quadrants 2) sort by their density 3) set stop_rule to 0.5 * highest |
---|
406 | * Density in the list 4) merge all nodes with a density > stop_rule to the new cluster and |
---|
407 | * remove all from list 5) repeat |
---|
408 | * |
---|
409 | * @param q |
---|
410 | * List of QuadTree (children only) |
---|
411 | */ |
---|
412 | public void gridClustering(ArrayList<QuadTree> list) { |
---|
413 | |
---|
414 | if (list.size() == 0) { |
---|
415 | return; |
---|
416 | } |
---|
417 | |
---|
418 | double stop_rule; |
---|
419 | QuadTree biggest; |
---|
420 | QuadTree current; |
---|
421 | |
---|
422 | // current clusterlist |
---|
423 | ArrayList<QuadTreePayload<Instance>> current_cluster; |
---|
424 | |
---|
425 | // remove list (for removal of items after scanning of the list) |
---|
426 | ArrayList<Integer> remove = new ArrayList<Integer>(); |
---|
427 | |
---|
428 | // 1. find biggest, and add it |
---|
429 | biggest = list.get(list.size() - 1); |
---|
430 | stop_rule = biggest.getDensity() * 0.5; |
---|
431 | |
---|
432 | current_cluster = new ArrayList<QuadTreePayload<Instance>>(); |
---|
433 | current_cluster.addAll(biggest.getPayload()); |
---|
434 | |
---|
435 | // remove the biggest because we are starting with it |
---|
436 | remove.add(list.size() - 1); |
---|
437 | |
---|
438 | ArrayList<Double[][]> tmpSize = new ArrayList<Double[][]>(); |
---|
439 | tmpSize.add(biggest.getSizeDouble()); |
---|
440 | |
---|
441 | // check the items for their density |
---|
442 | for (int i = list.size() - 1; i >= 0; i--) { |
---|
443 | current = list.get(i); |
---|
444 | |
---|
445 | // 2. find neighbors with correct density |
---|
446 | // if density > stop_rule and is_neighbour add to cluster and remove from list |
---|
447 | if (current.getDensity() > stop_rule && !current.equals(biggest) && |
---|
448 | current.isNeighbour(biggest)) |
---|
449 | { |
---|
450 | current_cluster.addAll(current.getPayload()); |
---|
451 | |
---|
452 | // add it to remove list (we cannot remove it inside the loop because it would move |
---|
453 | // the index) |
---|
454 | remove.add(i); |
---|
455 | |
---|
456 | // get the size |
---|
457 | tmpSize.add(current.getSizeDouble()); |
---|
458 | } |
---|
459 | } |
---|
460 | |
---|
461 | // 3. remove our removal candidates from the list |
---|
462 | for (Integer item : remove) { |
---|
463 | list.remove((int) item); |
---|
464 | } |
---|
465 | |
---|
466 | // 4. add to cluster |
---|
467 | QuadTree.ccluster.add(current_cluster); |
---|
468 | |
---|
469 | // 5. add sizes of our current (biggest) this adds a number of sizes (all QuadTree Instances |
---|
470 | // belonging to this cluster) |
---|
471 | // we need that to classify test instances to a cluster later |
---|
472 | Integer cnumber = new Integer(QuadTree.ccluster.size() - 1); |
---|
473 | if (QuadTree.csize.containsKey(cnumber) == false) { |
---|
474 | QuadTree.csize.put(cnumber, tmpSize); |
---|
475 | } |
---|
476 | |
---|
477 | // repeat |
---|
478 | this.gridClustering(list); |
---|
479 | } |
---|
480 | |
---|
481 | public void printInfo() { |
---|
482 | System.out.println("we have " + ccluster.size() + " clusters"); |
---|
483 | |
---|
484 | for (int i = 0; i < ccluster.size(); i++) { |
---|
485 | System.out.println("cluster: " + i + " size: " + ccluster.get(i).size()); |
---|
486 | } |
---|
487 | } |
---|
488 | |
---|
489 | /** |
---|
490 | * Helper Method to get a sorted list (by density) for all children |
---|
491 | * |
---|
492 | * @param q |
---|
493 | * QuadTree |
---|
494 | * @return Sorted ArrayList of quadtrees |
---|
495 | */ |
---|
496 | public ArrayList<QuadTree> getList(QuadTree q) { |
---|
497 | this.generateList(q); |
---|
498 | |
---|
499 | Collections.sort(this.l, new Comparator<QuadTree>() { |
---|
500 | @Override |
---|
501 | public int compare(QuadTree x1, QuadTree x2) { |
---|
502 | return Double.compare(x1.getDensity(), x2.getDensity()); |
---|
503 | } |
---|
504 | }); |
---|
505 | |
---|
506 | return this.l; |
---|
507 | } |
---|
508 | } |
---|