1 | package de.ugoe.cs.cpdp.training; |
---|
2 | |
---|
3 | import java.util.ArrayList; |
---|
4 | import java.util.Collections; |
---|
5 | import java.util.Comparator; |
---|
6 | import java.util.HashMap; |
---|
7 | |
---|
8 | import weka.core.Instance; |
---|
9 | import de.ugoe.cs.cpdp.training.WekaLocalTraining2.QuadTreePayload; |
---|
10 | |
---|
11 | /** |
---|
12 | * QuadTree implementation |
---|
13 | * |
---|
14 | * QuadTree gets a list of instances and then recursively split them into 4 childs |
---|
15 | * For this it uses the median of the 2 values x,y |
---|
16 | */ |
---|
17 | public class QuadTree { |
---|
18 | |
---|
19 | /* 1 parent or null */ |
---|
20 | private QuadTree parent = null; |
---|
21 | |
---|
22 | /* 4 childs, 1 per quadrant */ |
---|
23 | private QuadTree child_nw; |
---|
24 | private QuadTree child_ne; |
---|
25 | private QuadTree child_se; |
---|
26 | private QuadTree child_sw; |
---|
27 | |
---|
28 | /* list (only helps with generation of list of childs!) */ |
---|
29 | private ArrayList<QuadTree> l = new ArrayList<QuadTree>(); |
---|
30 | |
---|
31 | /* level only used for debugging */ |
---|
32 | public int level = 0; |
---|
33 | |
---|
34 | /* size of the quadrant */ |
---|
35 | private double[] x; |
---|
36 | private double[] y; |
---|
37 | |
---|
38 | public static boolean verbose = false; |
---|
39 | public static int size = 0; |
---|
40 | public static double alpha = 0; |
---|
41 | |
---|
42 | /* cluster payloads */ |
---|
43 | public static ArrayList<ArrayList<QuadTreePayload<Instance>>> ccluster = new ArrayList<ArrayList<QuadTreePayload<Instance>>>(); |
---|
44 | |
---|
45 | /* cluster sizes (index is cluster number, arraylist is list of boxes (x0,y0,x1,y1) */ |
---|
46 | public static HashMap<Integer, ArrayList<Double[][]>> csize = new HashMap<Integer, ArrayList<Double[][]>>(); |
---|
47 | |
---|
48 | /* payload of this instance */ |
---|
49 | private ArrayList<QuadTreePayload<Instance>> payload; |
---|
50 | |
---|
51 | |
---|
52 | public QuadTree(QuadTree parent, ArrayList<QuadTreePayload<Instance>> payload) { |
---|
53 | this.parent = parent; |
---|
54 | this.payload = payload; |
---|
55 | } |
---|
56 | |
---|
57 | |
---|
58 | public String toString() { |
---|
59 | String n = ""; |
---|
60 | if(this.parent == null) { |
---|
61 | n += "rootnode "; |
---|
62 | } |
---|
63 | String level = new String(new char[this.level]).replace("\0", "-"); |
---|
64 | n += level + " instances: " + this.getNumbers(); |
---|
65 | return n; |
---|
66 | } |
---|
67 | |
---|
68 | /** |
---|
69 | * Returns the payload, used for clustering |
---|
70 | * in the clustering list we only have children with paylod |
---|
71 | * |
---|
72 | * @return payload |
---|
73 | */ |
---|
74 | public ArrayList<QuadTreePayload<Instance>> getPayload() { |
---|
75 | return this.payload; |
---|
76 | } |
---|
77 | |
---|
78 | /** |
---|
79 | * Calculate the density of this quadrant |
---|
80 | * |
---|
81 | * density = number of instances / global size (all instances) |
---|
82 | * |
---|
83 | * @return density |
---|
84 | */ |
---|
85 | public double getDensity() { |
---|
86 | double dens = 0; |
---|
87 | dens = (double)this.getNumbers() / QuadTree.size; |
---|
88 | return dens; |
---|
89 | } |
---|
90 | |
---|
91 | public void setSize(double[] x, double[] y){ |
---|
92 | this.x = x; |
---|
93 | this.y = y; |
---|
94 | } |
---|
95 | |
---|
96 | public double[][] getSize() { |
---|
97 | return new double[][] {this.x, this.y}; |
---|
98 | } |
---|
99 | |
---|
100 | public Double[][] getSizeDouble() { |
---|
101 | Double[] tmpX = new Double[2]; |
---|
102 | Double[] tmpY = new Double[2]; |
---|
103 | |
---|
104 | tmpX[0] = this.x[0]; |
---|
105 | tmpX[1] = this.x[1]; |
---|
106 | |
---|
107 | tmpY[0] = this.y[0]; |
---|
108 | tmpY[1] = this.y[1]; |
---|
109 | |
---|
110 | return new Double[][] {tmpX, tmpY}; |
---|
111 | } |
---|
112 | |
---|
113 | /** |
---|
114 | * TODO: DRY, median ist immer dasselbe |
---|
115 | * |
---|
116 | * @return median for x |
---|
117 | */ |
---|
118 | private double getMedianForX() { |
---|
119 | double med_x =0 ; |
---|
120 | |
---|
121 | Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { |
---|
122 | @Override |
---|
123 | public int compare(QuadTreePayload<Instance> x1, QuadTreePayload<Instance> x2) { |
---|
124 | return Double.compare(x1.x, x2.x); |
---|
125 | } |
---|
126 | }); |
---|
127 | |
---|
128 | if(this.payload.size() % 2 == 0) { |
---|
129 | int mid = this.payload.size() / 2; |
---|
130 | med_x = (this.payload.get(mid).x + this.payload.get(mid+1).x) / 2; |
---|
131 | }else { |
---|
132 | int mid = this.payload.size() / 2; |
---|
133 | med_x = this.payload.get(mid).x; |
---|
134 | } |
---|
135 | |
---|
136 | if(QuadTree.verbose) { |
---|
137 | System.out.println("sorted:"); |
---|
138 | for(int i = 0; i < this.payload.size(); i++) { |
---|
139 | System.out.print(""+this.payload.get(i).x+","); |
---|
140 | } |
---|
141 | System.out.println("median x: " + med_x); |
---|
142 | } |
---|
143 | return med_x; |
---|
144 | } |
---|
145 | |
---|
146 | private double getMedianForY() { |
---|
147 | double med_y =0 ; |
---|
148 | |
---|
149 | Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { |
---|
150 | @Override |
---|
151 | public int compare(QuadTreePayload<Instance> y1, QuadTreePayload<Instance> y2) { |
---|
152 | return Double.compare(y1.y, y2.y); |
---|
153 | } |
---|
154 | }); |
---|
155 | |
---|
156 | if(this.payload.size() % 2 == 0) { |
---|
157 | int mid = this.payload.size() / 2; |
---|
158 | med_y = (this.payload.get(mid).y + this.payload.get(mid+1).y) / 2; |
---|
159 | }else { |
---|
160 | int mid = this.payload.size() / 2; |
---|
161 | med_y = this.payload.get(mid).y; |
---|
162 | } |
---|
163 | |
---|
164 | if(QuadTree.verbose) { |
---|
165 | System.out.println("sorted:"); |
---|
166 | for(int i = 0; i < this.payload.size(); i++) { |
---|
167 | System.out.print(""+this.payload.get(i).y+","); |
---|
168 | } |
---|
169 | System.out.println("median y: " + med_y); |
---|
170 | } |
---|
171 | return med_y; |
---|
172 | } |
---|
173 | |
---|
174 | /** |
---|
175 | * Reurns the number of instances in the payload |
---|
176 | * |
---|
177 | * @return int number of instances |
---|
178 | */ |
---|
179 | public int getNumbers() { |
---|
180 | int number = 0; |
---|
181 | if(this.payload != null) { |
---|
182 | number = this.payload.size(); |
---|
183 | } |
---|
184 | return number; |
---|
185 | } |
---|
186 | |
---|
187 | /** |
---|
188 | * Calculate median values of payload for x, y and split into 4 sectors |
---|
189 | * |
---|
190 | * @return Array of QuadTree nodes (4 childs) |
---|
191 | * @throws Exception if we would run into an recursive loop |
---|
192 | */ |
---|
193 | public QuadTree[] split() throws Exception { |
---|
194 | |
---|
195 | double medx = this.getMedianForX(); |
---|
196 | double medy = this.getMedianForY(); |
---|
197 | |
---|
198 | // Payload lists for each child |
---|
199 | ArrayList<QuadTreePayload<Instance>> nw = new ArrayList<QuadTreePayload<Instance>>(); |
---|
200 | ArrayList<QuadTreePayload<Instance>> sw = new ArrayList<QuadTreePayload<Instance>>(); |
---|
201 | ArrayList<QuadTreePayload<Instance>> ne = new ArrayList<QuadTreePayload<Instance>>(); |
---|
202 | ArrayList<QuadTreePayload<Instance>> se = new ArrayList<QuadTreePayload<Instance>>(); |
---|
203 | |
---|
204 | // sort the payloads to new payloads |
---|
205 | // here we have the problem that payloads with the same values are sorted |
---|
206 | // into the same slots and it could happen that medx and medy = size_x[1] and size_y[1] |
---|
207 | // in that case we would have an endless loop |
---|
208 | for(int i=0; i < this.payload.size(); i++) { |
---|
209 | |
---|
210 | QuadTreePayload<Instance> item = this.payload.get(i); |
---|
211 | |
---|
212 | // north west |
---|
213 | if(item.x <= medx && item.y >= medy) { |
---|
214 | nw.add(item); |
---|
215 | } |
---|
216 | |
---|
217 | // south west |
---|
218 | else if(item.x <= medx && item.y <= medy) { |
---|
219 | sw.add(item); |
---|
220 | } |
---|
221 | |
---|
222 | // north east |
---|
223 | else if(item.x >= medx && item.y >= medy) { |
---|
224 | ne.add(item); |
---|
225 | } |
---|
226 | |
---|
227 | // south east |
---|
228 | else if(item.x >= medx && item.y <= medy) { |
---|
229 | se.add(item); |
---|
230 | } |
---|
231 | } |
---|
232 | |
---|
233 | // if we assign one child a payload equal to our own (see problem above) |
---|
234 | // we throw an exceptions which stops the recursion on this node |
---|
235 | if(nw.equals(this.payload)) { |
---|
236 | throw new Exception("payload equal"); |
---|
237 | } |
---|
238 | if(sw.equals(this.payload)) { |
---|
239 | throw new Exception("payload equal"); |
---|
240 | } |
---|
241 | if(ne.equals(this.payload)) { |
---|
242 | throw new Exception("payload equal"); |
---|
243 | } |
---|
244 | if(se.equals(this.payload)) { |
---|
245 | throw new Exception("payload equal"); |
---|
246 | } |
---|
247 | |
---|
248 | this.child_nw = new QuadTree(this, nw); |
---|
249 | this.child_nw.setSize(new double[] {this.x[0], medx}, new double[] {medy, this.y[1]}); |
---|
250 | this.child_nw.level = this.level + 1; |
---|
251 | |
---|
252 | this.child_sw = new QuadTree(this, sw); |
---|
253 | this.child_sw.setSize(new double[] {this.x[0], medx}, new double[] {this.y[0], medy}); |
---|
254 | this.child_sw.level = this.level + 1; |
---|
255 | |
---|
256 | this.child_ne = new QuadTree(this, ne); |
---|
257 | this.child_ne.setSize(new double[] {medx, this.x[1]}, new double[] {medy, this.y[1]}); |
---|
258 | this.child_ne.level = this.level + 1; |
---|
259 | |
---|
260 | this.child_se = new QuadTree(this, se); |
---|
261 | this.child_se.setSize(new double[] {medx, this.x[1]}, new double[] {this.y[0], medy}); |
---|
262 | this.child_se.level = this.level + 1; |
---|
263 | |
---|
264 | this.payload = null; |
---|
265 | return new QuadTree[] {this.child_nw, this.child_ne, this.child_se, this.child_sw}; |
---|
266 | } |
---|
267 | |
---|
268 | /** |
---|
269 | * TODO: static method |
---|
270 | * |
---|
271 | * @param q |
---|
272 | */ |
---|
273 | public void recursiveSplit(QuadTree q) { |
---|
274 | if(QuadTree.verbose) { |
---|
275 | System.out.println("splitting: "+ q); |
---|
276 | } |
---|
277 | if(q.getNumbers() < QuadTree.alpha) { |
---|
278 | return; |
---|
279 | }else{ |
---|
280 | // exception is thrown if we would run into an endless loop (see comments in split()) |
---|
281 | try { |
---|
282 | QuadTree[] childs = q.split(); |
---|
283 | this.recursiveSplit(childs[0]); |
---|
284 | this.recursiveSplit(childs[1]); |
---|
285 | this.recursiveSplit(childs[2]); |
---|
286 | this.recursiveSplit(childs[3]); |
---|
287 | }catch(Exception e) { |
---|
288 | return; |
---|
289 | } |
---|
290 | } |
---|
291 | } |
---|
292 | |
---|
293 | /** |
---|
294 | * returns an list of childs sorted by density |
---|
295 | * |
---|
296 | * @param q QuadTree |
---|
297 | * @return list of QuadTrees |
---|
298 | */ |
---|
299 | private void generateList(QuadTree q) { |
---|
300 | |
---|
301 | // we only have all childs or none at all |
---|
302 | if(q.child_ne == null) { |
---|
303 | this.l.add(q); |
---|
304 | } |
---|
305 | |
---|
306 | if(q.child_ne != null) { |
---|
307 | this.generateList(q.child_ne); |
---|
308 | } |
---|
309 | if(q.child_nw != null) { |
---|
310 | this.generateList(q.child_nw); |
---|
311 | } |
---|
312 | if(q.child_se != null) { |
---|
313 | this.generateList(q.child_se); |
---|
314 | } |
---|
315 | if(q.child_sw != null) { |
---|
316 | this.generateList(q.child_sw); |
---|
317 | } |
---|
318 | } |
---|
319 | |
---|
320 | /** |
---|
321 | * Checks if passed QuadTree is neighboring to us |
---|
322 | * |
---|
323 | * @param q QuadTree |
---|
324 | * @return true if passed QuadTree is a neighbor |
---|
325 | */ |
---|
326 | public boolean isNeighbour(QuadTree q) { |
---|
327 | boolean is_neighbour = false; |
---|
328 | |
---|
329 | double[][] our_size = this.getSize(); |
---|
330 | double[][] new_size = q.getSize(); |
---|
331 | |
---|
332 | // X is i=0, Y is i=1 |
---|
333 | for(int i =0; i < 2; i++) { |
---|
334 | // we are smaller than q |
---|
335 | // -------------- q |
---|
336 | // ------- we |
---|
337 | if(our_size[i][0] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]) { |
---|
338 | is_neighbour = true; |
---|
339 | } |
---|
340 | // we overlap with q at some point |
---|
341 | //a) ---------------q |
---|
342 | // ----------- we |
---|
343 | //b) --------- q |
---|
344 | // --------- we |
---|
345 | if((our_size[i][0] >= new_size[i][0] && our_size[i][0] <= new_size[i][1]) || |
---|
346 | (our_size[i][1] >= new_size[i][0] && our_size[i][1] <= new_size[i][1])) { |
---|
347 | is_neighbour = true; |
---|
348 | } |
---|
349 | // we are larger than q |
---|
350 | // ---- q |
---|
351 | // ---------- we |
---|
352 | if(our_size[i][1] >= new_size[i][1] && our_size[i][0] <= new_size[i][0]) { |
---|
353 | is_neighbour = true; |
---|
354 | } |
---|
355 | } |
---|
356 | |
---|
357 | if(is_neighbour && QuadTree.verbose) { |
---|
358 | System.out.println(this + " neighbour of: " + q); |
---|
359 | } |
---|
360 | |
---|
361 | return is_neighbour; |
---|
362 | } |
---|
363 | |
---|
364 | /** |
---|
365 | * Perform pruning and clustering of the quadtree |
---|
366 | * |
---|
367 | * Pruning according to: |
---|
368 | * Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman, |
---|
369 | * Forrest Shull, Burak Turhan, Thomas Zimmermann, |
---|
370 | * "Local versus Global Lessons for Defect Prediction and Effort Estimation," |
---|
371 | * IEEE Transactions on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013 |
---|
372 | * |
---|
373 | * 1) get list of leaf quadrants |
---|
374 | * 2) sort by their density |
---|
375 | * 3) set stop_rule to 0.5 * highest Density in the list |
---|
376 | * 4) merge all nodes with a density > stop_rule to the new cluster and remove all from list |
---|
377 | * 5) repeat |
---|
378 | * |
---|
379 | * @param q List of QuadTree (children only) |
---|
380 | */ |
---|
381 | public void gridClustering(ArrayList<QuadTree> list) { |
---|
382 | |
---|
383 | if(list.size() == 0) { |
---|
384 | return; |
---|
385 | } |
---|
386 | |
---|
387 | double stop_rule; |
---|
388 | QuadTree biggest; |
---|
389 | QuadTree current; |
---|
390 | |
---|
391 | // current clusterlist |
---|
392 | ArrayList<QuadTreePayload<Instance>> current_cluster; |
---|
393 | |
---|
394 | // remove list (for removal of items after scanning of the list) |
---|
395 | ArrayList<Integer> remove = new ArrayList<Integer>(); |
---|
396 | |
---|
397 | // 1. find biggest, and add it |
---|
398 | biggest = list.get(list.size()-1); |
---|
399 | stop_rule = biggest.getDensity() * 0.5; |
---|
400 | |
---|
401 | current_cluster = new ArrayList<QuadTreePayload<Instance>>(); |
---|
402 | current_cluster.addAll(biggest.getPayload()); |
---|
403 | |
---|
404 | // remove the biggest because we are starting with it |
---|
405 | remove.add(list.size()-1); |
---|
406 | |
---|
407 | ArrayList<Double[][]> tmpSize = new ArrayList<Double[][]>(); |
---|
408 | tmpSize.add(biggest.getSizeDouble()); |
---|
409 | |
---|
410 | // check the items for their density |
---|
411 | for(int i=list.size()-1; i >= 0; i--) { |
---|
412 | current = list.get(i); |
---|
413 | |
---|
414 | // 2. find neighbors with correct density |
---|
415 | // if density > stop_rule and is_neighbour add to cluster and remove from list |
---|
416 | if(current.getDensity() > stop_rule && !current.equals(biggest) && current.isNeighbour(biggest)) { |
---|
417 | current_cluster.addAll(current.getPayload()); |
---|
418 | |
---|
419 | // add it to remove list (we cannot remove it inside the loop because it would move the index) |
---|
420 | remove.add(i); |
---|
421 | |
---|
422 | // get the size |
---|
423 | tmpSize.add(current.getSizeDouble()); |
---|
424 | } |
---|
425 | } |
---|
426 | |
---|
427 | // 3. remove our removal candidates from the list |
---|
428 | for(Integer item: remove) { |
---|
429 | list.remove((int)item); |
---|
430 | } |
---|
431 | |
---|
432 | // 4. add to cluster |
---|
433 | QuadTree.ccluster.add(current_cluster); |
---|
434 | |
---|
435 | // 5. add sizes of our current (biggest) this adds a number of sizes (all QuadTree Instances belonging to this cluster) |
---|
436 | // we need that to classify test instances to a cluster later |
---|
437 | Integer cnumber = new Integer(QuadTree.ccluster.size()-1); |
---|
438 | if(QuadTree.csize.containsKey(cnumber) == false) { |
---|
439 | QuadTree.csize.put(cnumber, tmpSize); |
---|
440 | } |
---|
441 | |
---|
442 | // repeat |
---|
443 | this.gridClustering(list); |
---|
444 | } |
---|
445 | |
---|
446 | public void printInfo() { |
---|
447 | System.out.println("we have " + ccluster.size() + " clusters"); |
---|
448 | |
---|
449 | for(int i=0; i < ccluster.size(); i++) { |
---|
450 | System.out.println("cluster: "+i+ " size: "+ ccluster.get(i).size()); |
---|
451 | } |
---|
452 | } |
---|
453 | |
---|
454 | /** |
---|
455 | * Helper Method to get a sorted list (by density) for all |
---|
456 | * children |
---|
457 | * |
---|
458 | * @param q QuadTree |
---|
459 | * @return Sorted ArrayList of quadtrees |
---|
460 | */ |
---|
461 | public ArrayList<QuadTree> getList(QuadTree q) { |
---|
462 | this.generateList(q); |
---|
463 | |
---|
464 | Collections.sort(this.l, new Comparator<QuadTree>() { |
---|
465 | @Override |
---|
466 | public int compare(QuadTree x1, QuadTree x2) { |
---|
467 | return Double.compare(x1.getDensity(), x2.getDensity()); |
---|
468 | } |
---|
469 | }); |
---|
470 | |
---|
471 | return this.l; |
---|
472 | } |
---|
473 | } |
---|