source: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/QuadTree.java @ 141

Last change on this file since 141 was 136, checked in by sherbold, 8 years ago
  • more code documentation
File size: 17.5 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.training;
16
17import java.util.ArrayList;
18import java.util.Collections;
19import java.util.Comparator;
20import java.util.HashMap;
21
22import weka.core.Instance;
23import de.ugoe.cs.cpdp.training.WekaLocalFQTraining.QuadTreePayload;
24
25/**
26 * <p>
27 * QuadTree implementation.
28 * </p>
29 * <p>
30 * QuadTree gets a list of instances and then recursively split them into 4 children For this it
31 * uses the median of the 2 values x,y.
32 * </p>
33 *
34 * @author Alexander Trautsch
35 */
36public class QuadTree {
37
38    /**
39     * 1 parent or null
40     */
41    private QuadTree parent = null;
42
43    /**
44     * north-west quadrant
45     */
46    private QuadTree child_nw;
47
48    /**
49     * north-east quadrant
50     */
51    private QuadTree child_ne;
52
53    /**
54     * south-east quadrant
55     */
56    private QuadTree child_se;
57
58    /**
59     * south-west quadrant
60     */
61    private QuadTree child_sw;
62
63    /**
64     * helper list for child quadrant generation
65     */
66    private ArrayList<QuadTree> l = new ArrayList<QuadTree>();
67
68    /**
69     * debugging attribute
70     */
71    public int level = 0;
72
73    /**
74     * size of the quadrant in x-dimension
75     */
76    private double[] x;
77
78    /**
79     * size of the quadrant in y-dimension
80     */
81    private double[] y;
82
83    /**
84     * debugging parameter
85     */
86    public static boolean verbose = false;
87
88    /**
89     * global size of the QuadTree.
90     */
91    public static int size = 0;
92
93    /**
94     * recursion parameter alpha
95     */
96    public static double alpha = 0;
97
98    /**
99     * data for each cluster
100     */
101    public static ArrayList<ArrayList<QuadTreePayload<Instance>>> ccluster =
102        new ArrayList<ArrayList<QuadTreePayload<Instance>>>();
103
104    /**
105     * cluster sizes (index is cluster number, {@link ArrayList} is list of boxes (x0,y0,x1,y1
106     */
107    public static HashMap<Integer, ArrayList<Double[][]>> csize =
108        new HashMap<Integer, ArrayList<Double[][]>>();
109
110    /**
111     * data within this quadrant
112     */
113    private ArrayList<QuadTreePayload<Instance>> payload;
114
115    /**
116     * <p>
117     * Constructor. Creates a new QuadTree.
118     * </p>
119     *
120     * @param parent
121     *            parent of this tree
122     * @param payload
123     *            data within the quadrant
124     */
125    public QuadTree(QuadTree parent, ArrayList<QuadTreePayload<Instance>> payload) {
126        this.parent = parent;
127        this.payload = payload;
128    }
129
130    /*
131     * (non-Javadoc)
132     *
133     * @see java.lang.Object#toString()
134     */
135    @Override
136    public String toString() {
137        String n = "";
138        if (this.parent == null) {
139            n += "rootnode ";
140        }
141        String level = new String(new char[this.level]).replace("\0", "-");
142        n += level + " instances: " + this.getNumbers();
143        return n;
144    }
145
146    /**
147     * <p>
148     * Returns the payload, used for clustering in the clustering list we only have children with
149     * payload
150     * </p>
151     *
152     * @return payload the payload
153     */
154    public ArrayList<QuadTreePayload<Instance>> getPayload() {
155        return this.payload;
156    }
157
158    /**
159     * <p>
160     * Calculate the density of this quadrant as
161     * <ul>
162     * <li>density = number of instances / global size (all instances)</li>
163     * </ul>
164     *
165     * @return density the density
166     */
167    public double getDensity() {
168        double dens = 0;
169        dens = (double) this.getNumbers() / QuadTree.size;
170        return dens;
171    }
172
173    /**
174     * <p>
175     * sets the size coordinates of the quadrant
176     * </p>
177     *
178     * @param x
179     *            x-dimension
180     * @param y
181     *            y-dimension
182     */
183    public void setSize(double[] x, double[] y) {
184        this.x = x;
185        this.y = y;
186    }
187
188    /**
189     * <p>
190     * returns the size of the quadrant
191     * </p>
192     *
193     * @return size of the current quadrant
194     */
195    public double[][] getSize() {
196        return new double[][]
197            { this.x, this.y };
198    }
199
200    /**
201     * <p>
202     * returns the size of the quadrant
203     * </p>
204     *
205     * @return size of the current quadrant
206     */
207    public Double[][] getSizeDouble() {
208        Double[] tmpX = new Double[2];
209        Double[] tmpY = new Double[2];
210
211        tmpX[0] = this.x[0];
212        tmpX[1] = this.x[1];
213
214        tmpY[0] = this.y[0];
215        tmpY[1] = this.y[1];
216
217        return new Double[][]
218            { tmpX, tmpY };
219    }
220
221    /**
222     * <p>
223     * calculates the median for the x axis
224     * </p>
225     *
226     * @return median for x
227     */
228    private double getMedianForX() {
229        double med_x = 0;
230
231        Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() {
232            @Override
233            public int compare(QuadTreePayload<Instance> x1, QuadTreePayload<Instance> x2) {
234                return Double.compare(x1.x, x2.x);
235            }
236        });
237
238        if (this.payload.size() % 2 == 0) {
239            int mid = this.payload.size() / 2;
240            med_x = (this.payload.get(mid).x + this.payload.get(mid + 1).x) / 2;
241        }
242        else {
243            int mid = this.payload.size() / 2;
244            med_x = this.payload.get(mid).x;
245        }
246
247        if (QuadTree.verbose) {
248            System.out.println("sorted:");
249            for (int i = 0; i < this.payload.size(); i++) {
250                System.out.print("" + this.payload.get(i).x + ",");
251            }
252            System.out.println("median x: " + med_x);
253        }
254        return med_x;
255    }
256
257    /**
258     * <p>
259     * calculates the median for the y axis
260     * </p>
261     *
262     * @return median for y
263     */
264    private double getMedianForY() {
265        double med_y = 0;
266
267        Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() {
268            @Override
269            public int compare(QuadTreePayload<Instance> y1, QuadTreePayload<Instance> y2) {
270                return Double.compare(y1.y, y2.y);
271            }
272        });
273
274        if (this.payload.size() % 2 == 0) {
275            int mid = this.payload.size() / 2;
276            med_y = (this.payload.get(mid).y + this.payload.get(mid + 1).y) / 2;
277        }
278        else {
279            int mid = this.payload.size() / 2;
280            med_y = this.payload.get(mid).y;
281        }
282
283        if (QuadTree.verbose) {
284            System.out.println("sorted:");
285            for (int i = 0; i < this.payload.size(); i++) {
286                System.out.print("" + this.payload.get(i).y + ",");
287            }
288            System.out.println("median y: " + med_y);
289        }
290        return med_y;
291    }
292
293    /**
294     * <p>
295     * Returns the number of instances in the payload
296     * </p>
297     *
298     * @return number of instances
299     */
300    public int getNumbers() {
301        int number = 0;
302        if (this.payload != null) {
303            number = this.payload.size();
304        }
305        return number;
306    }
307
308    /**
309     * <p>
310     * Calculate median values of payload for x, y and split into 4 sectors
311     * </p>
312     *
313     * @return Array of QuadTree nodes (4 childs)
314     * @throws Exception
315     *             if we would run into an recursive loop
316     */
317    public QuadTree[] split() throws Exception {
318
319        double medx = this.getMedianForX();
320        double medy = this.getMedianForY();
321
322        // Payload lists for each child
323        ArrayList<QuadTreePayload<Instance>> nw = new ArrayList<QuadTreePayload<Instance>>();
324        ArrayList<QuadTreePayload<Instance>> sw = new ArrayList<QuadTreePayload<Instance>>();
325        ArrayList<QuadTreePayload<Instance>> ne = new ArrayList<QuadTreePayload<Instance>>();
326        ArrayList<QuadTreePayload<Instance>> se = new ArrayList<QuadTreePayload<Instance>>();
327
328        // sort the payloads to new payloads
329        // here we have the problem that payloads with the same values are sorted
330        // into the same slots and it could happen that medx and medy = size_x[1] and size_y[1]
331        // in that case we would have an endless loop
332        for (int i = 0; i < this.payload.size(); i++) {
333
334            QuadTreePayload<Instance> item = this.payload.get(i);
335
336            // north west
337            if (item.x <= medx && item.y >= medy) {
338                nw.add(item);
339            }
340
341            // south west
342            else if (item.x <= medx && item.y <= medy) {
343                sw.add(item);
344            }
345
346            // north east
347            else if (item.x >= medx && item.y >= medy) {
348                ne.add(item);
349            }
350
351            // south east
352            else if (item.x >= medx && item.y <= medy) {
353                se.add(item);
354            }
355        }
356
357        // if we assign one child a payload equal to our own (see problem above)
358        // we throw an exceptions which stops the recursion on this node
359        if (nw.equals(this.payload)) {
360            throw new Exception("payload equal");
361        }
362        if (sw.equals(this.payload)) {
363            throw new Exception("payload equal");
364        }
365        if (ne.equals(this.payload)) {
366            throw new Exception("payload equal");
367        }
368        if (se.equals(this.payload)) {
369            throw new Exception("payload equal");
370        }
371
372        this.child_nw = new QuadTree(this, nw);
373        this.child_nw.setSize(new double[]
374            { this.x[0], medx }, new double[]
375            { medy, this.y[1] });
376        this.child_nw.level = this.level + 1;
377
378        this.child_sw = new QuadTree(this, sw);
379        this.child_sw.setSize(new double[]
380            { this.x[0], medx }, new double[]
381            { this.y[0], medy });
382        this.child_sw.level = this.level + 1;
383
384        this.child_ne = new QuadTree(this, ne);
385        this.child_ne.setSize(new double[]
386            { medx, this.x[1] }, new double[]
387            { medy, this.y[1] });
388        this.child_ne.level = this.level + 1;
389
390        this.child_se = new QuadTree(this, se);
391        this.child_se.setSize(new double[]
392            { medx, this.x[1] }, new double[]
393            { this.y[0], medy });
394        this.child_se.level = this.level + 1;
395
396        this.payload = null;
397        return new QuadTree[]
398            { this.child_nw, this.child_ne, this.child_se, this.child_sw };
399    }
400
401    /**
402     * <p>
403     * creates the children of a QuadTree and recursively splits them as well
404     * </p>
405     *
406     * @param q
407     *            tree that is split
408     */
409    public static void recursiveSplit(QuadTree q) {
410        if (QuadTree.verbose) {
411            System.out.println("splitting: " + q);
412        }
413        if (q.getNumbers() < QuadTree.alpha) {
414            return;
415        }
416        else {
417            // exception is thrown if we would run into an endless loop (see comments in split())
418            try {
419                QuadTree[] childs = q.split();
420                recursiveSplit(childs[0]);
421                recursiveSplit(childs[1]);
422                recursiveSplit(childs[2]);
423                recursiveSplit(childs[3]);
424            }
425            catch (Exception e) {
426                return;
427            }
428        }
429    }
430
431    /**
432     * <p>
433     * returns an list of children sorted by density
434     * </p>
435     *
436     * @param q
437     *            QuadTree
438     */
439    private void generateList(QuadTree q) {
440
441        // we only have all childs or none at all
442        if (q.child_ne == null) {
443            this.l.add(q);
444        }
445
446        if (q.child_ne != null) {
447            this.generateList(q.child_ne);
448        }
449        if (q.child_nw != null) {
450            this.generateList(q.child_nw);
451        }
452        if (q.child_se != null) {
453            this.generateList(q.child_se);
454        }
455        if (q.child_sw != null) {
456            this.generateList(q.child_sw);
457        }
458    }
459
460    /**
461     * <p>
462     * Checks if passed QuadTree is neighboring to us
463     * </p>
464     *
465     * @param q
466     *            QuadTree
467     * @return true if passed QuadTree is a neighbor
468     */
469    public boolean isNeighbour(QuadTree q) {
470        boolean is_neighbour = false;
471
472        double[][] our_size = this.getSize();
473        double[][] new_size = q.getSize();
474
475        // X is i=0, Y is i=1
476        for (int i = 0; i < 2; i++) {
477            // we are smaller than q
478            // -------------- q
479            // ------- we
480            if (our_size[i][0] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]) {
481                is_neighbour = true;
482            }
483            // we overlap with q at some point
484            // a) ---------------q
485            // ----------- we
486            // b) --------- q
487            // --------- we
488            if ((our_size[i][0] >= new_size[i][0] && our_size[i][0] <= new_size[i][1]) ||
489                (our_size[i][1] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]))
490            {
491                is_neighbour = true;
492            }
493            // we are larger than q
494            // ---- q
495            // ---------- we
496            if (our_size[i][1] >= new_size[i][1] && our_size[i][0] <= new_size[i][0]) {
497                is_neighbour = true;
498            }
499        }
500
501        if (is_neighbour && QuadTree.verbose) {
502            System.out.println(this + " neighbour of: " + q);
503        }
504
505        return is_neighbour;
506    }
507
508    /**
509     * <p>
510     * Perform pruning and clustering of the quadtree
511     * </p>
512     * <p>
513     * Pruning according to: Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman,
514     * Forrest Shull, Burak Turhan, Thomas Zimmermann,
515     * "Local versus Global Lessons for Defect Prediction and Effort Estimation," IEEE Transactions
516     * on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013
517     * </p>
518     * <ol>
519     * <li>get list of leaf quadrants</li>
520     * <li>sort by their density</li>
521     * <li>set stop_rule to 0.5*highest Density in the list</li>
522     * <li>merge all nodes with a density > stop_rule to the new cluster and remove all from list
523     * </li>
524     * <li>repeat</li>
525     * </ol>
526     *
527     * @param list
528     *            List of QuadTree (children only)
529     */
530    public void gridClustering(ArrayList<QuadTree> list) {
531
532        if (list.size() == 0) {
533            return;
534        }
535
536        double stop_rule;
537        QuadTree biggest;
538        QuadTree current;
539
540        // current clusterlist
541        ArrayList<QuadTreePayload<Instance>> current_cluster;
542
543        // remove list (for removal of items after scanning of the list)
544        ArrayList<Integer> remove = new ArrayList<Integer>();
545
546        // 1. find biggest, and add it
547        biggest = list.get(list.size() - 1);
548        stop_rule = biggest.getDensity() * 0.5;
549
550        current_cluster = new ArrayList<QuadTreePayload<Instance>>();
551        current_cluster.addAll(biggest.getPayload());
552
553        // remove the biggest because we are starting with it
554        remove.add(list.size() - 1);
555
556        ArrayList<Double[][]> tmpSize = new ArrayList<Double[][]>();
557        tmpSize.add(biggest.getSizeDouble());
558
559        // check the items for their density
560        for (int i = list.size() - 1; i >= 0; i--) {
561            current = list.get(i);
562
563            // 2. find neighbors with correct density
564            // if density > stop_rule and is_neighbour add to cluster and remove from list
565            if (current.getDensity() > stop_rule && !current.equals(biggest) &&
566                current.isNeighbour(biggest))
567            {
568                current_cluster.addAll(current.getPayload());
569
570                // add it to remove list (we cannot remove it inside the loop because it would move
571                // the index)
572                remove.add(i);
573
574                // get the size
575                tmpSize.add(current.getSizeDouble());
576            }
577        }
578
579        // 3. remove our removal candidates from the list
580        for (Integer item : remove) {
581            list.remove((int) item);
582        }
583
584        // 4. add to cluster
585        QuadTree.ccluster.add(current_cluster);
586
587        // 5. add sizes of our current (biggest) this adds a number of sizes (all QuadTree Instances
588        // belonging to this cluster)
589        // we need that to classify test instances to a cluster later
590        Integer cnumber = new Integer(QuadTree.ccluster.size() - 1);
591        if (QuadTree.csize.containsKey(cnumber) == false) {
592            QuadTree.csize.put(cnumber, tmpSize);
593        }
594
595        // repeat
596        this.gridClustering(list);
597    }
598
599    /**
600     * <p>
601     * debugging function that prints information about the QuadTree
602     * </p>
603     *
604     */
605    public void printInfo() {
606        System.out.println("we have " + ccluster.size() + " clusters");
607
608        for (int i = 0; i < ccluster.size(); i++) {
609            System.out.println("cluster: " + i + " size: " + ccluster.get(i).size());
610        }
611    }
612
613    /**
614     * <p>
615     * Helper Method to get a sorted list (by density) for all children
616     * </p>
617     *
618     * @param q
619     *            QuadTree
620     * @return Sorted ArrayList of quadtrees
621     */
622    public ArrayList<QuadTree> getList(QuadTree q) {
623        this.generateList(q);
624
625        Collections.sort(this.l, new Comparator<QuadTree>() {
626            @Override
627            public int compare(QuadTree x1, QuadTree x2) {
628                return Double.compare(x1.getDensity(), x2.getDensity());
629            }
630        });
631
632        return this.l;
633    }
634}
Note: See TracBrowser for help on using the repository browser.