source: trunk/CrossPare/src/de/ugoe/cs/cpdp/training/QuadTree.java @ 117

Last change on this file since 117 was 86, checked in by sherbold, 9 years ago
  • switched workspace encoding to UTF-8 and fixed broken characters
File size: 15.6 KB
Line 
1// Copyright 2015 Georg-August-Universität Göttingen, Germany
2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.training;
16
17import java.util.ArrayList;
18import java.util.Collections;
19import java.util.Comparator;
20import java.util.HashMap;
21
22import weka.core.Instance;
23import de.ugoe.cs.cpdp.training.WekaLocalFQTraining.QuadTreePayload;
24
25/**
26 * QuadTree implementation
27 *
28 * QuadTree gets a list of instances and then recursively split them into 4 childs For this it uses
29 * the median of the 2 values x,y
30 */
31public class QuadTree {
32
33    /* 1 parent or null */
34    private QuadTree parent = null;
35
36    /* 4 childs, 1 per quadrant */
37    private QuadTree child_nw;
38    private QuadTree child_ne;
39    private QuadTree child_se;
40    private QuadTree child_sw;
41
42    /* list (only helps with generation of list of childs!) */
43    private ArrayList<QuadTree> l = new ArrayList<QuadTree>();
44
45    /* level only used for debugging */
46    public int level = 0;
47
48    /* size of the quadrant */
49    private double[] x;
50    private double[] y;
51
52    public static boolean verbose = false;
53    public static int size = 0;
54    public static double alpha = 0;
55
56    /* cluster payloads */
57    public static ArrayList<ArrayList<QuadTreePayload<Instance>>> ccluster =
58        new ArrayList<ArrayList<QuadTreePayload<Instance>>>();
59
60    /* cluster sizes (index is cluster number, arraylist is list of boxes (x0,y0,x1,y1) */
61    public static HashMap<Integer, ArrayList<Double[][]>> csize =
62        new HashMap<Integer, ArrayList<Double[][]>>();
63
64    /* payload of this instance */
65    private ArrayList<QuadTreePayload<Instance>> payload;
66
67    public QuadTree(QuadTree parent, ArrayList<QuadTreePayload<Instance>> payload) {
68        this.parent = parent;
69        this.payload = payload;
70    }
71
72    public String toString() {
73        String n = "";
74        if (this.parent == null) {
75            n += "rootnode ";
76        }
77        String level = new String(new char[this.level]).replace("\0", "-");
78        n += level + " instances: " + this.getNumbers();
79        return n;
80    }
81
82    /**
83     * Returns the payload, used for clustering in the clustering list we only have children with
84     * paylod
85     *
86     * @return payload
87     */
88    public ArrayList<QuadTreePayload<Instance>> getPayload() {
89        return this.payload;
90    }
91
92    /**
93     * Calculate the density of this quadrant
94     *
95     * density = number of instances / global size (all instances)
96     *
97     * @return density
98     */
99    public double getDensity() {
100        double dens = 0;
101        dens = (double) this.getNumbers() / QuadTree.size;
102        return dens;
103    }
104
105    public void setSize(double[] x, double[] y) {
106        this.x = x;
107        this.y = y;
108    }
109
110    public double[][] getSize() {
111        return new double[][]
112            { this.x, this.y };
113    }
114
115    public Double[][] getSizeDouble() {
116        Double[] tmpX = new Double[2];
117        Double[] tmpY = new Double[2];
118
119        tmpX[0] = this.x[0];
120        tmpX[1] = this.x[1];
121
122        tmpY[0] = this.y[0];
123        tmpY[1] = this.y[1];
124
125        return new Double[][]
126            { tmpX, tmpY };
127    }
128
129    /**
130     * TODO: DRY, median ist immer dasselbe
131     *
132     * @return median for x
133     */
134    private double getMedianForX() {
135        double med_x = 0;
136
137        Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() {
138            @Override
139            public int compare(QuadTreePayload<Instance> x1, QuadTreePayload<Instance> x2) {
140                return Double.compare(x1.x, x2.x);
141            }
142        });
143
144        if (this.payload.size() % 2 == 0) {
145            int mid = this.payload.size() / 2;
146            med_x = (this.payload.get(mid).x + this.payload.get(mid + 1).x) / 2;
147        }
148        else {
149            int mid = this.payload.size() / 2;
150            med_x = this.payload.get(mid).x;
151        }
152
153        if (QuadTree.verbose) {
154            System.out.println("sorted:");
155            for (int i = 0; i < this.payload.size(); i++) {
156                System.out.print("" + this.payload.get(i).x + ",");
157            }
158            System.out.println("median x: " + med_x);
159        }
160        return med_x;
161    }
162
163    private double getMedianForY() {
164        double med_y = 0;
165
166        Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() {
167            @Override
168            public int compare(QuadTreePayload<Instance> y1, QuadTreePayload<Instance> y2) {
169                return Double.compare(y1.y, y2.y);
170            }
171        });
172
173        if (this.payload.size() % 2 == 0) {
174            int mid = this.payload.size() / 2;
175            med_y = (this.payload.get(mid).y + this.payload.get(mid + 1).y) / 2;
176        }
177        else {
178            int mid = this.payload.size() / 2;
179            med_y = this.payload.get(mid).y;
180        }
181
182        if (QuadTree.verbose) {
183            System.out.println("sorted:");
184            for (int i = 0; i < this.payload.size(); i++) {
185                System.out.print("" + this.payload.get(i).y + ",");
186            }
187            System.out.println("median y: " + med_y);
188        }
189        return med_y;
190    }
191
192    /**
193     * Reurns the number of instances in the payload
194     *
195     * @return int number of instances
196     */
197    public int getNumbers() {
198        int number = 0;
199        if (this.payload != null) {
200            number = this.payload.size();
201        }
202        return number;
203    }
204
205    /**
206     * Calculate median values of payload for x, y and split into 4 sectors
207     *
208     * @return Array of QuadTree nodes (4 childs)
209     * @throws Exception
210     *             if we would run into an recursive loop
211     */
212    public QuadTree[] split() throws Exception {
213
214        double medx = this.getMedianForX();
215        double medy = this.getMedianForY();
216
217        // Payload lists for each child
218        ArrayList<QuadTreePayload<Instance>> nw = new ArrayList<QuadTreePayload<Instance>>();
219        ArrayList<QuadTreePayload<Instance>> sw = new ArrayList<QuadTreePayload<Instance>>();
220        ArrayList<QuadTreePayload<Instance>> ne = new ArrayList<QuadTreePayload<Instance>>();
221        ArrayList<QuadTreePayload<Instance>> se = new ArrayList<QuadTreePayload<Instance>>();
222
223        // sort the payloads to new payloads
224        // here we have the problem that payloads with the same values are sorted
225        // into the same slots and it could happen that medx and medy = size_x[1] and size_y[1]
226        // in that case we would have an endless loop
227        for (int i = 0; i < this.payload.size(); i++) {
228
229            QuadTreePayload<Instance> item = this.payload.get(i);
230
231            // north west
232            if (item.x <= medx && item.y >= medy) {
233                nw.add(item);
234            }
235
236            // south west
237            else if (item.x <= medx && item.y <= medy) {
238                sw.add(item);
239            }
240
241            // north east
242            else if (item.x >= medx && item.y >= medy) {
243                ne.add(item);
244            }
245
246            // south east
247            else if (item.x >= medx && item.y <= medy) {
248                se.add(item);
249            }
250        }
251
252        // if we assign one child a payload equal to our own (see problem above)
253        // we throw an exceptions which stops the recursion on this node
254        if (nw.equals(this.payload)) {
255            throw new Exception("payload equal");
256        }
257        if (sw.equals(this.payload)) {
258            throw new Exception("payload equal");
259        }
260        if (ne.equals(this.payload)) {
261            throw new Exception("payload equal");
262        }
263        if (se.equals(this.payload)) {
264            throw new Exception("payload equal");
265        }
266
267        this.child_nw = new QuadTree(this, nw);
268        this.child_nw.setSize(new double[]
269            { this.x[0], medx }, new double[]
270            { medy, this.y[1] });
271        this.child_nw.level = this.level + 1;
272
273        this.child_sw = new QuadTree(this, sw);
274        this.child_sw.setSize(new double[]
275            { this.x[0], medx }, new double[]
276            { this.y[0], medy });
277        this.child_sw.level = this.level + 1;
278
279        this.child_ne = new QuadTree(this, ne);
280        this.child_ne.setSize(new double[]
281            { medx, this.x[1] }, new double[]
282            { medy, this.y[1] });
283        this.child_ne.level = this.level + 1;
284
285        this.child_se = new QuadTree(this, se);
286        this.child_se.setSize(new double[]
287            { medx, this.x[1] }, new double[]
288            { this.y[0], medy });
289        this.child_se.level = this.level + 1;
290
291        this.payload = null;
292        return new QuadTree[]
293            { this.child_nw, this.child_ne, this.child_se, this.child_sw };
294    }
295
296    /**
297     * TODO: static method
298     *
299     * @param q
300     */
301    public void recursiveSplit(QuadTree q) {
302        if (QuadTree.verbose) {
303            System.out.println("splitting: " + q);
304        }
305        if (q.getNumbers() < QuadTree.alpha) {
306            return;
307        }
308        else {
309            // exception is thrown if we would run into an endless loop (see comments in split())
310            try {
311                QuadTree[] childs = q.split();
312                this.recursiveSplit(childs[0]);
313                this.recursiveSplit(childs[1]);
314                this.recursiveSplit(childs[2]);
315                this.recursiveSplit(childs[3]);
316            }
317            catch (Exception e) {
318                return;
319            }
320        }
321    }
322
323    /**
324     * returns an list of childs sorted by density
325     *
326     * @param q
327     *            QuadTree
328     * @return list of QuadTrees
329     */
330    private void generateList(QuadTree q) {
331
332        // we only have all childs or none at all
333        if (q.child_ne == null) {
334            this.l.add(q);
335        }
336
337        if (q.child_ne != null) {
338            this.generateList(q.child_ne);
339        }
340        if (q.child_nw != null) {
341            this.generateList(q.child_nw);
342        }
343        if (q.child_se != null) {
344            this.generateList(q.child_se);
345        }
346        if (q.child_sw != null) {
347            this.generateList(q.child_sw);
348        }
349    }
350
351    /**
352     * Checks if passed QuadTree is neighboring to us
353     *
354     * @param q
355     *            QuadTree
356     * @return true if passed QuadTree is a neighbor
357     */
358    public boolean isNeighbour(QuadTree q) {
359        boolean is_neighbour = false;
360
361        double[][] our_size = this.getSize();
362        double[][] new_size = q.getSize();
363
364        // X is i=0, Y is i=1
365        for (int i = 0; i < 2; i++) {
366            // we are smaller than q
367            // -------------- q
368            // ------- we
369            if (our_size[i][0] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]) {
370                is_neighbour = true;
371            }
372            // we overlap with q at some point
373            // a) ---------------q
374            // ----------- we
375            // b) --------- q
376            // --------- we
377            if ((our_size[i][0] >= new_size[i][0] && our_size[i][0] <= new_size[i][1]) ||
378                (our_size[i][1] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]))
379            {
380                is_neighbour = true;
381            }
382            // we are larger than q
383            // ---- q
384            // ---------- we
385            if (our_size[i][1] >= new_size[i][1] && our_size[i][0] <= new_size[i][0]) {
386                is_neighbour = true;
387            }
388        }
389
390        if (is_neighbour && QuadTree.verbose) {
391            System.out.println(this + " neighbour of: " + q);
392        }
393
394        return is_neighbour;
395    }
396
397    /**
398     * Perform pruning and clustering of the quadtree
399     *
400     * Pruning according to: Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman,
401     * Forrest Shull, Burak Turhan, Thomas Zimmermann,
402     * "Local versus Global Lessons for Defect Prediction and Effort Estimation," IEEE Transactions
403     * on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013
404     *
405     * 1) get list of leaf quadrants 2) sort by their density 3) set stop_rule to 0.5 * highest
406     * Density in the list 4) merge all nodes with a density > stop_rule to the new cluster and
407     * remove all from list 5) repeat
408     *
409     * @param q
410     *            List of QuadTree (children only)
411     */
412    public void gridClustering(ArrayList<QuadTree> list) {
413
414        if (list.size() == 0) {
415            return;
416        }
417
418        double stop_rule;
419        QuadTree biggest;
420        QuadTree current;
421
422        // current clusterlist
423        ArrayList<QuadTreePayload<Instance>> current_cluster;
424
425        // remove list (for removal of items after scanning of the list)
426        ArrayList<Integer> remove = new ArrayList<Integer>();
427
428        // 1. find biggest, and add it
429        biggest = list.get(list.size() - 1);
430        stop_rule = biggest.getDensity() * 0.5;
431
432        current_cluster = new ArrayList<QuadTreePayload<Instance>>();
433        current_cluster.addAll(biggest.getPayload());
434
435        // remove the biggest because we are starting with it
436        remove.add(list.size() - 1);
437
438        ArrayList<Double[][]> tmpSize = new ArrayList<Double[][]>();
439        tmpSize.add(biggest.getSizeDouble());
440
441        // check the items for their density
442        for (int i = list.size() - 1; i >= 0; i--) {
443            current = list.get(i);
444
445            // 2. find neighbors with correct density
446            // if density > stop_rule and is_neighbour add to cluster and remove from list
447            if (current.getDensity() > stop_rule && !current.equals(biggest) &&
448                current.isNeighbour(biggest))
449            {
450                current_cluster.addAll(current.getPayload());
451
452                // add it to remove list (we cannot remove it inside the loop because it would move
453                // the index)
454                remove.add(i);
455
456                // get the size
457                tmpSize.add(current.getSizeDouble());
458            }
459        }
460
461        // 3. remove our removal candidates from the list
462        for (Integer item : remove) {
463            list.remove((int) item);
464        }
465
466        // 4. add to cluster
467        QuadTree.ccluster.add(current_cluster);
468
469        // 5. add sizes of our current (biggest) this adds a number of sizes (all QuadTree Instances
470        // belonging to this cluster)
471        // we need that to classify test instances to a cluster later
472        Integer cnumber = new Integer(QuadTree.ccluster.size() - 1);
473        if (QuadTree.csize.containsKey(cnumber) == false) {
474            QuadTree.csize.put(cnumber, tmpSize);
475        }
476
477        // repeat
478        this.gridClustering(list);
479    }
480
481    public void printInfo() {
482        System.out.println("we have " + ccluster.size() + " clusters");
483
484        for (int i = 0; i < ccluster.size(); i++) {
485            System.out.println("cluster: " + i + " size: " + ccluster.get(i).size());
486        }
487    }
488
489    /**
490     * Helper Method to get a sorted list (by density) for all children
491     *
492     * @param q
493     *            QuadTree
494     * @return Sorted ArrayList of quadtrees
495     */
496    public ArrayList<QuadTree> getList(QuadTree q) {
497        this.generateList(q);
498
499        Collections.sort(this.l, new Comparator<QuadTree>() {
500            @Override
501            public int compare(QuadTree x1, QuadTree x2) {
502                return Double.compare(x1.getDensity(), x2.getDensity());
503            }
504        });
505
506        return this.l;
507    }
508}
Note: See TracBrowser for help on using the repository browser.