Changeset 13 for trunk/CrossPare/src/de/ugoe/cs/cpdp
- Timestamp: 08/25/14 14:32:23 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLocalTraining2.java
r12 r13 26 26 /** 27 27 * ACHTUNG UNFERTIG 28 *29 28 * 30 * Basically a copy of WekaClusterTraining2 with internal classes for the Fastmap and QuadTree implementations 29 * With WekaLocalTraining2 we do the following: 30 * 1) Run the Fastmap algorithm on all training data, let it calculate the 2 most significant 31 * dimensions and projections of each instance to these dimensions 32 * 2) With these 2 dimensions we span a QuadTree which gets recursively split on median(x) and median(y) values. 33 * 3) We cluster the QuadTree nodes together if they have similar density (50%) 34 * 4) We save the clusters and their training data 35 * 5) We train a Weka classifier for each cluster with the clusters training data 36 * 5) We classify single instances to a cluster and then classify them using the classifier of the cluster 37 * 31 38 */ 32 39 public class WekaLocalTraining2 extends WekaBaseTraining2 implements ITrainingStrategy { … … 34 41 private final TraindatasetCluster classifier = new TraindatasetCluster(); 35 42 36 // we do not need to keep them around37 //private final QuadTree q = null;38 //private final Fastmap f = null;39 40 43 // these values are set later when we have all the information we need 44 41 45 /*Stopping rule for tree recursion (Math.sqrt(Instances)*/ 42 46 public static double ALPHA = 0; … … 45 49 /*size of the complete set (used for density function)*/ 46 50 public static int SIZE = 0; 47 48 public static int MIN_INST = 10;49 51 50 52 // cluster … … 104 106 105 107 /** 106 * Because Fastmap saves only the image not the values of the attributes 107 * we can not use it to classify single instances to values 108 * 109 * TODO: mehr erklärung 110 * TODO: class lavel filter raus 111 * 112 * Finde die am nächsten liegende Instanz zur übergebenen 113 * dann bestimme den cluster der instanz und führe dann den 114 * classifier des clusters aus 108 * Because Fastmap saves only the image not the values of the attributes it used 109 * we can not use it or the 
QuadTree to classify single instances to clusters. 110 * 111 * To classify a single instance we measure the distance to all instances we have clustered and 112 * use the cluster where the distance is minimal. 113 * 114 * TODO: class attribute filter raus 115 115 */ 116 116 @Override … … 190 190 // 4. run fastmap for 2 dimensions on the distance matrix 191 191 Fastmap f = new Fastmap(2, dist); 192 f.calculate( 2);192 f.calculate(); 193 193 double[][] X = f.getX(); 194 194 … … 223 223 SIZE = train.size(); 224 224 225 Console.traceln(Level.INFO, String.format("Generate QuadTree with "+ SIZE + " size, Alpha: "+ ALPHA+ ""));225 //Console.traceln(Level.INFO, String.format("Generate QuadTree with "+ SIZE + " size, Alpha: "+ ALPHA+ "")); 226 226 227 227 // set the size and then split the tree recursively at the median value for x, y … … 232 232 ArrayList<QuadTree> l = new ArrayList<QuadTree>(q.getList(q)); 233 233 234 // recursive grid clustering (tree pruning), the values are stored in cluster !234 // recursive grid clustering (tree pruning), the values are stored in cluster 235 235 q.gridClustering(l); 236 237 // after grid clustering we need to remove the clusters with < 2 * ALPHA instances 238 239 // hier müssten wir sowas haben wie welche instanz in welchem cluster ist 240 // oder wir iterieren durch die cluster und sammeln uns die instanzen daraus 236 237 // wir iterieren durch die cluster und sammeln uns die instanzen daraus 241 238 for(int i=0; i < cluster.size(); i++) { 242 239 ArrayList<QuadTreePayload<Instance>> current = cluster.get(i); 243 240 244 241 // i is the clusternumber 245 // we only allow clusters with Instances > ALPHA 242 // we only allow clusters with Instances > ALPHA, other clusters are not considered! 
246 243 if(current.size() > ALPHA) { 247 244 for(int j=0; j < current.size(); j++ ) { … … 290 287 } 291 288 289 292 290 /** 293 291 * Fastmap implementation 294 *295 * TODO: only one place to pass dimension!296 292 * 297 293 * Faloutsos, C., & Lin, K. I. (1995). … … 313 309 private int col = 0; 314 310 311 /*number of dimensions we want*/ 312 private int target_dims = 0; 313 315 314 public Fastmap(int k, double[][] O) { 316 315 this.O = O; 317 318 316 int N = O.length; 317 318 this.target_dims = k; 319 319 320 320 this.X = new double[N][k]; … … 323 323 324 324 /** 325 * The distance function for e culidean distance325 * The distance function for euclidean distance 326 326 * 327 327 * Acts according to equation 4 of the fastmap paper … … 395 395 * @param dims dimensionality 396 396 */ 397 public void calculate( int dims) {398 399 for(int k=0; k < dims; k++) {397 public void calculate() { 398 399 for(int k=0; k <this.target_dims; k++) { 400 400 401 401 // 2) choose pivot objects … … 436 436 } 437 437 438 438 439 /** 439 440 * QuadTree implementation … … 517 518 } 518 519 519 520 /** 521 * Todo: DRY, median ist immer dasselbe 520 /** 521 * TODO: DRY, median ist immer dasselbe 522 522 * 523 523 * @return median for x … … 551 551 } 552 552 553 554 553 private double getMedianForY() { 555 554 double med_y =0 ; … … 580 579 } 581 580 582 583 581 /** 584 582 * Reurns the number of instances in the payload … … 593 591 return number; 594 592 } 595 596 593 597 594 /** … … 678 675 } 679 676 680 681 677 /** 682 * T odo: evt. auslagern, eigentlich auch eher ne statische methode678 * TODO: evt. auslagern, eigentlich auch eher ne statische methode 683 679 * 684 680 * @param q … … 704 700 } 705 701 706 707 702 /** 708 703 * returns an list of childs sorted by density … … 732 727 } 733 728 } 734 735 729 736 730 /**
Note: See TracChangeset for help on using the changeset viewer.