Changeset 16
- Timestamp: 09/01/14 14:37:33 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLocalTraining2.java
r15 r16 33 33 * 3) We cluster the QuadTree nodes together if they have similar density (50%) 34 34 * 4) We save the clusters and their training data 35 * 5) We train a Weka classifier for each cluster with the clusters training data 36 * 5) We classify single instances to a cluster and then classify them using the classifier of the cluster 37 * 35 * 5) We only use clusters with > ALPHA instances (currently Math.sqrt(SIZE)), rest is discarded 36 * 6) We train a Weka classifier for each cluster with the clusters training data 37 * 7) We recalculate Fastmap distances for a single instance and then try to find a cluster containing the coords of the instance. 38 * 7.1.) If we can not find a cluster (due to coords outside of all clusters) we find the nearest cluster. 39 * 8) We classifiy the Instance with the classifier and traindata from the Cluster we found in 7. 38 40 */ 39 41 public class WekaLocalTraining2 extends WekaBaseTraining2 implements ITrainingStrategy { … … 41 43 private final TraindatasetCluster classifier = new TraindatasetCluster(); 42 44 43 // these values are set later when we have all the information we need 44 45 // these values are set later when we have all the information we need (size) 45 46 /*Stopping rule for tree recursion (Math.sqrt(Instances)*/ 46 47 public static double ALPHA = 0; 48 /*size of the complete set (used for density function)*/ 49 public static int SIZE = 0; 47 50 /*Stopping rule for clustering*/ 48 51 public static double DELTA = 0.5; 49 /*size of the complete set (used for density function)*/ 50 public static int SIZE = 0; 51 52 53 // we need these references later in the testing 52 54 private static QuadTree TREE; 53 55 private static Fastmap FMAP; … … 55 57 private static Instances TRAIN; 56 58 57 // cluster 59 // cluster payloads 58 60 private static ArrayList<ArrayList<QuadTreePayload<Instance>>> cluster = new ArrayList<ArrayList<QuadTreePayload<Instance>>>(); 61 62 // cluster sizes (index is cluster number, arraylist is 
list of boxes (x0,y0,x1,y1) 63 private static HashMap<Integer, ArrayList<Double[][]>> CSIZE = new HashMap<Integer, ArrayList<Double[][]>>(); 59 64 60 65 @Override … … 118 123 * 119 124 * TODO: class attribute filter raus 125 * TODO: werden auf die übergebene Instance ebenfalls die preprocessors angewendet? müsste eigentlich 120 126 */ 121 127 @Override … … 163 169 // this is the projection vector for our instance 164 170 double[] proj = FMAP.addInstance(distmat); 165 166 167 // jetzt suchen wir den cluster in dem wir uns befinden mit den 2 projektionen168 169 170 // get distance of this instance to every other instance171 // if the distance is minimal apply the classifier of the current cluster172 173 171 int cnumber; 174 Iterator<Integer> clusternumber = ctraindata.keySet().iterator(); 175 while ( clusternumber.hasNext() ) { 176 cnumber = clusternumber.next(); 177 178 for(int i=0; i < ctraindata.get(cnumber).size(); i++) { 179 } 180 } 181 182 183 /* 184 int cnumber; 185 int min_cluster = -1; 186 double min_distance = 99999999; 187 EuclideanDistance d; 188 Iterator<Integer> clusternumber = ctraindata.keySet().iterator(); 172 int found_cnumber = -1; 173 Iterator<Integer> clusternumber = CSIZE.keySet().iterator(); 189 174 while ( clusternumber.hasNext() ) { 190 175 cnumber = clusternumber.next(); 191 176 192 d = new EuclideanDistance(ctraindata.get(cnumber)); 193 for(int i=0; i < ctraindata.get(cnumber).size(); i++) { 194 if(d.distance(clusterInstance, ctraindata.get(cnumber).get(i)) <= min_distance) { 195 min_cluster = cnumber; 196 min_distance = d.distance(clusterInstance, ctraindata.get(cnumber).get(i)); 177 // jetzt iterieren wir über die boxen und hoffen wir finden was (cluster könnte auch entfernt worden sein) 178 for ( int box=0; box < CSIZE.get(cnumber).size(); box++ ) { 179 Double[][] current = CSIZE.get(cnumber).get(box); 180 if(proj[0] <= current[0][0] && proj[0] >= current[0][1] && // x 181 proj[1] <= current[1][0] && proj[1] >= current[1][1]) { // y 182 
found_cnumber = cnumber; 197 183 } 198 184 } 199 185 } 200 186 187 // wenn wir keinen cluster finden, liegen wir außerhalb des bereichs 188 // kann das vorkommen mit fastmap? 189 190 // ja das kann vorkommen wir suchen also weiterhin den nächsten 191 // müssten mal durchzählen wie oft das vorkommt 192 if ( found_cnumber == -1 ) { 193 //Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster!")); 194 //throw new RuntimeException("no cluster for test instance found!"); 195 } 196 197 // jetzt kann es vorkommen das der cluster gelöscht wurde (weil zuwenig instanzen), jetzt müssen wir den 198 // finden der am nächsten dran ist 199 if( !this.ctraindata.containsKey(found_cnumber) ) { 200 double min_distance = 99999999; 201 clusternumber = ctraindata.keySet().iterator(); 202 while ( clusternumber.hasNext() ) { 203 cnumber = clusternumber.next(); 204 for(int i=0; i < ctraindata.get(cnumber).size(); i++) { 205 if(DIST.distance(clusterInstance, ctraindata.get(cnumber).get(i)) <= min_distance) { 206 found_cnumber = cnumber; 207 min_distance = DIST.distance(clusterInstance, ctraindata.get(cnumber).get(i)); 208 } 209 } 210 } 211 } 212 201 213 // here we have the cluster where an instance has the minimum distance between itself the 202 214 // instance we want to classify 203 if( min_cluster == -1) {215 if( found_cnumber == -1 ) { 204 216 // this is an error condition 217 Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster with full search!")); 205 218 throw new RuntimeException("min_cluster not found"); 206 219 } 207 */208 220 209 221 // classify the passed instance with the cluster we found 210 ret = cclassifier.get( min_cluster).classifyInstance(classInstance);222 ret = cclassifier.get(found_cnumber).classifyInstance(classInstance); 211 223 212 224 }catch( Exception e ) { … … 301 313 } 302 314 } 315 316 303 317 } 304 318 … … 625 639 public double[][] getSize() { 626 640 return new double[][] {this.x, this.y}; 641 } 642 643 public 
Double[][] getSizeDouble() { 644 Double[] tmpX = new Double[2]; 645 Double[] tmpY = new Double[2]; 646 647 tmpX[0] = this.x[0]; 648 tmpX[1] = this.x[1]; 649 650 tmpY[0] = this.y[0]; 651 tmpY[1] = this.y[1]; 652 653 return new Double[][] {tmpX, tmpY}; 627 654 } 628 655 … … 950 977 remove.add(list.size()-1); 951 978 //System.out.println("removing "+biggest.getDensity() + " from list"); 979 980 ArrayList<Double[][]> tmpSize = new ArrayList<Double[][]>(); 981 tmpSize.add(biggest.getSizeDouble()); 952 982 953 983 // check the items for their density … … 964 994 // wir können hier nicht removen weil wir sonst den index verschieben 965 995 remove.add(i); 996 997 // außerdem brauchen wir die größe 998 tmpSize.add(current.getSizeDouble()); 966 999 } 967 1000 } … … 975 1008 cluster.add(current_cluster); 976 1009 1010 // 5. add size of our current (biggest) 1011 // we need that to classify test instances to a cluster 1012 Integer cnumber = new Integer(cluster.size()-1); 1013 if(CSIZE.containsKey(cnumber) == false) { 1014 CSIZE.put(cnumber, tmpSize); 1015 } 1016 977 1017 // recurse 978 1018 //System.out.println("restlist " + list.size());
Note: See TracChangeset for help on using the changeset viewer.