Changeset 20 for trunk/CrossPare/src/de/ugoe/cs
- Timestamp:
- 10/31/14 15:54:08 (10 years ago)
- Location:
- trunk/CrossPare/src/de/ugoe/cs/cpdp/training
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining2.java
 r7  r20
  5    5
  6    6    import de.ugoe.cs.util.console.Console;
  7    7
       8  + import weka.core.OptionHandler;
  8    9    import weka.classifiers.Classifier;
  9   10    import weka.classifiers.meta.CVParameterSelection;
 10   11
      12  + /**
      13  + * WekaBaseTraining2
      14  + *
      15  + * Allows specification of the Weka classifier and its params in the XML experiment configuration.
      16  + *
      17  + * Important conventions of the XML format:
      18  + * Cross Validation params come always last and are prepended with -CVPARAM
      19  + * Example: <trainer name="WekaClusterTraining2" param="RandomForestLocal weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5"/>
      20  + */
 11   21    public abstract class WekaBaseTraining2 implements WekaCompatibleTrainer {
 12   22
  …    …
 20   30    String[] params = parameters.split(" ");
 21   31
 22       - // first is classifierName
      32  + // first part of the params is the classifierName (e.g. SMORBF)
 23   33    classifierName = params[0];
 24   34
 25       - // allfollowing parameters can be copied from weka!
      35  + // the following parameters can be copied from weka!
 26   36
 27       - // second param is classifierClassName
      37  + // second param is classifierClassName (e.g. weka.classifiers.functions.SMO)
 28   38    classifierClassName = params[1];
 29   39
 30       - // rest are params to the specified classifier
      40  + // rest are params to the specified classifier (e.g. -K weka.classifiers.functions.supportVector.RBFKernel)
 31   41    classifierParams = Arrays.copyOfRange(params, 2, params.length);
 32   42
  …    …
 46   56    Classifier obj = (Classifier) c.newInstance();
 47   57
 48       - // Filter -CVPARAM
      58  + // Filter out -CVPARAM, these are special because they do not belong to the Weka classifier class as parameters
 49   59    String[] param = Arrays.copyOf(classifierParams, classifierParams.length);
 50   60    String[] cvparam = {};
  …    …
 68   78
 69   79    // we have cross val params
 70       - // cant check on cvparam.length may not be initialized
      80  + // cant check on cvparam.length here, it may not be initialized
 71   81    if(cv) {
 72   82    final CVParameterSelection ps = new CVParameterSelection();
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaClusterTraining2.java
r19  r20
 23   23    * WekaClusterTraining2
 24   24    *
 25       - * 1. Cluster traindata
 26       - * 2. for each cluster train a classifier with traindata from cluster
 27       - * 3. match testdata instance to a cluster, then classify with classifier from the cluster
      25  + * Currently supports only EM Clustering.
 28   26    *
 29       - * XML config:
      27  + * 1. Cluster training data
      28  + * 2. for each cluster train a classifier with training data from cluster
      29  + * 3. match test data instance to a cluster, then classify with classifier from the cluster
      30  + *
      31  + * XML configuration:
 30   32    * <!-- because of clustering -->
 31   33    * <preprocessor name="Normalization" param=""/>
  …    …
 33   35    * <!-- cluster trainer -->
 34   36    * <trainer name="WekaClusterTraining2" param="NaiveBayes weka.classifiers.bayes.NaiveBayes" />
 35       - *
 36       - * Questions:
 37       - * - how do we configure the clustering params?
 38   37    */
 39   38    public class WekaClusterTraining2 extends WekaBaseTraining2 implements ITrainingStrategy {
  …    …
 45   44    return classifier;
 46   45    }
 47       -
 48   46
 49   47    @Override
  …    …
 71   69
 72   70
 73       -
      71  + /**
      72  + * Helper method that gives us a clean instance copy with
      73  + * the values of the instancelist of the first parameter.
      74  + *
      75  + * @param instancelist with attributes
      76  + * @param instance with only values
      77  + * @return copy of the instance
      78  + */
 74   79    private Instance createInstance(Instances instances, Instance instance) {
 75   80    // attributes for feeding instance to classifier
  …    …
 96  101    }
 97  102
 98       -
 99  103    @Override
100  104    public double classifyInstance(Instance instance) {
101  105    double ret = 0;
102  106    try {
     107  + // 1. copy the instance (keep the class attribute)
103  108    Instances traindata = ctraindata.get(0);
104  109    Instance classInstance = createInstance(traindata, instance);
105  110
106       - // remove class attribute before clustering
     111  + // 2. remove class attribute before clustering
107  112    Remove filter = new Remove();
108  113    filter.setAttributeIndices("" + (traindata.classIndex() + 1));
  …    …
110  115    traindata = Filter.useFilter(traindata, filter);
111  116
     117  + // 3. copy the instance (without the class attribute) for clustering
112  118    Instance clusterInstance = createInstance(traindata, instance);
113  119
114       - // 1. classify testdata instance to a cluster number
     120  + // 4. match instance without class attribute to a cluster number
115  121    int cnum = clusterer.clusterInstance(clusterInstance);
116  122
117       - //Console.traceln(Level.INFO, String.format("instance is in cluster: " + cnum));
118       -
119       - // 2. classify testata instance to the classifier
     123  + // 5. classify instance with class attribute to the classifier of that cluster number
120  124    ret = cclassifier.get(cnum).classifyInstance(classInstance);
121  125
  …    …
127  131    }
128  132
129       -
130       -
131  133    @Override
132  134    public void buildClassifier(Instances traindata) throws Exception {
133  135
134       - // 1. copy train data
     136  + // 1. copy training data
135  137    Instances train = new Instances(traindata);
136  138
  …    …
141  143    train = Filter.useFilter(train, filter);
142  144
143       - // 3. cluster data
144       - //Console.traceln(Level.INFO, String.format("starting clustering"));
145  145    // new objects
146  146    cclassifier = new HashMap<Integer, Classifier>();
147  147    ctraindata = new HashMap<Integer, Instances>();
148  148
     149  + // 3. cluster data
149  150    // use standard params for now
150  151    clusterer = new EM();
     152  + // we can set options like so:
151  153    //String[] params = {"-N", "100"};
152  154    //clusterer.setOptions(params);
     155  +
     156  + // set max num of clusters to train data size (although we do not want that)
     157  + clusterer.setMaximumNumberOfClusters(train.size());
     158  +
     159  + // build clusterer
153  160    clusterer.buildClusterer(train);
154       - // set max num to traindata size
155       - clusterer.setMaximumNumberOfClusters(train.size());
156       -
157       - // 4. get cluster membership of our traindata
158       - //AddCluster cfilter = new AddCluster();
159       - //cfilter.setClusterer(clusterer);
160       - //cfilter.setInputFormat(train);
161       - //Instances ctrain = Filter.useFilter(train, cfilter);
162  161
163  162    Instances ctrain = new Instances(train);
164  163
165       - // get train data per cluster
     164  + // get train data per cluster
166  165    int cnumber;
167  166    for ( int j=0; j < ctrain.numInstances(); j++ ) {
168       - // get the cluster number from the attributes, subract 1 because if we clusterInstance we get 0-n, and this is 1-n
169       - //cnumber = Integer.parseInt(ctrain.get(j).stringValue(ctrain.get(j).numAttributes()-1).replace("cluster", "")) - 1;
     167  + cnumber = clusterer.clusterInstance(ctrain.get(j));
170  168
171       - cnumber = clusterer.clusterInstance(ctrain.get(j));
172  169    // add training data to list of instances for this cluster number
173  170    if ( !ctraindata.containsKey(cnumber) ) {
  …    …
178  175    }
179  176
180       - // Debug output
181       - //Console.traceln(Level.INFO, String.format("number of clusters: " + clusterer.numberOfClusters()));
182       -
183       - // train one classifier per cluster, we get the clusternumber from the traindata
     177  + // train one classifier per cluster, we get the cluster number from the training data
184  178    Iterator<Integer> clusternumber = ctraindata.keySet().iterator();
185  179    while ( clusternumber.hasNext() ) {
  …    …
188  182    cclassifier.get(cnumber).buildClassifier(ctraindata.get(cnumber));
189  183
190       - //Console.traceln(Level.INFO, String.format(" building classifier in cluster "+cnumber + " with " + ctraindata.get(cnumber).size() + " traindata instances"));
     184  + //Console.traceln(Level.INFO, String.format("classifier in cluster "+cnumber));
191  185    }
192  186    }
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLocalTraining2.java
r19  r20
 84   84    private HashMap<Integer, ArrayList<Double[][]>> csize;
 85   85
      86  + /* debug vars */
      87  + @SuppressWarnings("unused")
 86   88    private boolean show_biggest = true;
 87   89
      90  + @SuppressWarnings("unused")
 88   91    private int CFOUND = 0;
      92  + @SuppressWarnings("unused")
 89   93    private int CNOTFOUND = 0;
 90   94
  …    …
260  264    //}
261  265
262       - // now it can happen that we do nt find a cluster because we deleted it previously (too few instances)
     266  + // now it can happen that we do not find a cluster because we deleted it previously (too few instances)
263  267    // or we get bigger distance measures from weka so that we are completely outside of our clusters.
264  268    // in these cases we just find the nearest cluster to our instance and use it for classification.
  …    …
280  284    }
281  285
282       - // here we have the cluster where an instance has the minimum distance between itself the
     286  + // here we have the cluster where an instance has the minimum distance between itself and the
283  287    // instance we want to classify
284  288    // if we still have not found a cluster we exit because something is really wrong
  …    …
436  440    */
437  441
438       - // train one classifier per cluster, we get the cluster number from the traindata
     442  + // train one classifier per cluster, we get the cluster number from the traindata
439  443    int cnumber;
440  444    Iterator<Integer> clusternumber = ctraindata.keySet().iterator();
  …    …
444  448    while ( clusternumber.hasNext() ) {
445  449    cnumber = clusternumber.next();
446       - cclassifier.put(cnumber,setupClassifier()); // das hier ist der eigentliche trainer
     450  + cclassifier.put(cnumber,setupClassifier()); // this is the classifier used for the cluster
447  451    cclassifier.get(cnumber).buildClassifier(ctraindata.get(cnumber));
448  452    //Console.traceln(Level.INFO, String.format("classifier in cluster "+cnumber));
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining2.java
 r2  r20
 10   10
 11   11    /**
 12       - * Programmatic Weka BaggingTraining
      12  + * Programmatic WekaTraining
 13   13    *
 14   14    * first parameter is Trainer Name.
Note: See TracChangeset
for help on using the changeset viewer.