Changeset 128 for trunk/CrossPare
- Timestamp:
- 06/21/16 13:07:13 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/AbstractCODEP.java
r101 r128 16 16 17 17 import java.util.ArrayList; 18 import java.util.HashMap; 18 19 import java.util.List; 20 import java.util.Map; 19 21 import java.util.logging.Level; 22 import java.util.regex.Matcher; 23 import java.util.regex.Pattern; 20 24 21 25 import de.ugoe.cs.util.console.Console; … … 62 66 private Classifier codepClassifier = null; 63 67 68 /** 69 * Map that stores attributes for upscaling for each classifier 70 */ 71 private Map<Integer, Integer> upscaleIndex = null; 72 73 /** 74 * Scaling value that moves the decimal point by 4 digits. 75 */ 76 private final double SCALER = 10000.0d; 77 64 78 /* 65 79 * (non-Javadoc) … … 87 101 setupInternalClassifiers(); 88 102 setupInternalAttributes(); 89 103 upscaleIndex = new HashMap<>(); 104 105 int classifierIndex = 0; 106 boolean secondAttempt = false; 107 Instances traindataCopy = null; 90 108 for (Classifier classifier : internalClassifiers) { 91 Console.traceln(Level.FINE, "internally training " + classifier.getClass().getName()); 92 classifier.buildClassifier(traindata); 109 boolean trainingSuccessfull = false; 110 do { 111 Console.traceln(Level.FINE, 112 "internally training " + classifier.getClass().getName()); 113 try { 114 if (secondAttempt) { 115 classifier.buildClassifier(traindataCopy); 116 trainingSuccessfull = true; 117 } 118 else { 119 classifier.buildClassifier(traindata); 120 trainingSuccessfull = true; 121 } 122 } 123 catch (IllegalArgumentException e) { 124 String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*"; 125 Pattern p = Pattern.compile(regex); 126 Matcher m = p.matcher(e.getMessage()); 127 if (!m.find()) { 128 // cannot treat problem, rethrow exception 129 throw e; 130 } 131 String attributeName = m.group(1); 132 int attrIndex = traindata.attribute(attributeName).index(); 133 if (secondAttempt) { 134 throw new RuntimeException("cannot be handled correctly yet, because upscaleIndex is a Map"); 135 // traindataCopy = upscaleAttribute(traindataCopy, attrIndex); 136 
} 137 else { 138 traindataCopy = upscaleAttribute(traindata, attrIndex); 139 } 140 141 upscaleIndex.put(classifierIndex, attrIndex); 142 Console 143 .traceln(Level.FINE, 144 "upscaled attribute " + attributeName + "; restarting training"); 145 secondAttempt = true; 146 continue; 147 } 148 } 149 while (!trainingSuccessfull); // dummy loop for internal continue 150 classifierIndex++; 151 secondAttempt = false; 93 152 } 94 153 … … 118 177 private Instance createInternalInstance(Instance instance) throws Exception { 119 178 double[] values = new double[internalAttributes.size()]; 179 Instances traindataCopy; 120 180 for (int j = 0; j < internalClassifiers.size(); j++) { 181 if (upscaleIndex.containsKey(j)) { 182 // instance value must be upscaled 183 int attrIndex = upscaleIndex.get(j); 184 double upscaledVal = instance.value(attrIndex) * SCALER; 185 traindataCopy = new Instances(instance.dataset()); 186 instance = new DenseInstance(instance.weight(), instance.toDoubleArray()); 187 instance.setValue(attrIndex, upscaledVal); 188 traindataCopy.add(instance); 189 instance.setDataset(traindataCopy); 190 } 121 191 values[j] = internalClassifiers.get(j).classifyInstance(instance); 122 192 } … … 161 231 /** 162 232 * <p> 233 * Upscales the value of a single attribute. This is a workaround to get BayesNet running for 234 * all data. Works on a copy of the training data, i.e., leaves the original data untouched. 235 * </p> 236 * 237 * @param traindata 238 * data from which the attribute is upscaled. 
239 * @param attributeIndex 240 * index of the attribute 241 * @return data with upscaled attribute 242 */ 243 private Instances upscaleAttribute(Instances traindata, int attributeIndex) { 244 Instances traindataCopy = new Instances(traindata); 245 for (int i = 0; i < traindata.size(); i++) { 246 traindataCopy.get(i).setValue(attributeIndex, 247 traindata.get(i).value(attributeIndex) * SCALER); 248 } 249 return traindataCopy; 250 } 251 252 /** 253 * <p> 163 254 * Abstract method through which implementing classes define which classifier is used for the 164 255 * CODEP.
Note: See TracChangeset
for help on using the changeset viewer.