Changeset 129 for trunk/CrossPare/src/de/ugoe
- Timestamp:
- 06/22/16 11:25:46 (8 years ago)
- Location:
- trunk/CrossPare/src/de/ugoe/cs/cpdp
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TopMetricFilter.java
r102 r129 21 21 import java.util.TreeSet; 22 22 import java.util.logging.Level; 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 23 25 import java.util.stream.IntStream; 24 26 … … 29 31 30 32 import de.ugoe.cs.cpdp.util.SortUtils; 33 import de.ugoe.cs.cpdp.util.WekaUtils; 31 34 import de.ugoe.cs.util.console.Console; 32 35 import weka.attributeSelection.AttributeSelection; … … 96 99 List<Set<Integer>> cfsSets = new LinkedList<>(); 97 100 for( Instances traindata : traindataSet ) { 98 AttributeSelection attsel = new AttributeSelection(); 99 CfsSubsetEval eval = new CfsSubsetEval(); 100 GreedyStepwise search = new GreedyStepwise(); 101 search.setSearchBackwards(true); 102 attsel.setEvaluator(eval); 103 attsel.setSearch(search); 104 attsel.SelectAttributes(traindata); 105 Set<Integer> cfsSet = new HashSet<>(); 106 for( int attr : attsel.selectedAttributes() ) { 107 cfsSet.add(attr); 108 } 109 cfsSets.add(cfsSet); 101 boolean selectionSuccessful = false; 102 boolean secondAttempt = false; 103 Instances traindataCopy = null; 104 do { 105 try { 106 if (secondAttempt) { 107 AttributeSelection attsel = new AttributeSelection(); 108 CfsSubsetEval eval = new CfsSubsetEval(); 109 GreedyStepwise search = new GreedyStepwise(); 110 search.setSearchBackwards(true); 111 attsel.setEvaluator(eval); 112 attsel.setSearch(search); 113 attsel.SelectAttributes(traindataCopy); 114 Set<Integer> cfsSet = new HashSet<>(); 115 for( int attr : attsel.selectedAttributes() ) { 116 cfsSet.add(attr); 117 } 118 cfsSets.add(cfsSet); 119 selectionSuccessful = true; 120 } 121 else { 122 AttributeSelection attsel = new AttributeSelection(); 123 CfsSubsetEval eval = new CfsSubsetEval(); 124 GreedyStepwise search = new GreedyStepwise(); 125 search.setSearchBackwards(true); 126 attsel.setEvaluator(eval); 127 attsel.setSearch(search); 128 attsel.SelectAttributes(traindata); 129 Set<Integer> cfsSet = new HashSet<>(); 130 for( int attr : attsel.selectedAttributes() ) { 131 
cfsSet.add(attr); 132 } 133 cfsSets.add(cfsSet); 134 selectionSuccessful = true; 135 } 136 } 137 catch (IllegalArgumentException e) { 138 String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*"; 139 Pattern p = Pattern.compile(regex); 140 Matcher m = p.matcher(e.getMessage()); 141 if (!m.find()) { 142 // cannot treat problem, rethrow exception 143 throw e; 144 } 145 String attributeName = m.group(1); 146 int attrIndex = traindata.attribute(attributeName).index(); 147 if (secondAttempt) { 148 traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex); 149 } 150 else { 151 traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex); 152 } 153 Console 154 .traceln(Level.FINE, 155 "upscaled attribute " + attributeName + "; restarting training"); 156 secondAttempt = true; 157 continue; 158 } 159 } 160 while (!selectionSuccessful); // dummy loop for internal continue 110 161 } 111 162 … … 143 194 } 144 195 Set<Integer> topkSetIndexSet = new TreeSet<>(); 145 for( int j=0; j<bestCoverageIndex; j++ ) { 196 // j<30 ensures that the computational time does not explode since the powerset is 2^n in complexity 197 for( int j=0; j<bestCoverageIndex && j<30 ; j++ ) { 146 198 topkSetIndexSet.add(j); 147 199 } … … 157 209 } 158 210 for( Set<Integer> cfsSet : cfsSets ) { 159 currentCoverage += (coverage( combination, cfsSet)/traindataSet.size());211 currentCoverage += (coverage(topkCombination, cfsSet)/traindataSet.size()); 160 212 } 161 213 if( currentCoverage > bestOptCoverage ) { -
trunk/CrossPare/src/de/ugoe/cs/cpdp/util/WekaUtils.java
r86 r129 37 37 } 38 38 } 39 40 /** 41 * Scaling value that moves the decimal point by 5 digits. 42 */ 43 public final static double SCALER = 10000.0d; 39 44 40 45 /** … … 151 156 return new DistChar(mean, std, min, max, data.numInstances()); 152 157 } 158 159 /** 160 * <p> 161 * Upscales the value of a single attribute. This is a workaround to get BayesNet running for 162 * all data. Works on a copy of the training data, i.e., leaves the original data untouched. 163 * </p> 164 * 165 * @param traindata 166 * data from which the attribute is upscaled. 167 * @param attributeIndex 168 * index of the attribute 169 * @return data with upscaled attribute 170 */ 171 public static Instances upscaleAttribute(Instances traindata, int attributeIndex) { 172 Instances traindataCopy = new Instances(traindata); 173 for (int i = 0; i < traindata.size(); i++) { 174 traindataCopy.get(i).setValue(attributeIndex, 175 traindata.get(i).value(attributeIndex) * SCALER); 176 } 177 return traindataCopy; 178 } 153 179 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/wekaclassifier/AbstractCODEP.java
r128 r129 23 23 import java.util.regex.Pattern; 24 24 25 import de.ugoe.cs.cpdp.util.WekaUtils; 25 26 import de.ugoe.cs.util.console.Console; 26 27 import weka.classifiers.AbstractClassifier; … … 71 72 private Map<Integer, Integer> upscaleIndex = null; 72 73 73 /**74 * Scaling value that moves the decimal point by 5 digets.75 */76 private final double SCALER = 10000.0d;77 78 74 /* 79 75 * (non-Javadoc) … … 136 132 } 137 133 else { 138 traindataCopy = upscaleAttribute(traindata, attrIndex);134 traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex); 139 135 } 140 136 … … 182 178 // instance value must be upscaled 183 179 int attrIndex = upscaleIndex.get(j); 184 double upscaledVal = instance.value(attrIndex) * SCALER;180 double upscaledVal = instance.value(attrIndex) * WekaUtils.SCALER; 185 181 traindataCopy = new Instances(instance.dataset()); 186 182 instance = new DenseInstance(instance.weight(), instance.toDoubleArray()); … … 231 227 /** 232 228 * <p> 233 * Upscales the value of a single attribute. This is a workaround to get BayesNet running for234 * all data. Works on a copy of the training data, i.e., leaves the original data untouched.235 * </p>236 *237 * @param traindata238 * data from which the attribute is upscaled.239 * @param attributeIndex240 * index of the attribute241 * @return data with upscaled attribute242 */243 private Instances upscaleAttribute(Instances traindata, int attributeIndex) {244 Instances traindataCopy = new Instances(traindata);245 for (int i = 0; i < traindata.size(); i++) {246 traindataCopy.get(i).setValue(attributeIndex,247 traindata.get(i).value(attributeIndex) * SCALER);248 }249 return traindataCopy;250 }251 252 /**253 * <p>254 229 * Abstract method through which implementing classes define which classifier is used for the 255 230 * CODEP.
Note: See TracChangeset
for help on using the changeset viewer.