Changeset 129 for trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing
- Timestamp: 06/22/16 11:25:46 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TopMetricFilter.java
r102 r129 21 21 import java.util.TreeSet; 22 22 import java.util.logging.Level; 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 23 25 import java.util.stream.IntStream; 24 26 … … 29 31 30 32 import de.ugoe.cs.cpdp.util.SortUtils; 33 import de.ugoe.cs.cpdp.util.WekaUtils; 31 34 import de.ugoe.cs.util.console.Console; 32 35 import weka.attributeSelection.AttributeSelection; … … 96 99 List<Set<Integer>> cfsSets = new LinkedList<>(); 97 100 for( Instances traindata : traindataSet ) { 98 AttributeSelection attsel = new AttributeSelection(); 99 CfsSubsetEval eval = new CfsSubsetEval(); 100 GreedyStepwise search = new GreedyStepwise(); 101 search.setSearchBackwards(true); 102 attsel.setEvaluator(eval); 103 attsel.setSearch(search); 104 attsel.SelectAttributes(traindata); 105 Set<Integer> cfsSet = new HashSet<>(); 106 for( int attr : attsel.selectedAttributes() ) { 107 cfsSet.add(attr); 108 } 109 cfsSets.add(cfsSet); 101 boolean selectionSuccessful = false; 102 boolean secondAttempt = false; 103 Instances traindataCopy = null; 104 do { 105 try { 106 if (secondAttempt) { 107 AttributeSelection attsel = new AttributeSelection(); 108 CfsSubsetEval eval = new CfsSubsetEval(); 109 GreedyStepwise search = new GreedyStepwise(); 110 search.setSearchBackwards(true); 111 attsel.setEvaluator(eval); 112 attsel.setSearch(search); 113 attsel.SelectAttributes(traindataCopy); 114 Set<Integer> cfsSet = new HashSet<>(); 115 for( int attr : attsel.selectedAttributes() ) { 116 cfsSet.add(attr); 117 } 118 cfsSets.add(cfsSet); 119 selectionSuccessful = true; 120 } 121 else { 122 AttributeSelection attsel = new AttributeSelection(); 123 CfsSubsetEval eval = new CfsSubsetEval(); 124 GreedyStepwise search = new GreedyStepwise(); 125 search.setSearchBackwards(true); 126 attsel.setEvaluator(eval); 127 attsel.setSearch(search); 128 attsel.SelectAttributes(traindata); 129 Set<Integer> cfsSet = new HashSet<>(); 130 for( int attr : attsel.selectedAttributes() ) { 131 
cfsSet.add(attr); 132 } 133 cfsSets.add(cfsSet); 134 selectionSuccessful = true; 135 } 136 } 137 catch (IllegalArgumentException e) { 138 String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*"; 139 Pattern p = Pattern.compile(regex); 140 Matcher m = p.matcher(e.getMessage()); 141 if (!m.find()) { 142 // cannot treat problem, rethrow exception 143 throw e; 144 } 145 String attributeName = m.group(1); 146 int attrIndex = traindata.attribute(attributeName).index(); 147 if (secondAttempt) { 148 traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex); 149 } 150 else { 151 traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex); 152 } 153 Console 154 .traceln(Level.FINE, 155 "upscaled attribute " + attributeName + "; restarting training"); 156 secondAttempt = true; 157 continue; 158 } 159 } 160 while (!selectionSuccessful); // dummy loop for internal continue 110 161 } 111 162 … … 143 194 } 144 195 Set<Integer> topkSetIndexSet = new TreeSet<>(); 145 for( int j=0; j<bestCoverageIndex; j++ ) { 196 // j<30 ensures that the computational time does not explode since the powerset is 2^n in complexity 197 for( int j=0; j<bestCoverageIndex && j<30 ; j++ ) { 146 198 topkSetIndexSet.add(j); 147 199 } … … 157 209 } 158 210 for( Set<Integer> cfsSet : cfsSets ) { 159 currentCoverage += (coverage( combination, cfsSet)/traindataSet.size());211 currentCoverage += (coverage(topkCombination, cfsSet)/traindataSet.size()); 160 212 } 161 213 if( currentCoverage > bestOptCoverage ) {
Note: See TracChangeset for help on using the changeset viewer.