- Timestamp:
- 07/18/16 12:26:03 (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/CLIFF.java
r120 r135 28 28 public class CLIFF implements IPointWiseDataselectionStrategy, ISetWiseDataselectionStrategy { 29 29 30 /** 31 * percentage of data selected 32 */ 30 33 private double percentage = 0.10; 31 34 35 /** 36 * number of ranges considered 37 */ 32 38 private final int numRanges = 10; 33 39 … … 40 46 @Override 41 47 public void setParameter(String parameters) { 42 if ( parameters!=null) {48 if (parameters != null) { 43 49 percentage = Double.parseDouble(parameters); 44 50 } 45 51 } 46 47 /* *52 53 /* 48 54 * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, 49 * 55 * org.apache.commons.collections4.list.SetUniqueList) 50 56 */ 51 57 @Override 52 58 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 53 for ( Instances traindata : traindataSet) {59 for (Instances traindata : traindataSet) { 54 60 applyCLIFF(traindata); 55 61 } 56 62 } 57 63 58 /* *64 /* 59 65 * @see de.ugoe.cs.cpdp.dataselection.PointWiseDataselectionStrategy#apply(weka.core.Instances, 60 * 66 * weka.core.Instances) 61 67 */ 62 68 @Override … … 65 71 } 66 72 73 /** 74 * <p> 75 * Applies the CLIFF relevancy filter to the data. 
76 * </p> 77 * 78 * @param data 79 * the data 80 * @return CLIFF-filtered data 81 */ 67 82 protected Instances applyCLIFF(Instances data) { 68 83 final double[][] powerAttributes = new double[data.size()][data.numAttributes()]; 69 84 final double[] powerEntity = new double[data.size()]; 70 85 71 86 final int[] counts = data.attributeStats(data.classIndex()).nominalCounts; 72 87 final double probDefect = data.numInstances() / (double) counts[1]; 73 74 for ( int j=0; j<data.numAttributes(); j++) {75 if ( data.attribute(j)!=data.classAttribute()) {88 89 for (int j = 0; j < data.numAttributes(); j++) { 90 if (data.attribute(j) != data.classAttribute()) { 76 91 final double[] ranges = getRanges(data, j); 77 92 final double[] probDefectRange = getRangeProbabilities(data, j, ranges); 78 79 for ( int i=0 ; i<data.numInstances() ; i++) {93 94 for (int i = 0; i < data.numInstances(); i++) { 80 95 final double value = data.instance(i).value(j); 81 96 final int range = determineRange(ranges, value); 82 97 double probClass, probNotClass, probRangeClass, probRangeNotClass; 83 if ( data.instance(i).classValue()==1) {98 if (data.instance(i).classValue() == 1) { 84 99 probClass = probDefect; 85 probNotClass = 1.0 -probDefect;100 probNotClass = 1.0 - probDefect; 86 101 probRangeClass = probDefectRange[range]; 87 probRangeNotClass = 1.0-probDefectRange[range]; 88 } else { 89 probClass = 1.0-probDefect; 102 probRangeNotClass = 1.0 - probDefectRange[range]; 103 } 104 else { 105 probClass = 1.0 - probDefect; 90 106 probNotClass = probDefect; 91 probRangeClass = 1.0 -probDefectRange[range];107 probRangeClass = 1.0 - probDefectRange[range]; 92 108 probRangeNotClass = probDefectRange[range]; 93 109 } 94 powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)/(probRangeClass*probClass+probRangeNotClass*probNotClass); 110 powerAttributes[i][j] = Math.pow(probRangeClass, 2.0) / 111 (probRangeClass * probClass + probRangeNotClass * probNotClass); 95 112 } 96 113 } 97 114 } 98 99 for ( int i=0; 
i<data.numInstances(); i++) {115 116 for (int i = 0; i < data.numInstances(); i++) { 100 117 powerEntity[i] = 1.0; 101 for (int j =0; j<data.numAttributes() ; j++) {118 for (int j = 0; j < data.numAttributes(); j++) { 102 119 powerEntity[i] *= powerAttributes[i][j]; 103 120 } … … 105 122 double[] sortedPower = powerEntity.clone(); 106 123 Arrays.sort(sortedPower); 107 double cutOff = sortedPower[(int) (data.numInstances() *(1-percentage))];124 double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))]; 108 125 109 126 final Instances selected = new Instances(data); 110 127 selected.delete(); 111 for (int i =0; i<data.numInstances(); i++) {112 if ( powerEntity[i]>=cutOff) {128 for (int i = 0; i < data.numInstances(); i++) { 129 if (powerEntity[i] >= cutOff) { 113 130 selected.add(data.instance(i)); 114 131 } … … 116 133 return selected; 117 134 } 118 135 136 /** 137 * <p> 138 * Gets an array with the ranges from the data for a given attribute 139 * </p> 140 * 141 * @param data 142 * the data 143 * @param j 144 * index of the attribute 145 * @return the ranges for the attribute 146 */ 119 147 private double[] getRanges(Instances data, int j) { 120 double[] values = new double[numRanges +1];121 for ( int k=0; k<numRanges; k++) {122 values[k] = data.kthSmallestValue(j, (int) (data.size() *(k+1.0)/numRanges));148 double[] values = new double[numRanges + 1]; 149 for (int k = 0; k < numRanges; k++) { 150 values[k] = data.kthSmallestValue(j, (int) (data.size() * (k + 1.0) / numRanges)); 123 151 } 124 152 values[numRanges] = data.attributeStats(j).numericStats.max; 125 153 return values; 126 154 } 127 155 156 /** 157 * <p> 158 * Gets the probabilities of a positive prediction for each range for a given attribute 159 * </p> 160 * 161 * @param data 162 * the data 163 * @param j 164 * index of the attribute 165 * @param ranges 166 * the ranges 167 * @return probabilities for each range 168 */ 128 169 private double[] getRangeProbabilities(Instances data, int 
j, double[] ranges) { 129 170 double[] probDefectRange = new double[numRanges]; 130 171 int[] countRange = new int[numRanges]; 131 172 int[] countDefect = new int[numRanges]; 132 for ( int i=0; i<data.numInstances() ; i++) {133 int range = determineRange(ranges, data.instance(i).value(j)); 173 for (int i = 0; i < data.numInstances(); i++) { 174 int range = determineRange(ranges, data.instance(i).value(j)); 134 175 countRange[range]++; 135 if ( data.instance(i).classValue()== 1) {176 if (data.instance(i).classValue() == 1) { 136 177 countDefect[range]++; 137 178 } 138 179 139 180 } 140 for ( int k=0; k<numRanges; k++) {181 for (int k = 0; k < numRanges; k++) { 141 182 probDefectRange[k] = ((double) countDefect[k]) / countRange[k]; 142 183 } 143 184 return probDefectRange; 144 185 } 145 186 187 /** 188 * <p> 189 * Determines the range of a given value 190 * </p> 191 * 192 * @param ranges 193 * the possible ranges 194 * @param value 195 * the value 196 * @return index of the range 197 */ 146 198 private int determineRange(double[] ranges, double value) { 147 for ( int k=0; k<numRanges; k++) {148 if ( value<=ranges[k+1]) {199 for (int k = 0; k < numRanges; k++) { 200 if (value <= ranges[k + 1]) { 149 201 return k; 150 202 }
Note: See TracChangeset
for help on using the changeset viewer.