Changeset 135 for trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection
- Timestamp: 07/18/16 12:26:03 (8 years ago)
- Location: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection
- Files: 14 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/AbstractCharacteristicSelection.java
r86 r135 104 104 } 105 105 else if ("median".equals(characteristics[j])) { 106 instanceValues[i * characteristics.length + j] = Utils.kthSmallestValue(testdata.attributeToDoubleArray(i), testdata.size()/2); 106 instanceValues[i * characteristics.length + j] = 107 Utils.kthSmallestValue(testdata.attributeToDoubleArray(i), 108 testdata.size() / 2); 107 109 } 108 110 else { … … 138 140 } 139 141 else if ("median".equals(characteristics[j])) { 140 instanceValues[i * characteristics.length + j] = Utils.kthSmallestValue(traindata.attributeToDoubleArray(i), traindata.size()/2); 142 instanceValues[i * characteristics.length + j] = 143 Utils.kthSmallestValue(traindata.attributeToDoubleArray(i), 144 traindata.size() / 2); 141 145 } 142 146 else { … … 173 177 } 174 178 catch (Exception e) { 175 throw new RuntimeException( 176 "Unexpected exception during normalization of distributional characteristics.", 179 throw new RuntimeException("Unexpected exception during normalization of distributional characteristics.", 177 180 e); 178 181 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/CLIFF.java
r120 r135 28 28 public class CLIFF implements IPointWiseDataselectionStrategy, ISetWiseDataselectionStrategy { 29 29 30 /** 31 * percentage of data selected 32 */ 30 33 private double percentage = 0.10; 31 34 35 /** 36 * number of ranges considered 37 */ 32 38 private final int numRanges = 10; 33 39 … … 40 46 @Override 41 47 public void setParameter(String parameters) { 42 if ( parameters!=null) {48 if (parameters != null) { 43 49 percentage = Double.parseDouble(parameters); 44 50 } 45 51 } 46 47 /* *52 53 /* 48 54 * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, 49 * 55 * org.apache.commons.collections4.list.SetUniqueList) 50 56 */ 51 57 @Override 52 58 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 53 for ( Instances traindata : traindataSet) {59 for (Instances traindata : traindataSet) { 54 60 applyCLIFF(traindata); 55 61 } 56 62 } 57 63 58 /* *64 /* 59 65 * @see de.ugoe.cs.cpdp.dataselection.PointWiseDataselectionStrategy#apply(weka.core.Instances, 60 * 66 * weka.core.Instances) 61 67 */ 62 68 @Override … … 65 71 } 66 72 73 /** 74 * <p> 75 * Applies the CLIFF relevancy filter to the data. 
76 * </p> 77 * 78 * @param data 79 * the data 80 * @return CLIFF-filtered data 81 */ 67 82 protected Instances applyCLIFF(Instances data) { 68 83 final double[][] powerAttributes = new double[data.size()][data.numAttributes()]; 69 84 final double[] powerEntity = new double[data.size()]; 70 85 71 86 final int[] counts = data.attributeStats(data.classIndex()).nominalCounts; 72 87 final double probDefect = data.numInstances() / (double) counts[1]; 73 74 for ( int j=0; j<data.numAttributes(); j++) {75 if ( data.attribute(j)!=data.classAttribute()) {88 89 for (int j = 0; j < data.numAttributes(); j++) { 90 if (data.attribute(j) != data.classAttribute()) { 76 91 final double[] ranges = getRanges(data, j); 77 92 final double[] probDefectRange = getRangeProbabilities(data, j, ranges); 78 79 for ( int i=0 ; i<data.numInstances() ; i++) {93 94 for (int i = 0; i < data.numInstances(); i++) { 80 95 final double value = data.instance(i).value(j); 81 96 final int range = determineRange(ranges, value); 82 97 double probClass, probNotClass, probRangeClass, probRangeNotClass; 83 if ( data.instance(i).classValue()==1) {98 if (data.instance(i).classValue() == 1) { 84 99 probClass = probDefect; 85 probNotClass = 1.0 -probDefect;100 probNotClass = 1.0 - probDefect; 86 101 probRangeClass = probDefectRange[range]; 87 probRangeNotClass = 1.0-probDefectRange[range]; 88 } else { 89 probClass = 1.0-probDefect; 102 probRangeNotClass = 1.0 - probDefectRange[range]; 103 } 104 else { 105 probClass = 1.0 - probDefect; 90 106 probNotClass = probDefect; 91 probRangeClass = 1.0 -probDefectRange[range];107 probRangeClass = 1.0 - probDefectRange[range]; 92 108 probRangeNotClass = probDefectRange[range]; 93 109 } 94 powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)/(probRangeClass*probClass+probRangeNotClass*probNotClass); 110 powerAttributes[i][j] = Math.pow(probRangeClass, 2.0) / 111 (probRangeClass * probClass + probRangeNotClass * probNotClass); 95 112 } 96 113 } 97 114 } 98 99 for ( int i=0; 
i<data.numInstances(); i++) {115 116 for (int i = 0; i < data.numInstances(); i++) { 100 117 powerEntity[i] = 1.0; 101 for (int j =0; j<data.numAttributes() ; j++) {118 for (int j = 0; j < data.numAttributes(); j++) { 102 119 powerEntity[i] *= powerAttributes[i][j]; 103 120 } … … 105 122 double[] sortedPower = powerEntity.clone(); 106 123 Arrays.sort(sortedPower); 107 double cutOff = sortedPower[(int) (data.numInstances() *(1-percentage))];124 double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))]; 108 125 109 126 final Instances selected = new Instances(data); 110 127 selected.delete(); 111 for (int i =0; i<data.numInstances(); i++) {112 if ( powerEntity[i]>=cutOff) {128 for (int i = 0; i < data.numInstances(); i++) { 129 if (powerEntity[i] >= cutOff) { 113 130 selected.add(data.instance(i)); 114 131 } … … 116 133 return selected; 117 134 } 118 135 136 /** 137 * <p> 138 * Gets an array with the ranges from the data for a given attribute 139 * </p> 140 * 141 * @param data 142 * the data 143 * @param j 144 * index of the attribute 145 * @return the ranges for the attribute 146 */ 119 147 private double[] getRanges(Instances data, int j) { 120 double[] values = new double[numRanges +1];121 for ( int k=0; k<numRanges; k++) {122 values[k] = data.kthSmallestValue(j, (int) (data.size() *(k+1.0)/numRanges));148 double[] values = new double[numRanges + 1]; 149 for (int k = 0; k < numRanges; k++) { 150 values[k] = data.kthSmallestValue(j, (int) (data.size() * (k + 1.0) / numRanges)); 123 151 } 124 152 values[numRanges] = data.attributeStats(j).numericStats.max; 125 153 return values; 126 154 } 127 155 156 /** 157 * <p> 158 * Gets the probabilities of a positive prediction for each range for a given attribute 159 * </p> 160 * 161 * @param data 162 * the data 163 * @param j 164 * index of the attribute 165 * @param ranges 166 * the ranges 167 * @return probabilities for each range 168 */ 128 169 private double[] getRangeProbabilities(Instances data, int 
j, double[] ranges) { 129 170 double[] probDefectRange = new double[numRanges]; 130 171 int[] countRange = new int[numRanges]; 131 172 int[] countDefect = new int[numRanges]; 132 for ( int i=0; i<data.numInstances() ; i++) {133 int range = determineRange(ranges, data.instance(i).value(j)); 173 for (int i = 0; i < data.numInstances(); i++) { 174 int range = determineRange(ranges, data.instance(i).value(j)); 134 175 countRange[range]++; 135 if ( data.instance(i).classValue()== 1) {176 if (data.instance(i).classValue() == 1) { 136 177 countDefect[range]++; 137 178 } 138 179 139 180 } 140 for ( int k=0; k<numRanges; k++) {181 for (int k = 0; k < numRanges; k++) { 141 182 probDefectRange[k] = ((double) countDefect[k]) / countRange[k]; 142 183 } 143 184 return probDefectRange; 144 185 } 145 186 187 /** 188 * <p> 189 * Determines the range of a give value 190 * </p> 191 * 192 * @param ranges 193 * the possible ranges 194 * @param value 195 * the value 196 * @return index of the range 197 */ 146 198 private int determineRange(double[] ranges, double value) { 147 for ( int k=0; k<numRanges; k++) {148 if ( value<=ranges[k+1]) {199 for (int k = 0; k < numRanges; k++) { 200 if (value <= ranges[k + 1]) { 149 201 return k; 150 202 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/DBSCANFilter.java
r92 r135 99 99 .valid(); clusterIter.advance()) 100 100 { 101 int internalIndex = clusterIter.internalGetIndex() - testdata.size() - firstInternalIndex; 101 int internalIndex = 102 clusterIter.internalGetIndex() - testdata.size() - firstInternalIndex; 102 103 if (internalIndex >= 0) { 103 104 // index belongs to a training instance -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/DecisionTreeSelection.java
r116 r135 84 84 } 85 85 REPTree repTree = new REPTree(); 86 if ( repTree.getNumFolds()>similarityData.size()) {86 if (repTree.getNumFolds() > similarityData.size()) { 87 87 repTree.setNumFolds(similarityData.size()); 88 88 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/LACE2.java
r120 r135 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 15 14 16 15 package de.ugoe.cs.cpdp.dataselection; … … 39 38 public class LACE2 implements ISetWiseDataselectionStrategy { 40 39 40 /** 41 * percentage of data selected by the internal CLIFF. 42 */ 41 43 private double percentage = 0.10; 42 44 45 /* 46 * (non-Javadoc) 47 * 48 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String) 49 */ 43 50 @Override 44 51 public void setParameter(String parameters) { 45 if ( parameters!=null && !parameters.isEmpty()) {52 if (parameters != null && !parameters.isEmpty()) { 46 53 percentage = Double.parseDouble(parameters); 47 54 } 48 55 } 49 56 57 /* 58 * (non-Javadoc) 59 * 60 * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances, 61 * org.apache.commons.collections4.list.SetUniqueList) 62 */ 50 63 @Override 51 64 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 52 65 Instances selectedData = new Instances(testdata); 53 66 selectedData.clear(); 54 67 55 68 LinkedList<Instances> traindataCopy = new LinkedList<>(traindataSet); 56 69 Collections.shuffle(traindataCopy); 57 70 58 71 CLIFF cliff = new CLIFF(); 59 72 cliff.setParameter(Double.toString(percentage)); … … 61 74 Median median = new Median(); 62 75 double minDist = Double.MIN_VALUE; 63 64 for ( Instances traindata : traindataCopy) {76 77 for (Instances traindata : traindataCopy) { 65 78 Instances cliffedData = cliff.applyCLIFF(traindata); 66 if ( minDist==Double.MIN_VALUE) {79 if (minDist == Double.MIN_VALUE) { 67 80 // determine distance for leader-follower algorithm 68 81 Instances sample; 69 if ( traindata.size()>100) {82 if (traindata.size() > 100) { 70 83 Resample resample = new Resample(); 71 resample.setSampleSizePercent(100.0 /traindata.size()*100.0);84 resample.setSampleSizePercent(100.0 / traindata.size() * 100.0); 72 85 
resample.setBiasToUniformClass(0.0); 73 86 resample.setNoReplacement(true); … … 79 92 throw new RuntimeException(e); 80 93 } 81 } else { 94 } 95 else { 82 96 sample = new Instances(traindata); 83 97 } 84 98 double[] distances = new double[sample.size()]; 85 for ( int i=0; i<sample.size(); i++) {99 for (int i = 0; i < sample.size(); i++) { 86 100 Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(sample.get(i), sample); 87 distances[i] = MathArrays.distance(WekaUtils.instanceValues(sample.get(i)), WekaUtils.instanceValues(unlikeNeighbor)); 101 distances[i] = MathArrays.distance(WekaUtils.instanceValues(sample.get(i)), 102 WekaUtils.instanceValues(unlikeNeighbor)); 88 103 } 89 104 minDist = median.evaluate(distances); 90 105 } 91 for( int i=0; i<cliffedData.size(); i++ ) { 92 Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(cliffedData.get(i), selectedData); 93 if( unlikeNeighbor==null ) { 106 for (int i = 0; i < cliffedData.size(); i++) { 107 Instance unlikeNeighbor = 108 morph.getNearestUnlikeNeighbor(cliffedData.get(i), selectedData); 109 if (unlikeNeighbor == null) { 94 110 selectedData.add(cliffedData.get(i)); 95 } else { 96 double distance = MathArrays.distance(WekaUtils.instanceValues(cliffedData.get(i)), WekaUtils.instanceValues(unlikeNeighbor)); 97 if( distance>minDist ) { 111 } 112 else { 113 double distance = 114 MathArrays.distance(WekaUtils.instanceValues(cliffedData.get(i)), 115 WekaUtils.instanceValues(unlikeNeighbor)); 116 if (distance > minDist) { 98 117 morph.morphInstance(cliffedData.get(i), cliffedData); 99 118 selectedData.add(cliffedData.get(i)); … … 103 122 } 104 123 } 105 124 106 125 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/MahalanobisOutlierRemoval.java
r117 r135 97 97 RealMatrix inverseCovariance; 98 98 try { 99 inverseCovariance = 100 new LUDecomposition(new Covariance(values).getCovarianceMatrix()).getSolver() 101 .getInverse(); 102 } catch(SingularMatrixException e) { 103 Console.traceln(Level.WARNING, "could not perform Mahalanobis outlier removal due to singular covariance matrix"); 99 inverseCovariance = new LUDecomposition(new Covariance(values).getCovarianceMatrix()) 100 .getSolver().getInverse(); 101 } 102 catch (SingularMatrixException e) { 103 Console 104 .traceln(Level.WARNING, 105 "could not perform Mahalanobis outlier removal due to singular covariance matrix"); 104 106 return; 105 107 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/NeighborhoodFilter.java
r86 r135 36 36 @Override 37 37 public void setParameter(String parameters) { 38 // TODO Auto-generated method stub 39 38 // dummy, parameters not used 40 39 } 41 40 … … 56 55 * </p> 57 56 * 58 * @param testdata test data 59 * @param traindata training data 57 * @param testdata 58 * test data 59 * @param traindata 60 * training data 60 61 * @return filtered trainind data 61 62 */ -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PetersFilter.java
r86 r135 27 27 28 28 /** 29 * Filter according to F. Peters, T. Menzies, and A. Marcus: Better Cross Company Defect Prediction <br> 29 * Filter according to F. Peters, T. Menzies, and A. Marcus: Better Cross Company Defect Prediction 30 * <br> 30 31 * <br> 31 32 * This filter does not work, the paper has been withdrawn. … … 36 37 public class PetersFilter implements IPointWiseDataselectionStrategy { 37 38 38 /* *39 /* 39 40 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String) 40 41 */ … … 44 45 } 45 46 46 /* *47 /* 47 48 * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, 48 * 49 * weka.core.Instances) 49 50 */ 50 51 @Override -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/PointWiseEMClusterSelection.java
r86 r135 31 31 * Use in Config: 32 32 * 33 * Specify number of clusters -N = Num Clusters <pointwiseselector34 * name="PointWiseEMClusterSelection" param="-N 10"/>33 * Specify number of clusters -N = Num Clusters 34 * <pointwiseselector name="PointWiseEMClusterSelection" param="-N 10"/> 35 35 * 36 36 * Try to determine the number of clusters: -I 10 = max iterations -X 5 = 5 folds for cross 37 * evaluation -max = max number of clusters <pointwiseselector name="PointWiseEMClusterSelection"38 * param="-I 10 -X 5 -max 300"/>37 * evaluation -max = max number of clusters 38 * <pointwiseselector name="PointWiseEMClusterSelection" param="-I 10 -X 5 -max 300"/> 39 39 * 40 40 * Don't forget to add: <preprocessor name="Normalization" param=""/> … … 42 42 public class PointWiseEMClusterSelection implements IPointWiseDataselectionStrategy { 43 43 44 /** 45 * paramters passed to the selection 46 */ 44 47 private String[] params; 45 48 49 /* 50 * (non-Javadoc) 51 * 52 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String) 53 */ 46 54 @Override 47 55 public void setParameter(String parameters) { … … 108 116 } 109 117 110 Console.traceln(Level.INFO, 111 String.format("our testdata is in: " + selectedCluster.size() + 112 " different clusters")); 118 Console.traceln(Level.INFO, String 119 .format("our testdata is in: " + selectedCluster.size() + " different clusters")); 113 120 114 121 // 5. 
get cluster membership of our traindata … … 127 134 for (int j = 0; j < ctrain.numInstances(); j++) { 128 135 // get the cluster number from the attributes 129 cnumber = 130 Integer.parseInt(ctrain.get(j).stringValue(ctrain.get(j).numAttributes() - 1) 131 .replace("cluster", "")); 136 cnumber = Integer.parseInt(ctrain.get(j) 137 .stringValue(ctrain.get(j).numAttributes() - 1).replace("cluster", "")); 132 138 133 139 // Console.traceln(Level.INFO, … … 145 151 } 146 152 147 Console.traceln(Level.INFO, 148 String.format("that leaves us with: " + selected.numInstances() + 149 " traindata instances from " + traindata.numInstances())); 153 Console.traceln(Level.INFO, String.format("that leaves us with: " + 154 selected.numInstances() + " traindata instances from " + traindata.numInstances())); 150 155 } 151 156 catch (Exception e) { -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SeparatabilitySelection.java
r86 r135 86 86 inst.setClassValue(1.0); 87 87 sample.add(inst); 88 inst = 89 new DenseInstance( 90 traindata.instance(rand.nextInt(traindata.numInstances()))); 88 inst = new DenseInstance(traindata 89 .instance(rand.nextInt(traindata.numInstances()))); 91 90 inst.setDataset(sample); 92 91 inst.setClassValue(0.0); … … 101 100 } 102 101 catch (Exception e) { 103 throw new RuntimeException( 104 "cross-validation during calculation of separatability failed", 102 throw new RuntimeException("cross-validation during calculation of separatability failed", 105 103 e); 106 104 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMClusterSelection.java
r86 r135 74 74 } 75 75 catch (Exception e) { 76 throw new RuntimeException( 77 "error applying setwise EM clustering training data selection", 76 throw new RuntimeException("error applying setwise EM clustering training data selection", 78 77 e); 79 78 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMContextSelection.java
r86 r135 41 41 public class SetWiseEMContextSelection implements ISetWiseDataselectionStrategy { 42 42 43 /** 44 * context factors 45 */ 43 46 private String[] project_context_factors; // = new String[]{"TND", "TNC", "TNF", "TLOC"}; 44 47 48 /* 49 * (non-Javadoc) 50 * 51 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String) 52 */ 45 53 @Override 46 54 public void setParameter(String parameters) { … … 103 111 } 104 112 catch (Exception e) { 105 throw new RuntimeException( 106 "error applying setwise EM clustering training data selection", 113 throw new RuntimeException("error applying setwise EM clustering training data selection", 107 114 e); 108 115 } 109 116 } 110 117 118 /* 119 * (non-Javadoc) 120 * 121 * @see de.ugoe.cs.cpdp.dataselection.ISetWiseDataselectionStrategy#apply(weka.core.Instances, 122 * org.apache.commons.collections4.list.SetUniqueList) 123 */ 111 124 @Override 112 125 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { … … 131 144 * @return 132 145 */ 133 protected Instances getContextFactors(Instances testdata, SetUniqueList<Instances> traindataSet) 146 protected Instances getContextFactors(Instances testdata, 147 SetUniqueList<Instances> traindataSet) 134 148 { 135 149 // setup weka Instances for clustering … … 190 204 remove.add(traindata); 191 205 // Console.traceln(Level.WARNING, 192 // "rmove attribute "+attribute+" test: "+testdata.firstInstance().value(testdata.attribute(attribute))+" train: "+traindata.firstInstance().value(traindata.attribute(attribute))); 206 // "rmove attribute "+attribute+" test: 207 // "+testdata.firstInstance().value(testdata.attribute(attribute))+" train: 208 // "+traindata.firstInstance().value(traindata.attribute(attribute))); 193 209 } 194 210 } … … 218 234 } 219 235 catch (Exception e) { 220 throw new RuntimeException( 221 "Unexpected exception during normalization of distributional characteristics.", 236 throw new RuntimeException("Unexpected exception during 
normalization of distributional characteristics.", 222 237 e); 223 238 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseKNNSelection.java
r86 r135 71 71 int closestIndex = 1; 72 72 for (int i = 1; i < data.numInstances(); i++) { 73 double distance = 74 MathArrays.distance(data.instance(0).toDoubleArray(), data.instance(i) 75 .toDoubleArray()); 73 double distance = MathArrays.distance(data.instance(0).toDoubleArray(), 74 data.instance(i).toDoubleArray()); 76 75 if (distance < closestDistance) { 77 76 closestDistance = distance; -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SynonymOutlierRemoval.java
r86 r135 19 19 /** 20 20 * <p> 21 * Synonym outlier removal after Amasaki et al. (2015). 21 * Synonym outlier removal after Amasaki et al. (2015). 22 22 * </p> 23 23 * … … 26 26 public class SynonymOutlierRemoval implements IPointWiseDataselectionStrategy { 27 27 28 /* (non-Javadoc) 28 /* 29 * (non-Javadoc) 30 * 29 31 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String) 30 32 */ … … 34 36 } 35 37 36 /* (non-Javadoc) 37 * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, weka.core.Instances) 38 /* 39 * (non-Javadoc) 40 * 41 * @see de.ugoe.cs.cpdp.dataselection.IPointWiseDataselectionStrategy#apply(weka.core.Instances, 42 * weka.core.Instances) 38 43 */ 39 44 @Override … … 48 53 * </p> 49 54 * 50 * @param traindata data from which the outliers are removed. 55 * @param traindata 56 * data from which the outliers are removed. 51 57 */ 52 58 public void applySynonymRemoval(Instances traindata) { 53 double minDistance[][] = new double[traindata.size()][traindata.numAttributes() -1];54 double minDistanceAttribute[] = new double[traindata.numAttributes() -1];59 double minDistance[][] = new double[traindata.size()][traindata.numAttributes() - 1]; 60 double minDistanceAttribute[] = new double[traindata.numAttributes() - 1]; 55 61 double distance; 56 for ( int j=0; j<minDistanceAttribute.length; j++) {62 for (int j = 0; j < minDistanceAttribute.length; j++) { 57 63 minDistanceAttribute[j] = Double.MAX_VALUE; 58 64 } 59 for (int i1 = traindata.size() -1; i1 < traindata.size(); i1++) {60 int k =0;65 for (int i1 = traindata.size() - 1; i1 < traindata.size(); i1++) { 66 int k = 0; 61 67 for (int j = 0; j < traindata.numAttributes(); j++) { 62 if ( j!=traindata.classIndex()) {68 if (j != traindata.classIndex()) { 63 69 minDistance[i1][k] = Double.MAX_VALUE; 64 70 for (int i2 = 0; i2 < traindata.size(); i2++) { 65 71 if (i1 != i2) { 66 distance = Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j)); 72 
distance = 73 Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j)); 67 74 if (distance < minDistance[i1][k]) { 68 75 minDistance[i1][k] = distance; 69 76 } 70 if ( distance < minDistanceAttribute[k]) {77 if (distance < minDistanceAttribute[k]) { 71 78 minDistanceAttribute[k] = distance; 72 79 } … … 77 84 } 78 85 } 79 for ( int i=traindata.size()-1; i>=0; i--) {86 for (int i = traindata.size() - 1; i >= 0; i--) { 80 87 boolean hasClosest = false; 81 for ( int j=0; !hasClosest && j<traindata.numAttributes(); j++) {82 hasClosest = minDistance[i][j] <=minDistanceAttribute[j];88 for (int j = 0; !hasClosest && j < traindata.numAttributes(); j++) { 89 hasClosest = minDistance[i][j] <= minDistanceAttribute[j]; 83 90 } 84 if ( !hasClosest) {91 if (!hasClosest) { 85 92 traindata.delete(i); 86 93 }
Note: See TracChangeset for help on using the changeset viewer.