- Timestamp:
- 09/24/15 10:59:05 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseKNNSelection.java
r2 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.dataselection; 2 16 … … 10 24 11 25 /** 12 * Filter based on the k-nearest neighbor (KNN) algorithm S. Herbold: Training data selection for cross-project defect prediction 26 * Filter based on the k-nearest neighbor (KNN) algorithm S. Herbold: Training data selection for 27 * cross-project defect prediction 28 * 13 29 * @author Steffen Herbold 14 30 */ 15 31 public class SetWiseKNNSelection extends AbstractCharacteristicSelection { 16 17 /**18 * number of neighbors selected19 */20 private int k = 1;21 22 /**23 * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList)24 */25 @Override26 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {27 final Instances data = normalizedCharacteristicInstances(testdata, traindataSet);28 29 final Set<Integer> selected = new HashSet<Integer>();30 for( int i=0 ; i<k ; i++ ) {31 int closestIndex = getClosest(data);32 33 selected.add(closestIndex);34 data.delete(closestIndex);35 }36 37 for( int i=traindataSet.size()-1; i>=0 ; i-- ) {38 if( selected.contains(i) ) {39 traindataSet.remove(i);40 }41 }42 }43 44 /**45 * Helper method that determines the index of the instance with the smallest distance to the first instance (index 0).46 * @param data data 
set47 * @return index of the closest instance48 */49 private int getClosest(Instances data) {50 double closestDistance = Double.MAX_VALUE;51 int closestIndex = 1;52 for( int i=1 ; i<data.numInstances() ; i++ ) {53 double distance = MathArrays.distance(data.instance(0).toDoubleArray(), data.instance(i).toDoubleArray());54 if( distance < closestDistance) {55 closestDistance = distance;56 closestIndex = i;57 }58 }59 return closestIndex;60 }61 32 62 /** 63 * Sets the number of neighbors followed by the distributional characteristics, the values are separated by blanks. 64 * @see AbstractCharacteristicSelection#setParameter(String) 65 */ 66 @Override 67 public void setParameter(String parameters) { 68 if( !"".equals(parameters) ) { 69 final String[] split = parameters.split(" "); 70 k = Integer.parseInt(split[0]); 71 String str = ""; 72 for( int i=1 ; i<split.length; i++ ) { 73 str += split[i]; 74 if( i<split.length-1 ) { 75 str += " "; 76 } 77 } 78 super.setParameter(str); 79 } 80 } 33 /** 34 * number of neighbors selected 35 */ 36 private int k = 1; 37 38 /** 39 * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, 40 * org.apache.commons.collections4.list.SetUniqueList) 41 */ 42 @Override 43 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 44 final Instances data = normalizedCharacteristicInstances(testdata, traindataSet); 45 46 final Set<Integer> selected = new HashSet<Integer>(); 47 for (int i = 0; i < k; i++) { 48 int closestIndex = getClosest(data); 49 50 selected.add(closestIndex); 51 data.delete(closestIndex); 52 } 53 54 for (int i = traindataSet.size() - 1; i >= 0; i--) { 55 if (selected.contains(i)) { 56 traindataSet.remove(i); 57 } 58 } 59 } 60 61 /** 62 * Helper method that determines the index of the instance with the smallest distance to the 63 * first instance (index 0). 
64 * 65 * @param data 66 * data set 67 * @return index of the closest instance 68 */ 69 private int getClosest(Instances data) { 70 double closestDistance = Double.MAX_VALUE; 71 int closestIndex = 1; 72 for (int i = 1; i < data.numInstances(); i++) { 73 double distance = 74 MathArrays.distance(data.instance(0).toDoubleArray(), data.instance(i) 75 .toDoubleArray()); 76 if (distance < closestDistance) { 77 closestDistance = distance; 78 closestIndex = i; 79 } 80 } 81 return closestIndex; 82 } 83 84 /** 85 * Sets the number of neighbors followed by the distributional characteristics, the values are 86 * separated by blanks. 87 * 88 * @see AbstractCharacteristicSelection#setParameter(String) 89 */ 90 @Override 91 public void setParameter(String parameters) { 92 if (!"".equals(parameters)) { 93 final String[] split = parameters.split(" "); 94 k = Integer.parseInt(split[0]); 95 String str = ""; 96 for (int i = 1; i < split.length; i++) { 97 str += split[i]; 98 if (i < split.length - 1) { 99 str += " "; 100 } 101 } 102 super.setParameter(str); 103 } 104 } 81 105 }
Note: See TracChangeset for help on using the changeset viewer.