- Timestamp: 09/24/15 10:59:05 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/SetWiseEMClusterSelection.java
r2 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.dataselection; 2 16 … … 11 25 12 26 /** 13 * Filter based on EM clustering after S. Herbold: Training data selection for cross-project defect prediction 27 * Filter based on EM clustering after S. Herbold: Training data selection for cross-project defect 28 * prediction 29 * 14 30 * @author Steffen Herbold 15 31 */ 16 32 public class SetWiseEMClusterSelection extends AbstractCharacteristicSelection { 17 18 /** 19 * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList) 20 */ 21 @Override 22 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 23 final Instances data = normalizedCharacteristicInstances(testdata, traindataSet); 24 final Instance targetInstance = data.instance(0); 25 final List<Instance> candidateInstances = new LinkedList<Instance>(); 26 for( int i=1; i<data.numInstances(); i++ ) { 27 candidateInstances.add(data.instance(i)); 28 } 29 30 // cluster and select 31 try { 32 final EM emeans = new EM(); 33 boolean onlyTarget = true; 34 int targetCluster; 35 int maxNumClusters = candidateInstances.size(); 36 do { // while(onlyTarget) 37 emeans.setMaximumNumberOfClusters(maxNumClusters); 38 emeans.buildClusterer(data); 39 40 targetCluster = 
emeans.clusterInstance(targetInstance); 41 42 // check if cluster only contains target project 43 for( int i=0 ; i<candidateInstances.size() && onlyTarget; i++ ) { 44 onlyTarget &= !(emeans.clusterInstance(candidateInstances.get(i))==targetCluster); 45 } 46 maxNumClusters = emeans.numberOfClusters()-1; 47 } while(onlyTarget); 48 49 int numRemoved = 0; 50 for( int i=0 ; i<candidateInstances.size() ; i++ ) { 51 if( emeans.clusterInstance(candidateInstances.get(i))!=targetCluster ) { 52 traindataSet.remove(i-numRemoved++); 53 } 54 } 55 } catch(Exception e) { 56 throw new RuntimeException("error applying setwise EM clustering training data selection", e); 57 } 58 } 33 34 /** 35 * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, 36 * org.apache.commons.collections4.list.SetUniqueList) 37 */ 38 @Override 39 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 40 final Instances data = normalizedCharacteristicInstances(testdata, traindataSet); 41 final Instance targetInstance = data.instance(0); 42 final List<Instance> candidateInstances = new LinkedList<Instance>(); 43 for (int i = 1; i < data.numInstances(); i++) { 44 candidateInstances.add(data.instance(i)); 45 } 46 47 // cluster and select 48 try { 49 final EM emeans = new EM(); 50 boolean onlyTarget = true; 51 int targetCluster; 52 int maxNumClusters = candidateInstances.size(); 53 do { // while(onlyTarget) 54 emeans.setMaximumNumberOfClusters(maxNumClusters); 55 emeans.buildClusterer(data); 56 57 targetCluster = emeans.clusterInstance(targetInstance); 58 59 // check if cluster only contains target project 60 for (int i = 0; i < candidateInstances.size() && onlyTarget; i++) { 61 onlyTarget &= 62 !(emeans.clusterInstance(candidateInstances.get(i)) == targetCluster); 63 } 64 maxNumClusters = emeans.numberOfClusters() - 1; 65 } 66 while (onlyTarget); 67 68 int numRemoved = 0; 69 for (int i = 0; i < candidateInstances.size(); i++) { 70 if 
(emeans.clusterInstance(candidateInstances.get(i)) != targetCluster) { 71 traindataSet.remove(i - numRemoved++); 72 } 73 } 74 } 75 catch (Exception e) { 76 throw new RuntimeException( 77 "error applying setwise EM clustering training data selection", 78 e); 79 } 80 } 59 81 }
Note: See TracChangeset for help on using the changeset viewer.