package de.ugoe.cs.cpdp.dataselection; import java.util.LinkedList; import java.util.List; import org.apache.commons.collections4.list.SetUniqueList; import weka.clusterers.EM; import weka.core.Instance; import weka.core.Instances; /** * Filter based on EM clustering after S. Herbold: Training data selection for cross-project defect prediction * @author Steffen Herbold */ public class SetWiseEMClusterSelection extends AbstractCharacteristicSelection { /** * @see de.ugoe.cs.cpdp.dataselection.SetWiseDataselectionStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList) */ @Override public void apply(Instances testdata, SetUniqueList traindataSet) { final Instances data = normalizedCharacteristicInstances(testdata, traindataSet); final Instance targetInstance = data.instance(0); final List candidateInstances = new LinkedList(); for( int i=1; i