- Timestamp: 09/17/15 13:46:16 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java
r2 r40 10 10 * Median as reference transformation after Carmargo Cruz and Ochimizu: Towards Logistic Regression Models for Predicting Fault-prone Code across Software Projects 11 11 * <br><br> 12 * For each attribute value x, the new value is x -median of the test data 12 * For each attribute value x, the new value is x + (median of the test data - median of the current project) 13 13 * @author Steffen Herbold 14 14 */ … … 30 30 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 31 31 final Attribute classAttribute = testdata.classAttribute(); 32 final double[] median = new double[testdata.numAttributes()]; 32 33 33 final double[] median = new double[testdata.numAttributes()]; 34 // test and train have the same number of attributes 35 Attribute traindataClassAttribute; 36 double[] currentmedian = new double[testdata.numAttributes()]; 34 37 35 38 // get medians … … 40 43 } 41 44 42 // update testdata 43 for( int i=0 ; i<testdata.numInstances() ; i++ ) { 44 Instance instance = testdata.instance(i); 45 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 46 if( testdata.attribute(j)!=classAttribute ) { 47 instance.setValue(j, instance.value(j)-median[j]); 45 // preprocess training data 46 for( Instances traindata : traindataSet ) { 47 // get median of current training set 48 traindataClassAttribute = traindata.classAttribute(); 49 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 50 if( traindata.attribute(j)!=traindataClassAttribute && traindata.attribute(j).isNumeric()) { 51 currentmedian[j] = traindata.kthSmallestValue(j, (traindata.numInstances()+1)>>1); // (>>2 -> /2) 48 52 } 49 53 } 50 } 51 52 // preprocess training data 53 for( Instances traindata : traindataSet ) { 54 54 for( int i=0 ; i<traindata.numInstances() ; i++ ) { 55 55 Instance instance = traindata.instance(i); 56 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 57 if( testdata.attribute(j)!=classAttribute) { 58 instance.setValue(j, instance.value(j) -median[j]); 56 for( 
int j=0 ; j<traindata.numAttributes() ; j++ ) { 57 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 58 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 59 59 } 60 60 } … … 69 69 public void apply(Instances testdata, Instances traindata) { 70 70 final Attribute classAttribute = testdata.classAttribute(); 71 71 final Attribute traindataClassAttribute = traindata.classAttribute(); 72 72 final double[] median = new double[testdata.numAttributes()]; 73 74 // test and train have the same number of attributes 75 double[] currentmedian = new double[testdata.numAttributes()]; 73 76 74 77 // get medians … … 78 81 } 79 82 } 80 81 // update testdata 82 for( int i=0 ; i<testdata.numInstances() ; i++ ) { 83 Instance instance = testdata.instance(i); 84 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 85 if( testdata.attribute(j)!=classAttribute ) { 86 instance.setValue(j, instance.value(j)-median[j]); 87 } 83 84 // get median of current training set 85 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 86 if( traindata.attribute(j)!=traindataClassAttribute && traindata.attribute(j).isNumeric() ) { 87 currentmedian[j] = traindata.kthSmallestValue(j, (traindata.numInstances()+1)>>1); // (>>2 -> /2) 88 88 } 89 89 } … … 92 92 for( int i=0 ; i<traindata.numInstances() ; i++ ) { 93 93 Instance instance = traindata.instance(i); 94 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 95 if( testdata.attribute(j)!=classAttribute) { 96 instance.setValue(j, instance.value(j) -median[j]); 94 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 95 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 96 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 97 97 } 98 98 }
Note: See TracChangeset for help on using the changeset viewer.