Changeset 40
- Timestamp:
- 09/17/15 13:46:16 (9 years ago)
- Location:
- trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/LogarithmTransform.java
r2 r40 35 35 Instance instance = testdata.instance(i); 36 36 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 37 if( testdata.attribute(j)!=classAttribute ) { 38 instance.setValue(j, Math.log(1+instance.value(j))); 37 if( testdata.attribute(j)!=classAttribute && testdata.attribute(j).isNumeric() ) { 38 if( instance.value(j) < 0 ) { 39 instance.setValue(j, (-1*(Math.log(-1*instance.value(j))))); 40 }else { 41 instance.setValue(j, Math.log(1+instance.value(j))); 42 } 39 43 } 40 44 } … … 46 50 Instance instance = traindata.instance(i); 47 51 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 48 if( testdata.attribute(j)!=classAttribute ) { 49 instance.setValue(j, Math.log(1+instance.value(j))); 52 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 53 if( instance.value(j) < 0 ) { 54 instance.setValue(j, (-1*(Math.log(-1*instance.value(j))))); 55 }else { 56 instance.setValue(j, Math.log(1+instance.value(j))); 57 } 50 58 } 51 59 } … … 65 73 Instance instance = testdata.instance(i); 66 74 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 67 if( testdata.attribute(j)!=classAttribute ) { 68 instance.setValue(j, Math.log(1+instance.value(j))); 75 if( testdata.attribute(j)!=classAttribute && testdata.attribute(j).isNumeric() ) { 76 if( instance.value(j) < 0 ) { 77 instance.setValue(j, (-1*(Math.log(-1*instance.value(j))))); 78 }else { 79 instance.setValue(j, Math.log(1+instance.value(j))); 80 } 69 81 } 70 82 } … … 75 87 Instance instance = traindata.instance(i); 76 88 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 77 if( testdata.attribute(j)!=classAttribute ) { 78 instance.setValue(j, Math.log(1+instance.value(j))); 89 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 90 if( instance.value(j) < 0 ) { 91 instance.setValue(j, (-1*(Math.log(-1*instance.value(j))))); 92 }else { 93 instance.setValue(j, Math.log(1+instance.value(j))); 94 } 79 95 } 80 96 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java
r2 r40 10 10 * Median as reference transformation after Carmargo Cruz and Ochimizu: Towards Logistic Regression Models for Predicting Fault-prone Code across Software Projects 11 11 * <br><br> 12 * For each attribute value x, the new value is x -median of the test data 12 * For each attribute value x, the new value is x + (median of the test data - median of the current project) 13 13 * @author Steffen Herbold 14 14 */ … … 30 30 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 31 31 final Attribute classAttribute = testdata.classAttribute(); 32 final double[] median = new double[testdata.numAttributes()]; 32 33 33 final double[] median = new double[testdata.numAttributes()]; 34 // test and train have the same number of attributes 35 Attribute traindataClassAttribute; 36 double[] currentmedian = new double[testdata.numAttributes()]; 34 37 35 38 // get medians … … 40 43 } 41 44 42 // update testdata 43 for( int i=0 ; i<testdata.numInstances() ; i++ ) { 44 Instance instance = testdata.instance(i); 45 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 46 if( testdata.attribute(j)!=classAttribute ) { 47 instance.setValue(j, instance.value(j)-median[j]); 45 // preprocess training data 46 for( Instances traindata : traindataSet ) { 47 // get median of current training set 48 traindataClassAttribute = traindata.classAttribute(); 49 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 50 if( traindata.attribute(j)!=traindataClassAttribute && traindata.attribute(j).isNumeric()) { 51 currentmedian[j] = traindata.kthSmallestValue(j, (traindata.numInstances()+1)>>1); // (>>2 -> /2) 48 52 } 49 53 } 50 } 51 52 // preprocess training data 53 for( Instances traindata : traindataSet ) { 54 54 for( int i=0 ; i<traindata.numInstances() ; i++ ) { 55 55 Instance instance = traindata.instance(i); 56 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 57 if( testdata.attribute(j)!=classAttribute) { 58 instance.setValue(j, instance.value(j) -median[j]); 56 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 57 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 58 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 59 59 } 60 60 } … … 69 69 public void apply(Instances testdata, Instances traindata) { 70 70 final Attribute classAttribute = testdata.classAttribute(); 71 71 final Attribute traindataClassAttribute = traindata.classAttribute(); 72 72 final double[] median = new double[testdata.numAttributes()]; 73 74 // test and train have the same number of attributes 75 double[] currentmedian = new double[testdata.numAttributes()]; 73 76 74 77 // get medians … … 78 81 } 79 82 } 80 81 // update testdata 82 for( int i=0 ; i<testdata.numInstances() ; i++ ) { 83 Instance instance = testdata.instance(i); 84 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 85 if( testdata.attribute(j)!=classAttribute ) { 86 instance.setValue(j, instance.value(j)-median[j]); 87 } 83 84 // get median of current training set 85 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 86 if( traindata.attribute(j)!=traindataClassAttribute && traindata.attribute(j).isNumeric() ) { 87 currentmedian[j] = traindata.kthSmallestValue(j, (traindata.numInstances()+1)>>1); // (>>2 -> /2) 88 88 } 89 89 } … … 92 92 for( int i=0 ; i<traindata.numInstances() ; i++ ) { 93 93 Instance instance = traindata.instance(i); 94 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 95 if( testdata.attribute(j)!=classAttribute) { 96 instance.setValue(j, instance.value(j) -median[j]); 94 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 95 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 96 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 97 97 } 98 98 }
Note: See TracChangeset for help on using the changeset viewer.