- Timestamp:
- 09/24/15 10:59:05 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java
r40 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.dataprocessing; 2 16 … … 8 22 9 23 /** 10 * Median as reference transformation after Carmargo Cruz and Ochimizu: Towards Logistic Regression Models for Predicting Fault-prone Code across Software Projects 11 * <br><br> 12 * For each attribute value x, the new value is x + (median of the test data - median of the current project) 24 * Median as reference transformation after Carmargo Cruz and Ochimizu: Towards Logistic Regression 25 * Models for Predicting Fault-prone Code across Software Projects <br> 26 * <br> 27 * For each attribute value x, the new value is x + (median of the test data - median of the current 28 * project) 29 * 13 30 * @author Steffen Herbold 14 31 */ 15 32 public class MedianAsReference implements ISetWiseProcessingStrategy, IProcessesingStrategy { 16 33 17 /** 18 * Does not have parameters. String is ignored. 19 * @param parameters ignored 20 */ 21 @Override 22 public void setParameter(String parameters) { 23 // dummy 24 } 34 /** 35 * Does not have parameters. String is ignored. 36 * 37 * @param parameters 38 * ignored 39 */ 40 @Override 41 public void setParameter(String parameters) { 42 // dummy 43 } 25 44 26 /** 27 * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances, org.apache.commons.collections4.list.SetUniqueList) 28 */ 29 @Override 30 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 31 final Attribute classAttribute = testdata.classAttribute(); 32 final double[] median = new double[testdata.numAttributes()]; 33 34 // test and train have the same number of attributes 35 Attribute traindataClassAttribute; 36 double[] currentmedian = new double[testdata.numAttributes()]; 37 38 // get medians 39 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 40 if( testdata.attribute(j)!=classAttribute ) { 41 median[j] = testdata.kthSmallestValue(j, (testdata.numInstances()+1)>>1); // (>>2 -> /2) 42 } 43 } 44 45 // preprocess training data 46 for( Instances traindata : traindataSet ) { 47 // get median of current training set 48 traindataClassAttribute = traindata.classAttribute(); 49 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 50 if( traindata.attribute(j)!=traindataClassAttribute && traindata.attribute(j).isNumeric()) { 51 currentmedian[j] = traindata.kthSmallestValue(j, (traindata.numInstances()+1)>>1); // (>>2 -> /2) 52 } 53 } 54 for( int i=0 ; i<traindata.numInstances() ; i++ ) { 55 Instance instance = traindata.instance(i); 56 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 57 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 58 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 59 } 60 } 61 } 62 } 63 } 45 /** 46 * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances, 47 * org.apache.commons.collections4.list.SetUniqueList) 48 */ 49 @Override 50 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 51 final Attribute classAttribute = testdata.classAttribute(); 52 final double[] median = new double[testdata.numAttributes()]; 64 53 65 /** 66 * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances, weka.core.Instances) 67 */ 68 @Override 69 public void apply(Instances testdata, Instances traindata) { 70 final Attribute classAttribute = testdata.classAttribute(); 71 final Attribute traindataClassAttribute = traindata.classAttribute(); 72 final double[] median = new double[testdata.numAttributes()]; 54 // test and train have the same number of attributes 55 Attribute traindataClassAttribute; 56 double[] currentmedian = new double[testdata.numAttributes()]; 73 57 74 // test and train have the same number of attributes 75 double[] currentmedian = new double[testdata.numAttributes()]; 76 77 // get medians 78 for( int j=0 ; j<testdata.numAttributes() ; j++ ) { 79 if( testdata.attribute(j)!=classAttribute ) { 80 median[j] = testdata.kthSmallestValue(j, (testdata.numInstances()+1)>>1); // (>>2 -> /2) 81 } 82 } 58 // get medians 59 for (int j = 0; j < testdata.numAttributes(); j++) { 60 if (testdata.attribute(j) != classAttribute) { 61 median[j] = testdata.kthSmallestValue(j, (testdata.numInstances() + 1) >> 1); // (>>2 62 // -> 63 // /2) 64 } 65 } 83 66 84 // get median of current training set 85 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 86 if( traindata.attribute(j)!=traindataClassAttribute && traindata.attribute(j).isNumeric() ) { 87 currentmedian[j] = traindata.kthSmallestValue(j, (traindata.numInstances()+1)>>1); // (>>2 -> /2) 88 } 89 } 90 91 // preprocess training data 92 for( int i=0 ; i<traindata.numInstances() ; i++ ) { 93 Instance instance = traindata.instance(i); 94 for( int j=0 ; j<traindata.numAttributes() ; j++ ) { 95 if( traindata.attribute(j)!=classAttribute && traindata.attribute(j).isNumeric() ) { 96 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 97 } 98 } 99 } 100 } 67 // preprocess training data 68 for (Instances traindata : traindataSet) { 69 // get median of current training set 70 traindataClassAttribute = traindata.classAttribute(); 71 for (int j = 0; j < traindata.numAttributes(); j++) { 72 if (traindata.attribute(j) != traindataClassAttribute && 73 traindata.attribute(j).isNumeric()) 74 { 75 currentmedian[j] = 76 traindata.kthSmallestValue(j, (traindata.numInstances() + 1) >> 1); // (>>2 77 // -> 78 // /2) 79 } 80 } 81 for (int i = 0; i < traindata.numInstances(); i++) { 82 Instance instance = traindata.instance(i); 83 for (int j = 0; j < traindata.numAttributes(); j++) { 84 if (traindata.attribute(j) != classAttribute && 85 traindata.attribute(j).isNumeric()) 86 { 87 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 88 } 89 } 90 } 91 } 92 } 93 94 /** 95 * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances, 96 * weka.core.Instances) 97 */ 98 @Override 99 public void apply(Instances testdata, Instances traindata) { 100 final Attribute classAttribute = testdata.classAttribute(); 101 final Attribute traindataClassAttribute = traindata.classAttribute(); 102 final double[] median = new double[testdata.numAttributes()]; 103 104 // test and train have the same number of attributes 105 double[] currentmedian = new double[testdata.numAttributes()]; 106 107 // get medians 108 for (int j = 0; j < testdata.numAttributes(); j++) { 109 if (testdata.attribute(j) != classAttribute) { 110 median[j] = testdata.kthSmallestValue(j, (testdata.numInstances() + 1) >> 1); // (>>2 111 // -> 112 // /2) 113 } 114 } 115 116 // get median of current training set 117 for (int j = 0; j < traindata.numAttributes(); j++) { 118 if (traindata.attribute(j) != traindataClassAttribute && 119 traindata.attribute(j).isNumeric()) 120 { 121 currentmedian[j] = 122 traindata.kthSmallestValue(j, (traindata.numInstances() + 1) >> 1); // (>>2 -> 123 // /2) 124 } 125 } 126 127 // preprocess training data 128 for (int i = 0; i < traindata.numInstances(); i++) { 129 Instance instance = traindata.instance(i); 130 for (int j = 0; j < traindata.numAttributes(); j++) { 131 if (traindata.attribute(j) != classAttribute && traindata.attribute(j).isNumeric()) 132 { 133 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); 134 } 135 } 136 } 137 } 101 138 102 139 }
Note: See TracChangeset
for help on using the changeset viewer.