Changeset 51
- Timestamp: 04/22/16 14:45:25 (9 years ago)
- Location: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing
- Files: 2 added, 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Normalization.java
 r41  r51
  17   17      import org.apache.commons.collections4.list.SetUniqueList;
  18   18
  19        -  import weka.core.Attribute;
  20        -  import weka.core.Instance;
  21   19      import weka.core.Instances;
  22        -  import weka.experiment.Stats;
  23   20
  24   21      /**
   …    …
  35   32          @Override
  36   33          public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
  37        -          final Attribute classAtt = testdata.classAttribute();
  38        -
  39        -          for (int i = 0; i < testdata.numAttributes(); i++) {
  40        -              if (!testdata.attribute(i).equals(classAtt)) {
  41        -                  Stats teststats = testdata.attributeStats(i).numericStats;
  42        -
  43        -                  double minVal = teststats.min;
  44        -                  double maxVal = teststats.max;
  45        -
  46        -                  for (Instances traindata : traindataSet) {
  47        -                      Stats trainstats = traindata.attributeStats(i).numericStats;
  48        -                      if (minVal > trainstats.min) {
  49        -                          minVal = trainstats.min;
  50        -                      }
  51        -                      if (maxVal < trainstats.max) {
  52        -                          maxVal = trainstats.max;
  53        -                      }
  54        -                  }
  55        -
  56        -                  for (int j = 0; j < testdata.numInstances(); j++) {
  57        -                      Instance inst = testdata.instance(j);
  58        -                      double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
  59        -                      inst.setValue(i, newValue);
  60        -                  }
  61        -
  62        -                  for (Instances traindata : traindataSet) {
  63        -                      for (int j = 0; j < traindata.numInstances(); j++) {
  64        -                          Instance inst = traindata.instance(j);
  65        -                          double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
  66        -                          inst.setValue(i, newValue);
  67        -                      }
  68        -                  }
  69        -              }
       34   +          NormalizationUtil.minMax(testdata);
       35   +          for (Instances instances : traindataSet) {
       36   +              NormalizationUtil.minMax(instances);
  70   37              }
  71        -
  72   38          }
  73   39
   …    …
  78   44          @Override
  79   45          public void apply(Instances testdata, Instances traindata) {
  80        -          final Attribute classAtt = testdata.classAttribute();
  81        -
  82        -          for (int i = 0; i < testdata.numAttributes(); i++) {
  83        -              if (!testdata.attribute(i).equals(classAtt)) {
  84        -                  Stats teststats = testdata.attributeStats(i).numericStats;
  85        -
  86        -                  double minVal = teststats.min;
  87        -                  double maxVal = teststats.max;
  88        -
  89        -                  Stats trainstats = traindata.attributeStats(i).numericStats;
  90        -                  if (minVal > trainstats.min) {
  91        -                      minVal = trainstats.min;
  92        -                  }
  93        -                  if (maxVal < trainstats.max) {
  94        -                      maxVal = trainstats.max;
  95        -                  }
  96        -
  97        -                  for (int j = 0; j < testdata.numInstances(); j++) {
  98        -                      Instance inst = testdata.instance(j);
  99        -                      double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
 100        -                      inst.setValue(i, newValue);
 101        -                  }
 102        -
 103        -                  for (int j = 0; j < traindata.numInstances(); j++) {
 104        -                      Instance inst = traindata.instance(j);
 105        -                      double newValue = (inst.value(i) - minVal) / (maxVal - minVal);
 106        -                      inst.setValue(i, newValue);
 107        -                  }
 108        -              }
 109        -          }
       46   +          NormalizationUtil.minMax(testdata);
       47   +          NormalizationUtil.minMax(traindata);
 110   48          }
 111   49
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreNormalization.java
 r41  r51
  17   17      import org.apache.commons.collections4.list.SetUniqueList;
  18   18
  19        -  import weka.core.Attribute;
  20        -  import weka.core.Instance;
  21   19      import weka.core.Instances;
  22   20
   …    …
  45   43          @Override
  46   44          public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
  47        -          normalize(testdata);
       45   +          NormalizationUtil.zScore(testdata);
  48   46              for (Instances instances : traindataSet) {
  49        -              normalize(instances);
       47   +              NormalizationUtil.zScore(instances);
  50   48              }
  51   49          }
   …    …
  57   55          @Override
  58   56          public void apply(Instances testdata, Instances traindata) {
  59        -          normalize(testdata);
  60        -          normalize(traindata);
       57   +          NormalizationUtil.zScore(testdata);
       58   +          NormalizationUtil.zScore(traindata);
  61   59          }
  62        -
  63        -      private void normalize(Instances instances) {
  64        -          instances.toString();
  65        -          final Attribute classAttribute = instances.classAttribute();
  66        -
  67        -          final double[] means = new double[instances.numAttributes()];
  68        -          final double[] stddevs = new double[instances.numAttributes()];
  69        -
  70        -          // get means and stddevs of data
  71        -          for (int j = 0; j < instances.numAttributes(); j++) {
  72        -              if (instances.attribute(j) != classAttribute) {
  73        -                  means[j] = instances.meanOrMode(j);
  74        -                  stddevs[j] = Math.sqrt(instances.variance(j));
  75        -              }
  76        -          }
  77        -          for (int i = 0; i < instances.numAttributes(); i++) {
  78        -              if (!instances.attribute(i).equals(classAttribute)) {
  79        -                  for (int j = 0; j < instances.numInstances(); j++) {
  80        -                      Instance inst = instances.get(i);
  81        -                      double newValue = (inst.value(i) - means[i]) / stddevs[i];
  82        -                      if (newValue == Double.NaN) {
  83        -                          System.out.println("foooooo");
  84        -                      }
  85        -                      inst.setValue(i, newValue);
  86        -                  }
  87        -              }
  88        -          }
  89        -      }
  90        -
  91   60      }
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreTargetNormalization.java
 r41  r51
  17   17      import org.apache.commons.collections4.list.SetUniqueList;
  18   18
  19        -  import weka.core.Attribute;
  20        -  import weka.core.Instance;
  21   19      import weka.core.Instances;
  22   20
   …    …
  46   44          @Override
  47   45          public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
  48        -          final Attribute classAttribute = testdata.classAttribute();
  49        -
  50        -          final double[] meanTest = new double[testdata.numAttributes()];
  51        -          final double[] stddevTest = new double[testdata.numAttributes()];
  52        -
  53        -          // get means of testdata
  54        -          for (int j = 0; j < testdata.numAttributes(); j++) {
  55        -              if (testdata.attribute(j) != classAttribute) {
  56        -                  meanTest[j] = testdata.meanOrMode(j);
  57        -                  stddevTest[j] = Math.sqrt(testdata.variance(j));
  58        -              }
  59        -          }
  60        -
  61        -          // preprocess test data
  62        -          for (int i = 0; i < testdata.numInstances(); i++) {
  63        -              Instance instance = testdata.instance(i);
  64        -              for (int j = 0; j < testdata.numAttributes(); j++) {
  65        -                  if (testdata.attribute(j) != classAttribute) {
  66        -                      instance.setValue(j, instance.value(j) - meanTest[j] / stddevTest[j]);
  67        -                  }
  68        -              }
  69        -          }
  70        -
  71        -          // preprocess training data
  72        -          for (Instances traindata : traindataSet) {
  73        -              for (int i = 0; i < traindata.numInstances(); i++) {
  74        -                  Instance instance = traindata.instance(i);
  75        -                  for (int j = 0; j < testdata.numAttributes(); j++) {
  76        -                      if (testdata.attribute(j) != classAttribute) {
  77        -                          instance.setValue(j, instance.value(j) - meanTest[j] / stddevTest[j]);
  78        -                      }
  79        -                  }
  80        -              }
  81        -          }
       46   +          NormalizationUtil.zScoreTarget(testdata, traindataSet);
  82   47          }
  83   48
   …    …
  88   53          @Override
  89   54          public void apply(Instances testdata, Instances traindata) {
  90        -          final Attribute classAttribute = testdata.classAttribute();
  91        -
  92        -          final double[] meanTest = new double[testdata.numAttributes()];
  93        -          final double[] stddevTest = new double[testdata.numAttributes()];
  94        -
  95        -          // get means of testdata
  96        -          for (int j = 0; j < testdata.numAttributes(); j++) {
  97        -              if (testdata.attribute(j) != classAttribute) {
  98        -                  meanTest[j] = testdata.meanOrMode(j);
  99        -                  stddevTest[j] = Math.sqrt(testdata.variance(j));
 100        -              }
 101        -          }
 102        -
 103        -          // preprocess test data
 104        -          for (int i = 0; i < testdata.numInstances(); i++) {
 105        -              Instance instance = testdata.instance(i);
 106        -              for (int j = 0; j < testdata.numAttributes(); j++) {
 107        -                  if (testdata.attribute(j) != classAttribute) {
 108        -                      instance.setValue(j, instance.value(j) - meanTest[j] / stddevTest[j]);
 109        -                  }
 110        -              }
 111        -          }
 112        -
 113        -          // preprocess training data
 114        -          for (int i = 0; i < traindata.numInstances(); i++) {
 115        -              Instance instance = traindata.instance(i);
 116        -              for (int j = 0; j < testdata.numAttributes(); j++) {
 117        -                  if (testdata.attribute(j) != classAttribute) {
 118        -                      instance.setValue(j, instance.value(j) - meanTest[j] / stddevTest[j]);
 119        -                  }
 120        -              }
 121        -          }
       55   +          NormalizationUtil.zScoreTarget(testdata, traindata);
 122   56          }
 123   57      }
Note: See TracChangeset for help on using the changeset viewer.