Changeset 135 for trunk/CrossPare/src/de/ugoe/cs/cpdp/util/WekaUtils.java
- Timestamp:
- 07/18/16 12:26:03 (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/util/WekaUtils.java
r129 r135 15 15 package de.ugoe.cs.cpdp.util; 16 16 17 // TODO comment18 17 import org.apache.commons.math3.ml.distance.EuclideanDistance; 19 18 … … 21 20 import weka.core.Instances; 22 21 22 /** 23 * <p> 24 * Collections of helper functions to work with Weka. 25 * </p> 26 * 27 * @author Steffen Herbold 28 */ 23 29 public class WekaUtils { 24 30 31 /** 32 * <p> 33 * Data class for distance between instances within a data set based on their distributional 34 * characteristics. 35 * </p> 36 * 37 * @author Steffen Herbold 38 */ 25 39 public static class DistChar { 26 40 public final double mean; … … 29 43 public final double max; 30 44 public final int num; 45 31 46 private DistChar(double mean, double std, double min, double max, int num) { 32 47 this.mean = mean; … … 37 52 } 38 53 } 39 54 40 55 /** 41 56 * Scaling value that moves the decimal point by 5 digits. 42 57 */ 43 58 public final static double SCALER = 10000.0d; 44 59 45 60 /** 46 61 * <p> … … 66 81 return distance; 67 82 } 68 83 84 /** 85 * <p> 86 * Returns a double array of the values without the classification. 87 * </p> 88 * 89 * @param instance 90 * the instance 91 * @return double array 92 */ 69 93 public static double[] instanceValues(Instance instance) { 70 double[] values = new double[instance.numAttributes() -1];71 int k =0;72 for ( int j=0; j<instance.numAttributes() ; j++) {73 if ( j!= instance.classIndex()) {94 double[] values = new double[instance.numAttributes() - 1]; 95 int k = 0; 96 for (int j = 0; j < instance.numAttributes(); j++) { 97 if (j != instance.classIndex()) { 74 98 values[k] = instance.value(j); 75 99 k++; … … 78 102 return values; 79 103 } 80 104 105 /** 106 * <p> 107 * Calculates the distributional characteristics of the distances the instances within a data 108 * set have to each other. 
109 * </p> 110 * 111 * @param data 112 * data for which the instances are characterized 113 * @return characteristics 114 */ 81 115 public static DistChar datasetDistance(Instances data) { 82 116 double distance; … … 87 121 int numCmp = 0; 88 122 int l = 0; 89 double[] inst1 = new double[data.numAttributes() -1];90 double[] inst2 = new double[data.numAttributes() -1];123 double[] inst1 = new double[data.numAttributes() - 1]; 124 double[] inst2 = new double[data.numAttributes() - 1]; 91 125 EuclideanDistance euclideanDistance = new EuclideanDistance(); 92 for ( int i=0; i<data.numInstances(); i++) {93 l =0;94 for ( int k=0; k<data.numAttributes(); k++) {95 if ( k!=data.classIndex()) {126 for (int i = 0; i < data.numInstances(); i++) { 127 l = 0; 128 for (int k = 0; k < data.numAttributes(); k++) { 129 if (k != data.classIndex()) { 96 130 inst1[l] = data.instance(i).value(k); 97 131 } 98 132 } 99 for ( int j=0; j<data.numInstances(); j++) {100 if ( j!=i) {101 l =0;102 for ( int k=0; k<data.numAttributes(); k++) {103 if ( k!=data.classIndex()) {133 for (int j = 0; j < data.numInstances(); j++) { 134 if (j != i) { 135 l = 0; 136 for (int k = 0; k < data.numAttributes(); k++) { 137 if (k != data.classIndex()) { 104 138 inst2[l] = data.instance(j).value(k); 105 139 } … … 107 141 distance = euclideanDistance.compute(inst1, inst2); 108 142 sumAll += distance; 109 sumAllQ += distance *distance;143 sumAllQ += distance * distance; 110 144 numCmp++; 111 if ( distance < min) {145 if (distance < min) { 112 146 min = distance; 113 147 } 114 if ( distance > max) {148 if (distance > max) { 115 149 max = distance; 116 150 } … … 119 153 } 120 154 double mean = sumAll / numCmp; 121 double std = Math.sqrt((sumAllQ-(sumAll*sumAll)/numCmp) * 122 (1.0d / (numCmp - 1))); 155 double std = Math.sqrt((sumAllQ - (sumAll * sumAll) / numCmp) * (1.0d / (numCmp - 1))); 123 156 return new DistChar(mean, std, min, max, data.numInstances()); 124 157 } 125 126 // like above, but for single attribute 
158 159 /** 160 * <p> 161 * Calculates the distributional characteristics of the distances of a single attribute the 162 * instances within a data set have to each other. 163 * </p> 164 * 165 * @param data 166 * data for which the instances are characterized 167 * @param index 168 * attribute for which the distances are characterized 169 * @return characteristics 170 */ 127 171 public static DistChar attributeDistance(Instances data, int index) { 128 172 double distance; … … 133 177 int numCmp = 0; 134 178 double value1, value2; 135 for ( int i=0; i<data.numInstances(); i++) {179 for (int i = 0; i < data.numInstances(); i++) { 136 180 value1 = data.instance(i).value(index); 137 for ( int j=0; j<data.numInstances(); j++) {138 if ( j!=i) {181 for (int j = 0; j < data.numInstances(); j++) { 182 if (j != i) { 139 183 value2 = data.instance(j).value(index); 140 distance = Math.abs(value1 -value2);184 distance = Math.abs(value1 - value2); 141 185 sumAll += distance; 142 sumAllQ += distance *distance;186 sumAllQ += distance * distance; 143 187 numCmp++; 144 if ( distance < min) {188 if (distance < min) { 145 189 min = distance; 146 190 } 147 if ( distance > max) {191 if (distance > max) { 148 192 max = distance; 149 193 } … … 152 196 } 153 197 double mean = sumAll / numCmp; 154 double std = Math.sqrt((sumAllQ-(sumAll*sumAll)/numCmp) * 155 (1.0d / (numCmp - 1))); 198 double std = Math.sqrt((sumAllQ - (sumAll * sumAll) / numCmp) * (1.0d / (numCmp - 1))); 156 199 return new DistChar(mean, std, min, max, data.numInstances()); 157 200 } 158 201 159 202 /** 160 203 * <p>
Note: See TracChangeset
for help on using the changeset viewer.