Changeset 135 for trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing
- Timestamp: 07/18/16 12:26:03 (8 years ago)
- Location: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing
- Files: 13 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAMIProcessor.java
r86 r135 51 51 @Override 52 52 public void setParameter(String parameters) { 53 // TODO Auto-generated method stub 54 53 // dummy, parameters not used 55 54 } 56 55 -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/CLAProcessor.java
r86 r135 44 44 @Override 45 45 public void setParameter(String parameters) { 46 // TODO Auto-generated method stub 47 46 // dummy, parameters not used 48 47 } 49 48 -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/LogarithmTransform.java
r86 r135 112 112 Instance instance = traindata.instance(i); 113 113 for (int j = 0; j < testdata.numAttributes(); j++) { 114 if (traindata.attribute(j) != classAttribute && traindata.attribute(j).isNumeric()) 114 if (traindata.attribute(j) != classAttribute && 115 traindata.attribute(j).isNumeric()) 115 116 { 116 117 if (instance.value(j) < 0) { -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MORPH.java
r120 r135 25 25 26 26 /** 27 * Implements the MORPH data privatization. 27 * Implements the MORPH data privatization. 28 28 * 29 29 * … … 36 36 */ 37 37 Random rand = new Random(); 38 38 39 39 /** 40 40 * parameter alpha for MORPH, default is 0.15 41 41 */ 42 42 double alpha = 0.15; 43 43 44 44 /** 45 45 * parameter beta for MORPH, default is 0.35 46 46 */ 47 47 double beta = 0.35; 48 48 49 49 /** 50 50 * Does not have parameters. String is ignored. … … 57 57 if (parameters != null && !parameters.equals("")) { 58 58 String[] values = parameters.split(" "); 59 if ( values.length!=2) {59 if (values.length != 2) { 60 60 throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values"); 61 61 } … … 63 63 alpha = Double.parseDouble(values[0]); 64 64 beta = Double.parseDouble(values[1]); 65 } catch(NumberFormatException e) { 65 } 66 catch (NumberFormatException e) { 66 67 throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values"); 67 68 } … … 75 76 @Override 76 77 public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { 77 for ( Instances traindata : traindataSet) {78 for (Instances traindata : traindataSet) { 78 79 applyMORPH(traindata); 79 80 } … … 88 89 applyMORPH(traindata); 89 90 } 90 91 91 92 /** 92 93 * … … 95 96 * </p> 96 97 * 97 * @param data data to which the processor is applied 98 * @param data 99 * data to which the processor is applied 98 100 */ 99 101 public void applyMORPH(Instances data) { 100 for (int i =0; i<data.numInstances(); i++) {102 for (int i = 0; i < data.numInstances(); i++) { 101 103 morphInstance(data.get(i), data); 102 104 } 103 105 } 104 106 105 107 /** 106 108 * <p> … … 108 110 * </p> 109 111 * 110 * @param instance instance that is morphed 111 * @param data data based on which the instance is morphed 112 * @param instance 113 * instance that is morphed 114 * @param data 115 * data based on which the 
instance is morphed 112 116 */ 113 117 public void morphInstance(Instance instance, Instances data) { 114 118 Instance nearestUnlikeNeighbor = getNearestUnlikeNeighbor(instance, data); 115 if( nearestUnlikeNeighbor==null ) { 116 throw new RuntimeException("could not find nearest unlike neighbor within the data: " + data.relationName()); 119 if (nearestUnlikeNeighbor == null) { 120 throw new RuntimeException("could not find nearest unlike neighbor within the data: " + 121 data.relationName()); 117 122 } 118 for( int j=0; j<data.numAttributes() ; j++ ) { 119 if( data.attribute(j)!=data.classAttribute() && data.attribute(j).isNumeric()) { 120 double randVal = rand.nextDouble()*(beta-alpha)+alpha; 121 instance.setValue(j, instance.value(j) + randVal*(instance.value(j)-nearestUnlikeNeighbor.value(j)) ); 123 for (int j = 0; j < data.numAttributes(); j++) { 124 if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) { 125 double randVal = rand.nextDouble() * (beta - alpha) + alpha; 126 instance.setValue(j, instance.value(j) + 127 randVal * (instance.value(j) - nearestUnlikeNeighbor.value(j))); 122 128 } 123 129 } 124 130 } 125 131 126 132 /** 127 133 * <p> 128 * Determines the nearest unlike neighbor of an instance. 134 * Determines the nearest unlike neighbor of an instance. 
129 135 * </p> 130 136 * 131 * @param instance instance to which the nearest unlike neighbor is determined 132 * @param data data where the nearest unlike neighbor is determined from 137 * @param instance 138 * instance to which the nearest unlike neighbor is determined 139 * @param data 140 * data where the nearest unlike neighbor is determined from 133 141 * @return nearest unlike instance 134 142 */ 135 143 public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) { 136 144 Instance nearestUnlikeNeighbor = null; 137 138 double[] instanceVector = new double[data.numAttributes() -1];145 146 double[] instanceVector = new double[data.numAttributes() - 1]; 139 147 int tmp = 0; 140 for ( int j=0; j<data.numAttributes(); j++) {141 if ( data.attribute(j)!=data.classAttribute() && data.attribute(j).isNumeric()) {148 for (int j = 0; j < data.numAttributes(); j++) { 149 if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) { 142 150 instanceVector[tmp] = instance.value(j); 143 151 } 144 152 } 145 153 146 154 double minDistance = Double.MAX_VALUE; 147 for ( int i=0 ; i<data.numInstances() ; i++) {148 if ( instance.classValue() != data.instance(i).classValue()) {155 for (int i = 0; i < data.numInstances(); i++) { 156 if (instance.classValue() != data.instance(i).classValue()) { 149 157 double[] otherVector = new double[data.numAttributes() - 1]; 150 158 tmp = 0; 151 159 for (int j = 0; j < data.numAttributes(); j++) { 152 if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) { 160 if (data.attribute(j) != data.classAttribute() && 161 data.attribute(j).isNumeric()) 162 { 153 163 otherVector[tmp++] = data.instance(i).value(j); 154 164 } 155 165 } 156 if ( MathArrays.distance(instanceVector, otherVector)<minDistance) {166 if (MathArrays.distance(instanceVector, otherVector) < minDistance) { 157 167 minDistance = MathArrays.distance(instanceVector, otherVector); 158 168 nearestUnlikeNeighbor = 
data.instance(i); -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MedianAsReference.java
r86 r135 129 129 Instance instance = traindata.instance(i); 130 130 for (int j = 0; j < traindata.numAttributes(); j++) { 131 if (traindata.attribute(j) != classAttribute && traindata.attribute(j).isNumeric()) 131 if (traindata.attribute(j) != classAttribute && 132 traindata.attribute(j).isNumeric()) 132 133 { 133 134 instance.setValue(j, instance.value(j) + (median[j] - currentmedian[j])); -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/NominalAttributeFilter.java
r86 r135 95 95 96 96 // delete all instances where nominal attribute has the value of one of the parameter 97 if (indexOfnominalAttributeValues .contains(wekaInstance98 . value(indexOfConfidenceAttribute)))97 if (indexOfnominalAttributeValues 98 .contains(wekaInstance.value(indexOfConfidenceAttribute))) 99 99 { 100 100 traindata.delete(j); -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Oversampling.java
r86 r135 80 80 81 81 Resample resample = new Resample(); 82 // TODO: resample.setSampleSizePercent((100.0*counts[1])/100+0.01);83 // Ohne +0.01 wird bei tomcat, xerces-1.2 und jedit-4.0 ein negative84 // weniger zurückgegeben85 82 resample.setSampleSizePercent((100.0 * counts[0]) / counts[1]); 86 83 try { -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/SynonymAttributePruning.java
r86 r135 59 59 double distance; 60 60 for (int j = traindata.numAttributes() - 1; j >= 0; j--) { 61 if ( j!=traindata.classIndex()) {61 if (j != traindata.classIndex()) { 62 62 boolean hasClosest = false; 63 63 for (int i1 = 0; !hasClosest && i1 < traindata.size(); i1++) { … … 67 67 double distanceJ = Double.MAX_VALUE; 68 68 for (int k = 0; k < traindata.numAttributes(); k++) { 69 distance = Math.abs(traindata.get(i1).value(k) - traindata.get(i2).value(k)); 69 distance = Math 70 .abs(traindata.get(i1).value(k) - traindata.get(i2).value(k)); 70 71 if (distance < minVal) { 71 72 minVal = distance; -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TCAPlusNormalization.java
r86 r135 19 19 import weka.core.Instances; 20 20 21 // normalization selected according to TCA+ rules (TCA has to be applied separately 21 /** 22 * <p> 23 * Normalization selected according to the TCA+ rules after Nam et al. (Transfer Defect Learning). 24 * </p> 25 * 26 * @author Steffen Herbold 27 */ 22 28 public class TCAPlusNormalization implements IProcessesingStrategy { 23 29 … … 30 36 @Override 31 37 public void setParameter(String parameters) { 32 // TODO Auto-generated method stub 33 38 // dummy, paramters not used 34 39 } 35 40 41 /* 42 * (non-Javadoc) 43 * 44 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, 45 * weka.core.Instances) 46 */ 36 47 @Override 37 48 public void apply(Instances testdata, Instances traindata) { 38 49 applyTCAPlus(testdata, traindata); 39 50 } 40 51 41 52 private void applyTCAPlus(Instances testdata, Instances traindata) { 42 53 DistChar dcTest = WekaUtils.datasetDistance(testdata); 43 54 DistChar dcTrain = WekaUtils.datasetDistance(traindata); 44 55 45 56 // RULE 1: 46 if( 0.9*dcTrain.mean<=dcTest.mean && 1.1*dcTrain.mean>=dcTest.mean && 47 0.9*dcTrain.std<=dcTest.std && 1.1*dcTrain.std>=dcTest.std) { 57 if (0.9 * dcTrain.mean <= dcTest.mean && 1.1 * dcTrain.mean >= dcTest.mean && 58 0.9 * dcTrain.std <= dcTest.std && 1.1 * dcTrain.std >= dcTest.std) 59 { 48 60 // do nothing 49 61 } 50 62 // RULE 2: 51 else if((0.4*dcTrain.min>dcTest.min || 1.6*dcTrain.min<dcTest.min) && 52 (0.4*dcTrain.max>dcTest.max || 1.6*dcTrain.min<dcTest.max) && 53 (0.4*dcTrain.min>dcTest.num || 1.6*dcTrain.min<dcTest.num)) { 63 else if ((0.4 * dcTrain.min > dcTest.min || 1.6 * dcTrain.min < dcTest.min) && 64 (0.4 * dcTrain.max > dcTest.max || 1.6 * dcTrain.min < dcTest.max) && 65 (0.4 * dcTrain.min > dcTest.num || 1.6 * dcTrain.min < dcTest.num)) 66 { 54 67 NormalizationUtil.minMax(testdata); 55 68 NormalizationUtil.minMax(traindata); 56 69 } 57 70 // RULE 3: 58 else if((0.4*dcTrain.std>dcTest.std && 
dcTrain.num<dcTest.num) || 59 (1.6*dcTrain.std<dcTest.std)&& dcTrain.num>dcTest.num) { 71 else if ((0.4 * dcTrain.std > dcTest.std && dcTrain.num < dcTest.num) || 72 (1.6 * dcTrain.std < dcTest.std) && dcTrain.num > dcTest.num) 73 { 60 74 NormalizationUtil.zScoreTraining(testdata, traindata); 61 75 } 62 76 // RULE 4: 63 else if((0.4*dcTrain.std>dcTest.std && dcTrain.num>dcTest.num) || 64 (1.6*dcTrain.std<dcTest.std)&& dcTrain.num<dcTest.num) { 77 else if ((0.4 * dcTrain.std > dcTest.std && dcTrain.num > dcTest.num) || 78 (1.6 * dcTrain.std < dcTest.std) && dcTrain.num < dcTest.num) 79 { 65 80 NormalizationUtil.zScoreTarget(testdata, traindata); 66 81 } 67 // RULE 5:82 // RULE 5: 68 83 else { 69 84 NormalizationUtil.zScore(testdata); -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TopMetricFilter.java
r129 r135 52 52 */ 53 53 double correlationThreshold = 0.5; 54 54 55 55 /* 56 56 * (non-Javadoc) … … 60 60 @Override 61 61 public void setParameter(String parameters) { 62 if ( parameters!=null && !parameters.equals("")) {62 if (parameters != null && !parameters.equals("")) { 63 63 correlationThreshold = Double.parseDouble(parameters); 64 64 } … … 76 76 } 77 77 78 private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet) throws Exception { 79 Integer[] counts = new Integer[traindataSet.get(0).numAttributes()-1]; 80 IntStream.range(0,counts.length).forEach(val -> counts[val] = 0); 81 for( Instances traindata : traindataSet ) { 78 private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet) 79 throws Exception 80 { 81 Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1]; 82 IntStream.range(0, counts.length).forEach(val -> counts[val] = 0); 83 for (Instances traindata : traindataSet) { 82 84 J48 decisionTree = new J48(); 83 85 decisionTree.buildClassifier(traindata); 84 int k =0;85 for ( int j=0; j<traindata.numAttributes(); j++) {86 if (j!=traindata.classIndex()){87 if ( decisionTree.toString().contains(traindata.attribute(j).name())) {88 counts[k] = counts[k] +1;86 int k = 0; 87 for (int j = 0; j < traindata.numAttributes(); j++) { 88 if (j != traindata.classIndex()) { 89 if (decisionTree.toString().contains(traindata.attribute(j).name())) { 90 counts[k] = counts[k] + 1; 89 91 } 90 92 k++; … … 93 95 } 94 96 int[] topkIndex = new int[counts.length]; 95 IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val);97 IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val); 96 98 SortUtils.quicksort(counts, topkIndex, true); 97 99 98 100 // get CFSs for each training set 99 101 List<Set<Integer>> cfsSets = new LinkedList<>(); 100 for ( Instances traindata : traindataSet) {102 for (Instances traindata : traindataSet) { 101 103 boolean selectionSuccessful = 
false; 102 104 boolean secondAttempt = false; … … 113 115 attsel.SelectAttributes(traindataCopy); 114 116 Set<Integer> cfsSet = new HashSet<>(); 115 for ( int attr : attsel.selectedAttributes()) {117 for (int attr : attsel.selectedAttributes()) { 116 118 cfsSet.add(attr); 117 119 } … … 128 130 attsel.SelectAttributes(traindata); 129 131 Set<Integer> cfsSet = new HashSet<>(); 130 for ( int attr : attsel.selectedAttributes()) {132 for (int attr : attsel.selectedAttributes()) { 131 133 cfsSet.add(attr); 132 134 } … … 160 162 while (!selectionSuccessful); // dummy loop for internal continue 161 163 } 162 164 163 165 double[] coverages = new double[topkIndex.length]; 164 for ( Set<Integer> cfsSet : cfsSets) {166 for (Set<Integer> cfsSet : cfsSets) { 165 167 Set<Integer> topkSet = new HashSet<>(); 166 for ( int k=0; k<topkIndex.length ; k++) {168 for (int k = 0; k < topkIndex.length; k++) { 167 169 topkSet.add(topkIndex[k]); 168 coverages[k] += (coverage(topkSet, cfsSet) /traindataSet.size());170 coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size()); 169 171 } 170 172 } 171 173 double bestCoverageValue = Double.MIN_VALUE; 172 174 int bestCoverageIndex = 0; 173 for ( int i=0; i<coverages.length; i++) {174 if ( coverages[i]>bestCoverageValue) {175 for (int i = 0; i < coverages.length; i++) { 176 if (coverages[i] > bestCoverageValue) { 175 177 bestCoverageValue = coverages[i]; 176 178 bestCoverageIndex = i; … … 180 182 SpearmansCorrelation corr = new SpearmansCorrelation(); 181 183 double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex]; 182 for ( Instances traindata : traindataSet) {184 for (Instances traindata : traindataSet) { 183 185 double[][] vectors = new double[bestCoverageIndex][traindata.size()]; 184 for ( int i=0; i<traindata.size(); i++) {185 for ( int j=0; j<bestCoverageIndex; j++) {186 for (int i = 0; i < traindata.size(); i++) { 187 for (int j = 0; j < bestCoverageIndex; j++) { 186 188 vectors[j][i] = 
traindata.get(i).value(topkIndex[j]); 187 189 } 188 190 } 189 for ( int j=0; j<bestCoverageIndex; j++) {190 for ( int k=j+1; k<bestCoverageIndex; k++) {191 for (int j = 0; j < bestCoverageIndex; j++) { 192 for (int k = j + 1; k < bestCoverageIndex; k++) { 191 193 correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k])); 192 194 } … … 194 196 } 195 197 Set<Integer> topkSetIndexSet = new TreeSet<>(); 196 // j<30 ensures that the computational time does not explode since the powerset is 2^n in complexity 197 for( int j=0; j<bestCoverageIndex && j<30 ; j++ ) { 198 // j<30 ensures that the computational time does not explode since the powerset is 2^n in 199 // complexity 200 for (int j = 0; j < bestCoverageIndex && j < 30; j++) { 198 201 topkSetIndexSet.add(j); 199 202 } … … 201 204 double bestOptCoverage = Double.MIN_VALUE; 202 205 Set<Integer> opttopkSetIndexSet = null; 203 for ( Set<Integer> combination : allCombinations) {204 if ( isUncorrelated(correlationMatrix, combination)) {206 for (Set<Integer> combination : allCombinations) { 207 if (isUncorrelated(correlationMatrix, combination)) { 205 208 double currentCoverage = 0.0; 206 209 Set<Integer> topkCombination = new TreeSet<>(); 207 for ( Integer index : combination) {210 for (Integer index : combination) { 208 211 topkCombination.add(topkIndex[index]); 209 212 } 210 for ( Set<Integer> cfsSet : cfsSets) {211 currentCoverage += (coverage(topkCombination, cfsSet) /traindataSet.size());212 } 213 if ( currentCoverage > bestOptCoverage) {213 for (Set<Integer> cfsSet : cfsSets) { 214 currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size()); 215 } 216 if (currentCoverage > bestOptCoverage) { 214 217 bestOptCoverage = currentCoverage; 215 218 opttopkSetIndexSet = combination; … … 218 221 } 219 222 Set<Integer> opttopkIndex = new TreeSet<>(); 220 for (Integer index : opttopkSetIndexSet) {223 for (Integer index : opttopkSetIndexSet) { 221 224 opttopkIndex.add(topkIndex[index]); 222 
225 } 223 226 Console.traceln(Level.FINE, "selected the following metrics:"); 224 for (Integer index : opttopkIndex) {227 for (Integer index : opttopkIndex) { 225 228 Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name()); 226 229 } 227 230 // finally remove attributes 228 for ( int j=testdata.numAttributes()-1; j>=0; j--) {229 if ( j!=testdata.classIndex() && !opttopkIndex.contains(j)) {231 for (int j = testdata.numAttributes() - 1; j >= 0; j--) { 232 if (j != testdata.classIndex() && !opttopkIndex.contains(j)) { 230 233 testdata.deleteAttributeAt(j); 231 for ( Instances traindata : traindataSet) {234 for (Instances traindata : traindataSet) { 232 235 traindata.deleteAttributeAt(j); 233 236 } … … 235 238 } 236 239 } 237 240 238 241 private boolean isUncorrelated(double[][] correlationMatrix, Set<Integer> combination) { 239 242 Integer[] intCombination = combination.toArray(new Integer[0]); 240 243 boolean areUncorrelated = true; 241 for( int i=0 ; areUncorrelated && i<intCombination.length ; i++ ) { 242 for( int j=i+1; areUncorrelated && j<intCombination.length ; j++ ) { 243 areUncorrelated &= correlationMatrix[intCombination[i]][intCombination[j]]>correlationThreshold; 244 for (int i = 0; areUncorrelated && i < intCombination.length; i++) { 245 for (int j = i + 1; areUncorrelated && j < intCombination.length; j++) { 246 areUncorrelated &= 247 correlationMatrix[intCombination[i]][intCombination[j]] > correlationThreshold; 244 248 } 245 249 } 246 250 return areUncorrelated; 247 251 } 248 252 249 253 private double coverage(Set<Integer> topkSet, Set<Integer> cfsSet) { 250 254 Set<Integer> topkSetCopy1 = new HashSet<>(topkSet); … … 252 256 Set<Integer> topkSetCopy2 = new HashSet<>(topkSet); 253 257 topkSetCopy2.addAll(cfsSet); 254 return ((double) topkSetCopy1.size()) /topkSetCopy2.size();258 return ((double) topkSetCopy1.size()) / topkSetCopy2.size(); 255 259 } 256 260 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/TransferComponentAnalysis.java
r86 r135 37 37 * </p> 38 38 * 39 * TODO comment class40 39 * @author Steffen Herbold 41 40 */ 42 41 public class TransferComponentAnalysis implements IProcessesingStrategy { 43 42 43 /** 44 * Dimension of the reduced data. 45 */ 44 46 int reducedDimension = 5; 45 47 48 /* 49 * (non-Javadoc) 50 * 51 * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String) 52 */ 46 53 @Override 47 54 public void setParameter(String parameters) { 48 49 } 50 55 // dummy, paramters ignored 56 } 57 58 /* 59 * (non-Javadoc) 60 * 61 * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, 62 * weka.core.Instances) 63 */ 51 64 @Override 52 65 public void apply(Instances testdata, Instances traindata) { … … 54 67 } 55 68 69 /** 70 * <p> 71 * calculates the linear kernel function between two instances 72 * </p> 73 * 74 * @param x1 75 * first instance 76 * @param x2 77 * second instance 78 * @return kernel value 79 */ 56 80 private double linearKernel(Instance x1, Instance x2) { 57 81 double value = 0.0d; … … 64 88 } 65 89 90 /** 91 * <p> 92 * Applies TCA to the test and training data. 93 * </p> 94 * 95 * @param testdata 96 * the test data 97 * @param traindata 98 * the training data 99 */ 66 100 private void applyTCA(Instances testdata, Instances traindata) { 67 101 final int sizeTest = testdata.numInstances(); … … 125 159 } 126 160 161 /** 162 * <p> 163 * Creates the kernel matrix of the test and training data 164 * </p> 165 * 166 * @param testdata 167 * the test data 168 * @param traindata 169 * the training data 170 * @return kernel matrix 171 */ 127 172 private PrimitiveMatrix buildKernel(Instances testdata, Instances traindata) { 128 173 final int kernelDim = traindata.numInstances() + testdata.numInstances(); … … 162 207 } 163 208 209 /** 210 * <p> 211 * Calculates the kernel norm matrix, i.e., the matrix which is used for matrix multiplication 212 * to calculate the kernel norm. 
213 * </p> 214 * 215 * @param dimTest 216 * dimension of the test data 217 * @param sizeTrain 218 * number of instances of the training data 219 * @return kernel norm matrix 220 */ 164 221 private PrimitiveMatrix buildKernelNormMatrix(final int dimTest, final int sizeTrain) { 165 222 final double trainSquared = 1.0 / (sizeTrain * (double) sizeTrain); … … 199 256 } 200 257 258 /** 259 * <p> 260 * Creates the center matrix 261 * </p> 262 * 263 * @param sizeTest 264 * number of instances of the test data 265 * @param sizeTrain 266 * number of instances of the training data 267 * @return center matrix 268 */ 201 269 private PrimitiveMatrix buildCenterMatrix(final int sizeTest, final int sizeTrain) { 202 270 Builder<PrimitiveMatrix> centerMatrix = … … 208 276 } 209 277 278 /** 279 * <p> 280 * Builds the mu-Matrix for offsetting values. 281 * </p> 282 * 283 * @param sizeTest 284 * number of instances of the test data 285 * @param sizeTrain 286 * number of instances of the training data 287 * @param mu 288 * mu parameter 289 * @return mu-Matrix 290 */ 210 291 private PrimitiveMatrix buildMuMatrix(final int sizeTest, 211 292 final int sizeTrain, -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/Undersampling.java
r86 r135 80 80 81 81 Resample resample = new Resample(); 82 // TODO: resample.setSampleSizePercent((100.0*counts[1])/100+0.01);83 // Ohne +0.01 wird bei tomcat, xerces-1.2 und jedit-4.0 ein negative weniger84 // zurückgegeben85 82 resample.setSampleSizePercent((100.0 * counts[1]) / counts[0]); 86 83 try { -
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/ZScoreTargetNormalization.java
r86 r135 24 24 * @author Steffen Herbold 25 25 */ 26 public class ZScoreTargetNormalization implements ISetWiseProcessingStrategy, IProcessesingStrategy 26 public class ZScoreTargetNormalization 27 implements ISetWiseProcessingStrategy, IProcessesingStrategy 27 28 { 28 29
Note: See TracChangeset for help on using the changeset viewer.