Changeset 41 for trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/AbstractCharacteristicSelection.java
- Timestamp: 09/24/15 10:59:05 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/CrossPare/src/de/ugoe/cs/cpdp/dataselection/AbstractCharacteristicSelection.java
r2 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.dataselection; 2 16 … … 14 28 15 29 /** 16 * Abstract class that implements the foundation of setwise data selection strategies using distributional characteristics. 17 * This class provides the means to transform the data sets into their characteristic vectors. 30 * Abstract class that implements the foundation of setwise data selection strategies using 31 * distributional characteristics. This class provides the means to transform the data sets into 32 * their characteristic vectors. 33 * 18 34 * @author Steffen Herbold 19 35 */ 20 public abstract class AbstractCharacteristicSelection implements 21 ISetWiseDataselectionStrategy { 36 public abstract class AbstractCharacteristicSelection implements ISetWiseDataselectionStrategy { 22 37 23 /** 24 * vector with the distributional characteristics 25 */ 26 private String[] characteristics = new String[]{"mean","stddev"}; 27 28 /** 29 * Sets the distributional characteristics. The names of the characteristics are separated by blanks. 30 */ 31 @Override 32 public void setParameter(String parameters) { 33 if( !"".equals(parameters) ) { 34 characteristics = parameters.split(" "); 35 } 36 } 37 38 /** 39 * Transforms the data into the distributional characteristics. The first instance is the test data, followed by the training data. 
40 * @param testdata test data 41 * @param traindataSet training data sets 42 * @return distributional characteristics of the data 43 */ 44 protected Instances characteristicInstances(Instances testdata, SetUniqueList<Instances> traindataSet) { 45 // setup weka Instances for clustering 46 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 47 48 final Attribute classAtt = testdata.classAttribute(); 49 for( int i=0 ; i<testdata.numAttributes() ; i++ ) { 50 Attribute dataAtt = testdata.attribute(i); 51 if( !dataAtt.equals(classAtt) ) { 52 for( String characteristic : characteristics ) { 53 atts.add(new Attribute(dataAtt.name() + "_" + characteristic)); 54 } 55 } 56 } 57 final Instances data = new Instances("distributional_characteristics", atts, 0); 58 59 // setup data for clustering 60 double[] instanceValues = new double[atts.size()]; 61 for( int i=0 ; i<testdata.numAttributes() ; i++ ) { 62 Attribute dataAtt = testdata.attribute(i); 63 if( !dataAtt.equals(classAtt) ) { 64 Stats stats = testdata.attributeStats(i).numericStats; 65 for( int j=0; j<characteristics.length; j++ ) { 66 if( "mean".equals(characteristics[j]) ) { 67 instanceValues[i*characteristics.length+j] = stats.mean; 68 } else if( "stddev".equals(characteristics[j])) { 69 instanceValues[i*characteristics.length+j] = stats.stdDev; 70 } else if( "var".equals(characteristics[j])) { 71 instanceValues[i*characteristics.length+j] = testdata.variance(j); 72 } else { 73 throw new RuntimeException("Unkown distributional characteristic: " + characteristics[j]); 74 } 75 } 76 } 77 } 78 data.add(new DenseInstance(1.0, instanceValues)); 79 80 for( Instances traindata : traindataSet ) { 81 instanceValues = new double[atts.size()]; 82 for( int i=0 ; i<traindata.numAttributes() ; i++ ) { 83 Attribute dataAtt = traindata.attribute(i); 84 if( !dataAtt.equals(classAtt) ) { 85 Stats stats = traindata.attributeStats(i).numericStats; 86 for( int j=0; j<characteristics.length; j++ ) { 87 if( 
"mean".equals(characteristics[j]) ) { 88 instanceValues[i*characteristics.length+j] = stats.mean; 89 } else if( "stddev".equals(characteristics[j])) { 90 instanceValues[i*characteristics.length+j] = stats.stdDev; 91 } else if( "var".equals(characteristics[j])) { 92 instanceValues[i*characteristics.length+j] = testdata.variance(j); 93 } else { 94 throw new RuntimeException("Unkown distributional characteristic: " + characteristics[j]); 95 } 96 } 97 } 98 } 99 Instance instance = new DenseInstance(1.0, instanceValues); 100 101 data.add(instance); 102 } 103 return data; 104 } 105 106 /** 107 * Returns the normalized distributional characteristics of the training data. 108 * @param testdata test data 109 * @param traindataSet training data sets 110 * @return normalized distributional characteristics of the data 111 */ 112 protected Instances normalizedCharacteristicInstances(Instances testdata, SetUniqueList<Instances> traindataSet) { 113 Instances data = characteristicInstances(testdata, traindataSet); 114 try { 115 final Normalize normalizer = new Normalize(); 116 normalizer.setInputFormat(data); 117 data = Filter.useFilter(data, normalizer); 118 } catch (Exception e) { 119 throw new RuntimeException("Unexpected exception during normalization of distributional characteristics.", e); 120 } 121 return data; 122 } 38 /** 39 * vector with the distributional characteristics 40 */ 41 private String[] characteristics = new String[] 42 { "mean", "stddev" }; 43 44 /** 45 * Sets the distributional characteristics. The names of the characteristics are separated by 46 * blanks. 47 */ 48 @Override 49 public void setParameter(String parameters) { 50 if (!"".equals(parameters)) { 51 characteristics = parameters.split(" "); 52 } 53 } 54 55 /** 56 * Transforms the data into the distributional characteristics. The first instance is the test 57 * data, followed by the training data. 
58 * 59 * @param testdata 60 * test data 61 * @param traindataSet 62 * training data sets 63 * @return distributional characteristics of the data 64 */ 65 protected Instances characteristicInstances(Instances testdata, 66 SetUniqueList<Instances> traindataSet) 67 { 68 // setup weka Instances for clustering 69 final ArrayList<Attribute> atts = new ArrayList<Attribute>(); 70 71 final Attribute classAtt = testdata.classAttribute(); 72 for (int i = 0; i < testdata.numAttributes(); i++) { 73 Attribute dataAtt = testdata.attribute(i); 74 if (!dataAtt.equals(classAtt)) { 75 for (String characteristic : characteristics) { 76 atts.add(new Attribute(dataAtt.name() + "_" + characteristic)); 77 } 78 } 79 } 80 final Instances data = new Instances("distributional_characteristics", atts, 0); 81 82 // setup data for clustering 83 double[] instanceValues = new double[atts.size()]; 84 for (int i = 0; i < testdata.numAttributes(); i++) { 85 Attribute dataAtt = testdata.attribute(i); 86 if (!dataAtt.equals(classAtt)) { 87 Stats stats = testdata.attributeStats(i).numericStats; 88 for (int j = 0; j < characteristics.length; j++) { 89 if ("mean".equals(characteristics[j])) { 90 instanceValues[i * characteristics.length + j] = stats.mean; 91 } 92 else if ("stddev".equals(characteristics[j])) { 93 instanceValues[i * characteristics.length + j] = stats.stdDev; 94 } 95 else if ("var".equals(characteristics[j])) { 96 instanceValues[i * characteristics.length + j] = testdata.variance(j); 97 } 98 else { 99 throw new RuntimeException("Unkown distributional characteristic: " + 100 characteristics[j]); 101 } 102 } 103 } 104 } 105 data.add(new DenseInstance(1.0, instanceValues)); 106 107 for (Instances traindata : traindataSet) { 108 instanceValues = new double[atts.size()]; 109 for (int i = 0; i < traindata.numAttributes(); i++) { 110 Attribute dataAtt = traindata.attribute(i); 111 if (!dataAtt.equals(classAtt)) { 112 Stats stats = traindata.attributeStats(i).numericStats; 113 for (int j = 0; j < 
characteristics.length; j++) { 114 if ("mean".equals(characteristics[j])) { 115 instanceValues[i * characteristics.length + j] = stats.mean; 116 } 117 else if ("stddev".equals(characteristics[j])) { 118 instanceValues[i * characteristics.length + j] = stats.stdDev; 119 } 120 else if ("var".equals(characteristics[j])) { 121 instanceValues[i * characteristics.length + j] = testdata.variance(j); 122 } 123 else { 124 throw new RuntimeException("Unkown distributional characteristic: " + 125 characteristics[j]); 126 } 127 } 128 } 129 } 130 Instance instance = new DenseInstance(1.0, instanceValues); 131 132 data.add(instance); 133 } 134 return data; 135 } 136 137 /** 138 * Returns the normalized distributional characteristics of the training data. 139 * 140 * @param testdata 141 * test data 142 * @param traindataSet 143 * training data sets 144 * @return normalized distributional characteristics of the data 145 */ 146 protected Instances normalizedCharacteristicInstances(Instances testdata, 147 SetUniqueList<Instances> traindataSet) 148 { 149 Instances data = characteristicInstances(testdata, traindataSet); 150 try { 151 final Normalize normalizer = new Normalize(); 152 normalizer.setInputFormat(data); 153 data = Filter.useFilter(data, normalizer); 154 } 155 catch (Exception e) { 156 throw new RuntimeException( 157 "Unexpected exception during normalization of distributional characteristics.", 158 e); 159 } 160 return data; 161 } 123 162 }
Note: See TracChangeset
for help on using the changeset viewer.