| 1 | // Copyright 2015 Georg-August-Universität Göttingen, Germany
|
|---|
| 2 | //
|
|---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License");
|
|---|
| 4 | // you may not use this file except in compliance with the License.
|
|---|
| 5 | // You may obtain a copy of the License at
|
|---|
| 6 | //
|
|---|
| 7 | // http://www.apache.org/licenses/LICENSE-2.0
|
|---|
| 8 | //
|
|---|
| 9 | // Unless required by applicable law or agreed to in writing, software
|
|---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS,
|
|---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|---|
| 12 | // See the License for the specific language governing permissions and
|
|---|
| 13 | // limitations under the License.
|
|---|
| 14 |
|
|---|
| 15 | package de.ugoe.cs.cpdp.dataprocessing;
|
|---|
| 16 |
|
|---|
| 17 | import de.ugoe.cs.cpdp.util.WekaUtils;
|
|---|
| 18 | import de.ugoe.cs.cpdp.util.WekaUtils.DistChar;
|
|---|
| 19 | import weka.core.Instances;
|
|---|
| 20 |
|
|---|
| 21 | // normalization selected according to TCA+ rules (TCA has to be applied separately)
|
|---|
| 22 | public class TCAPlusNormalization implements IProcessesingStrategy {
|
|---|
| 23 |
|
|---|
| 24 | /**
|
|---|
| 25 | * Does not have parameters. String is ignored.
|
|---|
| 26 | *
|
|---|
| 27 | * @param parameters
|
|---|
| 28 | * ignored
|
|---|
| 29 | */
|
|---|
| 30 | @Override
|
|---|
| 31 | public void setParameter(String parameters) {
|
|---|
| 32 | // TODO Auto-generated method stub
|
|---|
| 33 |
|
|---|
| 34 | }
|
|---|
| 35 |
|
|---|
| 36 | @Override
|
|---|
| 37 | public void apply(Instances testdata, Instances traindata) {
|
|---|
| 38 | applyTCAPlus(testdata, traindata);
|
|---|
| 39 | }
|
|---|
| 40 |
|
|---|
| 41 | private void applyTCAPlus(Instances testdata, Instances traindata) {
|
|---|
| 42 | DistChar dcTest = WekaUtils.datasetDistance(testdata);
|
|---|
| 43 | DistChar dcTrain = WekaUtils.datasetDistance(traindata);
|
|---|
| 44 |
|
|---|
| 45 | // RULE 1:
|
|---|
| 46 | if( 0.9*dcTrain.mean<=dcTest.mean && 1.1*dcTrain.mean>=dcTest.mean &&
|
|---|
| 47 | 0.9*dcTrain.std<=dcTest.std && 1.1*dcTrain.std>=dcTest.std) {
|
|---|
| 48 | // do nothing
|
|---|
| 49 | }
|
|---|
| 50 | // RULE 2:
|
|---|
| 51 | else if((0.4*dcTrain.min>dcTest.min || 1.6*dcTrain.min<dcTest.min) &&
|
|---|
| 52 | (0.4*dcTrain.max>dcTest.max || 1.6*dcTrain.min<dcTest.max) &&
|
|---|
| 53 | (0.4*dcTrain.min>dcTest.num || 1.6*dcTrain.min<dcTest.num)) {
|
|---|
| 54 | NormalizationUtil.minMax(testdata);
|
|---|
| 55 | NormalizationUtil.minMax(traindata);
|
|---|
| 56 | }
|
|---|
| 57 | // RULE 3:
|
|---|
| 58 | else if((0.4*dcTrain.std>dcTest.std && dcTrain.num<dcTest.num) ||
|
|---|
| 59 | (1.6*dcTrain.std<dcTest.std)&& dcTrain.num>dcTest.num) {
|
|---|
| 60 | NormalizationUtil.zScoreTraining(testdata, traindata);
|
|---|
| 61 | }
|
|---|
| 62 | // RULE 4:
|
|---|
| 63 | else if((0.4*dcTrain.std>dcTest.std && dcTrain.num>dcTest.num) ||
|
|---|
| 64 | (1.6*dcTrain.std<dcTest.std)&& dcTrain.num<dcTest.num) {
|
|---|
| 65 | NormalizationUtil.zScoreTarget(testdata, traindata);
|
|---|
| 66 | }
|
|---|
| 67 | //RULE 5:
|
|---|
| 68 | else {
|
|---|
| 69 | NormalizationUtil.zScore(testdata);
|
|---|
| 70 | NormalizationUtil.zScore(traindata);
|
|---|
| 71 | }
|
|---|
| 72 | }
|
|---|
| 73 | }
|
|---|