[86] | 1 | // Copyright 2015 Georg-August-Universität Göttingen, Germany
|
---|
[50] | 2 | //
|
---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License");
|
---|
| 4 | // you may not use this file except in compliance with the License.
|
---|
| 5 | // You may obtain a copy of the License at
|
---|
| 6 | //
|
---|
| 7 | // http://www.apache.org/licenses/LICENSE-2.0
|
---|
| 8 | //
|
---|
| 9 | // Unless required by applicable law or agreed to in writing, software
|
---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS,
|
---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
| 12 | // See the License for the specific language governing permissions and
|
---|
| 13 | // limitations under the License.
|
---|
| 14 |
|
---|
| 15 | package de.ugoe.cs.cpdp.dataprocessing;
|
---|
| 16 |
|
---|
| 17 | import java.security.InvalidParameterException;
|
---|
| 18 | import java.util.Random;
|
---|
| 19 |
|
---|
| 20 | import org.apache.commons.collections4.list.SetUniqueList;
|
---|
| 21 | import org.apache.commons.math3.util.MathArrays;
|
---|
| 22 |
|
---|
| 23 | import weka.core.Instance;
|
---|
| 24 | import weka.core.Instances;
|
---|
| 25 |
|
---|
| 26 | /**
|
---|
[135] | 27 | * Implements the MORPH data privatization.
|
---|
[50] | 28 | *
|
---|
| 29 | *
|
---|
| 30 | * @author Steffen Herbold
|
---|
| 31 | */
|
---|
| 32 | public class MORPH implements ISetWiseProcessingStrategy, IProcessesingStrategy {
|
---|
| 33 |
|
---|
| 34 | /**
|
---|
| 35 | * random number generator for MORPH
|
---|
| 36 | */
|
---|
| 37 | Random rand = new Random();
|
---|
[135] | 38 |
|
---|
[50] | 39 | /**
|
---|
| 40 | * parameter alpha for MORPH, default is 0.15
|
---|
| 41 | */
|
---|
| 42 | double alpha = 0.15;
|
---|
[135] | 43 |
|
---|
[50] | 44 | /**
|
---|
| 45 | * parameter beta for MORPH, default is 0.35
|
---|
| 46 | */
|
---|
| 47 | double beta = 0.35;
|
---|
[135] | 48 |
|
---|
[50] | 49 | /**
|
---|
| 50 | * Does not have parameters. String is ignored.
|
---|
| 51 | *
|
---|
| 52 | * @param parameters
|
---|
| 53 | * ignored
|
---|
| 54 | */
|
---|
| 55 | @Override
|
---|
| 56 | public void setParameter(String parameters) {
|
---|
[70] | 57 | if (parameters != null && !parameters.equals("")) {
|
---|
[50] | 58 | String[] values = parameters.split(" ");
|
---|
[135] | 59 | if (values.length != 2) {
|
---|
[50] | 60 | throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values");
|
---|
| 61 | }
|
---|
| 62 | try {
|
---|
| 63 | alpha = Double.parseDouble(values[0]);
|
---|
| 64 | beta = Double.parseDouble(values[1]);
|
---|
[135] | 65 | }
|
---|
| 66 | catch (NumberFormatException e) {
|
---|
[50] | 67 | throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values");
|
---|
| 68 | }
|
---|
| 69 | }
|
---|
| 70 | }
|
---|
| 71 |
|
---|
| 72 | /**
|
---|
| 73 | * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
|
---|
| 74 | * org.apache.commons.collections4.list.SetUniqueList)
|
---|
| 75 | */
|
---|
| 76 | @Override
|
---|
| 77 | public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
|
---|
[135] | 78 | for (Instances traindata : traindataSet) {
|
---|
[50] | 79 | applyMORPH(traindata);
|
---|
| 80 | }
|
---|
| 81 | }
|
---|
| 82 |
|
---|
| 83 | /**
|
---|
| 84 | * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
|
---|
| 85 | * weka.core.Instances)
|
---|
| 86 | */
|
---|
| 87 | @Override
|
---|
| 88 | public void apply(Instances testdata, Instances traindata) {
|
---|
| 89 | applyMORPH(traindata);
|
---|
| 90 | }
|
---|
[135] | 91 |
|
---|
[50] | 92 | /**
|
---|
| 93 | *
|
---|
| 94 | * <p>
|
---|
| 95 | * Applies MORPH to the data
|
---|
| 96 | * </p>
|
---|
| 97 | *
|
---|
[135] | 98 | * @param data
|
---|
| 99 | * data to which the processor is applied
|
---|
[50] | 100 | */
|
---|
[120] | 101 | public void applyMORPH(Instances data) {
|
---|
[135] | 102 | for (int i = 0; i < data.numInstances(); i++) {
|
---|
[120] | 103 | morphInstance(data.get(i), data);
|
---|
| 104 | }
|
---|
| 105 | }
|
---|
[135] | 106 |
|
---|
[120] | 107 | /**
|
---|
| 108 | * <p>
|
---|
| 109 | * Applies MORPH to a single instance
|
---|
| 110 | * </p>
|
---|
| 111 | *
|
---|
[135] | 112 | * @param instance
|
---|
| 113 | * instance that is morphed
|
---|
| 114 | * @param data
|
---|
| 115 | * data based on which the instance is morphed
|
---|
[120] | 116 | */
|
---|
| 117 | public void morphInstance(Instance instance, Instances data) {
|
---|
| 118 | Instance nearestUnlikeNeighbor = getNearestUnlikeNeighbor(instance, data);
|
---|
[135] | 119 | if (nearestUnlikeNeighbor == null) {
|
---|
| 120 | throw new RuntimeException("could not find nearest unlike neighbor within the data: " +
|
---|
| 121 | data.relationName());
|
---|
[120] | 122 | }
|
---|
[135] | 123 | for (int j = 0; j < data.numAttributes(); j++) {
|
---|
| 124 | if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
|
---|
| 125 | double randVal = rand.nextDouble() * (beta - alpha) + alpha;
|
---|
| 126 | instance.setValue(j, instance.value(j) +
|
---|
| 127 | randVal * (instance.value(j) - nearestUnlikeNeighbor.value(j)));
|
---|
[50] | 128 | }
|
---|
| 129 | }
|
---|
| 130 | }
|
---|
[135] | 131 |
|
---|
[50] | 132 | /**
|
---|
| 133 | * <p>
|
---|
[135] | 134 | * Determines the nearest unlike neighbor of an instance.
|
---|
[50] | 135 | * </p>
|
---|
| 136 | *
|
---|
[135] | 137 | * @param instance
|
---|
| 138 | * instance to which the nearest unlike neighbor is determined
|
---|
| 139 | * @param data
|
---|
| 140 | * data where the nearest unlike neighbor is determined from
|
---|
[50] | 141 | * @return nearest unlike instance
|
---|
| 142 | */
|
---|
[120] | 143 | public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
|
---|
[50] | 144 | Instance nearestUnlikeNeighbor = null;
|
---|
[135] | 145 |
|
---|
| 146 | double[] instanceVector = new double[data.numAttributes() - 1];
|
---|
[50] | 147 | int tmp = 0;
|
---|
[135] | 148 | for (int j = 0; j < data.numAttributes(); j++) {
|
---|
| 149 | if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
|
---|
[50] | 150 | instanceVector[tmp] = instance.value(j);
|
---|
| 151 | }
|
---|
| 152 | }
|
---|
[135] | 153 |
|
---|
[50] | 154 | double minDistance = Double.MAX_VALUE;
|
---|
[135] | 155 | for (int i = 0; i < data.numInstances(); i++) {
|
---|
| 156 | if (instance.classValue() != data.instance(i).classValue()) {
|
---|
[50] | 157 | double[] otherVector = new double[data.numAttributes() - 1];
|
---|
| 158 | tmp = 0;
|
---|
| 159 | for (int j = 0; j < data.numAttributes(); j++) {
|
---|
[135] | 160 | if (data.attribute(j) != data.classAttribute() &&
|
---|
| 161 | data.attribute(j).isNumeric())
|
---|
| 162 | {
|
---|
[50] | 163 | otherVector[tmp++] = data.instance(i).value(j);
|
---|
| 164 | }
|
---|
| 165 | }
|
---|
[135] | 166 | if (MathArrays.distance(instanceVector, otherVector) < minDistance) {
|
---|
[50] | 167 | minDistance = MathArrays.distance(instanceVector, otherVector);
|
---|
| 168 | nearestUnlikeNeighbor = data.instance(i);
|
---|
| 169 | }
|
---|
| 170 | }
|
---|
| 171 | }
|
---|
| 172 | return nearestUnlikeNeighbor;
|
---|
| 173 | }
|
---|
| 174 | }
|
---|