source: trunk/CrossPare/src/de/ugoe/cs/cpdp/dataprocessing/MORPH.java

Last change on this file was 135, checked in by sherbold, 8 years ago
  • code documentation and formatting
  • Property svn:mime-type set to text/plain
File size: 5.9 KB
RevLine 
[86]1// Copyright 2015 Georg-August-Universität Göttingen, Germany
[50]2//
3//   Licensed under the Apache License, Version 2.0 (the "License");
4//   you may not use this file except in compliance with the License.
5//   You may obtain a copy of the License at
6//
7//       http://www.apache.org/licenses/LICENSE-2.0
8//
9//   Unless required by applicable law or agreed to in writing, software
10//   distributed under the License is distributed on an "AS IS" BASIS,
11//   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//   See the License for the specific language governing permissions and
13//   limitations under the License.
14
15package de.ugoe.cs.cpdp.dataprocessing;
16
17import java.security.InvalidParameterException;
18import java.util.Random;
19
20import org.apache.commons.collections4.list.SetUniqueList;
21import org.apache.commons.math3.util.MathArrays;
22
23import weka.core.Instance;
24import weka.core.Instances;
25
26/**
[135]27 * Implements the MORPH data privatization.
[50]28 *
29 *
30 * @author Steffen Herbold
31 */
32public class MORPH implements ISetWiseProcessingStrategy, IProcessesingStrategy {
33
34    /**
35     * random number generator for MORPH
36     */
37    Random rand = new Random();
[135]38
[50]39    /**
40     * parameter alpha for MORPH, default is 0.15
41     */
42    double alpha = 0.15;
[135]43
[50]44    /**
45     * parameter beta for MORPH, default is 0.35
46     */
47    double beta = 0.35;
[135]48
[50]49    /**
50     * Does not have parameters. String is ignored.
51     *
52     * @param parameters
53     *            ignored
54     */
55    @Override
56    public void setParameter(String parameters) {
[70]57        if (parameters != null && !parameters.equals("")) {
[50]58            String[] values = parameters.split(" ");
[135]59            if (values.length != 2) {
[50]60                throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values");
61            }
62            try {
63                alpha = Double.parseDouble(values[0]);
64                beta = Double.parseDouble(values[1]);
[135]65            }
66            catch (NumberFormatException e) {
[50]67                throw new InvalidParameterException("MORPH requires two doubles as parameter or no parameters to use default values");
68            }
69        }
70    }
71
72    /**
73     * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
74     *      org.apache.commons.collections4.list.SetUniqueList)
75     */
76    @Override
77    public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
[135]78        for (Instances traindata : traindataSet) {
[50]79            applyMORPH(traindata);
80        }
81    }
82
83    /**
84     * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
85     *      weka.core.Instances)
86     */
87    @Override
88    public void apply(Instances testdata, Instances traindata) {
89        applyMORPH(traindata);
90    }
[135]91
[50]92    /**
93     *
94     * <p>
95     * Applies MORPH to the data
96     * </p>
97     *
[135]98     * @param data
99     *            data to which the processor is applied
[50]100     */
[120]101    public void applyMORPH(Instances data) {
[135]102        for (int i = 0; i < data.numInstances(); i++) {
[120]103            morphInstance(data.get(i), data);
104        }
105    }
[135]106
[120]107    /**
108     * <p>
109     * Applies MORPH to a single instance
110     * </p>
111     *
[135]112     * @param instance
113     *            instance that is morphed
114     * @param data
115     *            data based on which the instance is morphed
[120]116     */
117    public void morphInstance(Instance instance, Instances data) {
118        Instance nearestUnlikeNeighbor = getNearestUnlikeNeighbor(instance, data);
[135]119        if (nearestUnlikeNeighbor == null) {
120            throw new RuntimeException("could not find nearest unlike neighbor within the data: " +
121                data.relationName());
[120]122        }
[135]123        for (int j = 0; j < data.numAttributes(); j++) {
124            if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
125                double randVal = rand.nextDouble() * (beta - alpha) + alpha;
126                instance.setValue(j, instance.value(j) +
127                    randVal * (instance.value(j) - nearestUnlikeNeighbor.value(j)));
[50]128            }
129        }
130    }
[135]131
[50]132    /**
133     * <p>
[135]134     * Determines the nearest unlike neighbor of an instance.
[50]135     * </p>
136     *
[135]137     * @param instance
138     *            instance to which the nearest unlike neighbor is determined
139     * @param data
140     *            data where the nearest unlike neighbor is determined from
[50]141     * @return nearest unlike instance
142     */
[120]143    public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
[50]144        Instance nearestUnlikeNeighbor = null;
[135]145
146        double[] instanceVector = new double[data.numAttributes() - 1];
[50]147        int tmp = 0;
[135]148        for (int j = 0; j < data.numAttributes(); j++) {
149            if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
[50]150                instanceVector[tmp] = instance.value(j);
151            }
152        }
[135]153
[50]154        double minDistance = Double.MAX_VALUE;
[135]155        for (int i = 0; i < data.numInstances(); i++) {
156            if (instance.classValue() != data.instance(i).classValue()) {
[50]157                double[] otherVector = new double[data.numAttributes() - 1];
158                tmp = 0;
159                for (int j = 0; j < data.numAttributes(); j++) {
[135]160                    if (data.attribute(j) != data.classAttribute() &&
161                        data.attribute(j).isNumeric())
162                    {
[50]163                        otherVector[tmp++] = data.instance(i).value(j);
164                    }
165                }
[135]166                if (MathArrays.distance(instanceVector, otherVector) < minDistance) {
[50]167                    minDistance = MathArrays.distance(instanceVector, otherVector);
168                    nearestUnlikeNeighbor = data.instance(i);
169                }
170            }
171        }
172        return nearestUnlikeNeighbor;
173    }
174}
Note: See TracBrowser for help on using the repository browser.