Changeset 41 for trunk/CrossPare/src/de/ugoe/cs/cpdp/training
- Timestamp: 09/24/15 10:59:05
- Location: trunk/CrossPare/src/de/ugoe/cs/cpdp/training
- Files: 12 edited
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/FixClass.java
r31 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 14 28 * @author Steffen Herbold 15 29 */ 16 public class FixClass extends AbstractClassifier implements ITrainingStrategy, IWekaCompatibleTrainer { 30 public class FixClass extends AbstractClassifier implements ITrainingStrategy, 31 IWekaCompatibleTrainer 32 { 17 33 18 34 private static final long serialVersionUID = 1L; 19 35 20 36 private double fixedClassValue = 0.0d; 21 37 22 23 24 25 26 27 28 29 30 38 /** 39 * Returns default capabilities of the classifier. 40 * 41 * @return the capabilities of this classifier 42 */ 43 @Override 44 public Capabilities getCapabilities() { 45 Capabilities result = super.getCapabilities(); 46 result.disableAll(); 31 47 32 33 34 35 36 37 38 48 // attributes 49 result.enable(Capability.NOMINAL_ATTRIBUTES); 50 result.enable(Capability.NUMERIC_ATTRIBUTES); 51 result.enable(Capability.DATE_ATTRIBUTES); 52 result.enable(Capability.STRING_ATTRIBUTES); 53 result.enable(Capability.RELATIONAL_ATTRIBUTES); 54 result.enable(Capability.MISSING_VALUES); 39 55 40 41 42 43 56 // class 57 result.enable(Capability.NOMINAL_CLASS); 58 result.enable(Capability.NUMERIC_CLASS); 59 result.enable(Capability.MISSING_CLASS_VALUES); 44 60 45 46 61 // instances 62 result.setMinimumNumberInstances(0); 47 63 48 49 64 return result; 65 } 50 66 51 52 53 54 67 @Override 68 public void setOptions(String[] options) throws Exception { 69 fixedClassValue = Double.parseDouble(Utils.getOption('C', options)); 70 } 55 71 56 57 58 59 72 @Override 73 public double classifyInstance(Instance instance) { 74 return fixedClassValue; 75 } 60 76 61 62 63 64 77 @Override 78 public void buildClassifier(Instances traindata) throws Exception { 79 // do nothing 80 } 65 81 66 @Override 67 public void setParameter(String parameters) { 68 try { 69 this.setOptions(parameters.split(" ")); 70 } catch (Exception e) { 71 e.printStackTrace(); 72 } 73 } 82 @Override 83 public void setParameter(String parameters) { 84 try { 85 this.setOptions(parameters.split(" ")); 86 } 87 catch (Exception e) { 88 e.printStackTrace(); 89 } 90 } 74 91 75 76 77 78 92 @Override 93 public void apply(Instances traindata) { 94 // do nothing! 95 } 79 96 80 81 82 83 97 @Override 98 public String getName() { 99 return "FixClass"; 100 } 84 101 85 86 87 88 102 @Override 103 public Classifier getClassifier() { 104 return this; 105 } 89 106 90 107 } -
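For orientation only (not part of the changeset): a minimal usage sketch of FixClass as it stands after r41. The demo class and the attribute names ("loc", "complexity", "bug") are invented; the point is that the trainer merely parses -C via Utils.getOption and returns that value for every instance, ignoring the training data completely.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import de.ugoe.cs.cpdp.training.FixClass;

public class FixClassDemo {
    public static void main(String[] args) throws Exception {
        // hypothetical two-metric data set with a nominal defect label
        ArrayList<Attribute> atts = new ArrayList<>();
        atts.add(new Attribute("loc"));
        atts.add(new Attribute("complexity"));
        ArrayList<String> labels = new ArrayList<>();
        labels.add("non-defective");
        labels.add("defective");
        atts.add(new Attribute("bug", labels));
        Instances data = new Instances("demo", atts, 0);
        data.setClassIndex(data.numAttributes() - 1);

        FixClass trainer = new FixClass();
        trainer.setParameter("-C 1.0");  // fixed class value, parsed with Utils.getOption('C', ...)
        trainer.apply(data);             // intentionally a no-op: nothing is learned

        Instance inst = new DenseInstance(1.0, new double[] { 120.0, 7.0, 0.0 });
        inst.setDataset(data);
        System.out.println(trainer.classifyInstance(inst));  // always prints 1.0
    }
}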
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ISetWiseTrainingStrategy.java
r2 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 7 21 // Bagging Strategy: separate models for each training data set 8 22 public interface ISetWiseTrainingStrategy extends ITrainer { 9 10 11 12 23 24 void apply(SetUniqueList<Instances> traindataSet); 25 26 String getName(); 13 27 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainer.java
r2 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 -
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/ITrainingStrategy.java
r6 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 4 18 5 19 public interface ITrainingStrategy extends ITrainer { 6 7 8 9 20 21 void apply(Instances traindata); 22 23 String getName(); 10 24 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/IWekaCompatibleTrainer.java
r24 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 4 18 5 19 public interface IWekaCompatibleTrainer extends ITrainer { 6 7 8 9 20 21 Classifier getClassifier(); 22 23 String getName(); 10 24 } -
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/QuadTree.java
r23 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 12 26 * QuadTree implementation 13 27 * 14 * QuadTree gets a list of instances and then recursively split them into 4 childs 15 * For this it usesthe median of the 2 values x,y28 * QuadTree gets a list of instances and then recursively split them into 4 childs For this it uses 29 * the median of the 2 values x,y 16 30 */ 17 31 public class QuadTree { 18 19 /* 1 parent or null */ 20 private QuadTree parent = null; 21 22 /* 4 childs, 1 per quadrant */ 23 private QuadTree child_nw; 24 private QuadTree child_ne; 25 private QuadTree child_se; 26 private QuadTree child_sw; 27 28 /* list (only helps with generation of list of childs!) */ 29 private ArrayList<QuadTree> l = new ArrayList<QuadTree>(); 30 31 /* level only used for debugging */ 32 public int level = 0; 33 34 /* size of the quadrant */ 35 private double[] x; 36 private double[] y; 37 38 public static boolean verbose = false; 39 public static int size = 0; 40 public static double alpha = 0; 41 42 /* cluster payloads */ 43 public static ArrayList<ArrayList<QuadTreePayload<Instance>>> ccluster = new ArrayList<ArrayList<QuadTreePayload<Instance>>>(); 44 45 /* cluster sizes (index is cluster number, arraylist is list of boxes (x0,y0,x1,y1) */ 46 public static HashMap<Integer, ArrayList<Double[][]>> csize = new HashMap<Integer, ArrayList<Double[][]>>(); 47 48 /* payload of this instance */ 49 private ArrayList<QuadTreePayload<Instance>> payload; 50 51 52 public QuadTree(QuadTree parent, ArrayList<QuadTreePayload<Instance>> payload) { 53 this.parent = parent; 54 this.payload = payload; 55 } 56 57 58 public String toString() { 59 String n = ""; 60 if(this.parent == null) { 61 n += "rootnode "; 62 } 63 String level = new String(new char[this.level]).replace("\0", "-"); 64 n += level + " instances: " + this.getNumbers(); 65 return n; 66 } 67 68 /** 69 * Returns the payload, used for clustering 70 * in the clustering list we only have children with paylod 71 * 72 * @return payload 73 */ 74 public ArrayList<QuadTreePayload<Instance>> getPayload() { 75 return this.payload; 76 } 77 78 /** 79 * Calculate the density of this quadrant 80 * 81 * density = number of instances / global size (all instances) 82 * 83 * @return density 84 */ 85 public double getDensity() { 86 double dens = 0; 87 dens = (double)this.getNumbers() / QuadTree.size; 88 return dens; 89 } 90 91 public void setSize(double[] x, double[] y){ 92 this.x = x; 93 this.y = y; 94 } 95 96 public double[][] getSize() { 97 return new double[][] {this.x, this.y}; 98 } 99 100 public Double[][] getSizeDouble() { 101 Double[] tmpX = new Double[2]; 102 Double[] tmpY = new Double[2]; 103 104 tmpX[0] = this.x[0]; 105 tmpX[1] = this.x[1]; 106 107 tmpY[0] = this.y[0]; 108 tmpY[1] = this.y[1]; 109 110 return new Double[][] {tmpX, tmpY}; 111 } 112 113 /** 114 * TODO: DRY, median ist immer dasselbe 115 * 116 * 
@return median for x 117 */ 118 private double getMedianForX() { 119 double med_x =0 ; 120 121 Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { 122 @Override 123 public int compare(QuadTreePayload<Instance> x1, QuadTreePayload<Instance> x2) { 124 return Double.compare(x1.x, x2.x); 125 } 126 }); 127 128 if(this.payload.size() % 2 == 0) { 129 int mid = this.payload.size() / 2; 130 med_x = (this.payload.get(mid).x + this.payload.get(mid+1).x) / 2; 131 }else { 132 int mid = this.payload.size() / 2; 133 med_x = this.payload.get(mid).x; 134 } 135 136 if(QuadTree.verbose) { 137 System.out.println("sorted:"); 138 for(int i = 0; i < this.payload.size(); i++) { 139 System.out.print(""+this.payload.get(i).x+","); 140 } 141 System.out.println("median x: " + med_x); 142 } 143 return med_x; 144 } 145 146 private double getMedianForY() { 147 double med_y =0 ; 148 149 Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { 150 @Override 151 public int compare(QuadTreePayload<Instance> y1, QuadTreePayload<Instance> y2) { 152 return Double.compare(y1.y, y2.y); 153 } 154 }); 155 156 if(this.payload.size() % 2 == 0) { 157 int mid = this.payload.size() / 2; 158 med_y = (this.payload.get(mid).y + this.payload.get(mid+1).y) / 2; 159 }else { 160 int mid = this.payload.size() / 2; 161 med_y = this.payload.get(mid).y; 162 } 163 164 if(QuadTree.verbose) { 165 System.out.println("sorted:"); 166 for(int i = 0; i < this.payload.size(); i++) { 167 System.out.print(""+this.payload.get(i).y+","); 168 } 169 System.out.println("median y: " + med_y); 170 } 171 return med_y; 172 } 173 174 /** 175 * Reurns the number of instances in the payload 176 * 177 * @return int number of instances 178 */ 179 public int getNumbers() { 180 int number = 0; 181 if(this.payload != null) { 182 number = this.payload.size(); 183 } 184 return number; 185 } 186 187 /** 188 * Calculate median values of payload for x, y and split into 4 sectors 189 * 190 * @return Array of QuadTree nodes (4 childs) 191 * @throws Exception if we would run into an recursive loop 192 */ 193 public QuadTree[] split() throws Exception { 194 195 double medx = this.getMedianForX(); 196 double medy = this.getMedianForY(); 197 198 // Payload lists for each child 199 ArrayList<QuadTreePayload<Instance>> nw = new ArrayList<QuadTreePayload<Instance>>(); 200 ArrayList<QuadTreePayload<Instance>> sw = new ArrayList<QuadTreePayload<Instance>>(); 201 ArrayList<QuadTreePayload<Instance>> ne = new ArrayList<QuadTreePayload<Instance>>(); 202 ArrayList<QuadTreePayload<Instance>> se = new ArrayList<QuadTreePayload<Instance>>(); 203 204 // sort the payloads to new payloads 205 // here we have the problem that payloads with the same values are sorted 206 // into the same slots and it could happen that medx and medy = size_x[1] and size_y[1] 207 // in that case we would have an endless loop 208 for(int i=0; i < this.payload.size(); i++) { 209 210 QuadTreePayload<Instance> item = this.payload.get(i); 211 212 // north west 213 if(item.x <= medx && item.y >= medy) { 214 nw.add(item); 215 } 216 217 // south west 218 else if(item.x <= medx && item.y <= medy) { 219 sw.add(item); 220 } 221 222 // north east 223 else if(item.x >= medx && item.y >= medy) { 224 ne.add(item); 225 } 226 227 // south east 228 else if(item.x >= medx && item.y <= medy) { 229 se.add(item); 230 } 231 } 232 233 // if we assign one child a payload equal to our own (see problem above) 234 // we throw an exceptions which stops the recursion on this node 235 
if(nw.equals(this.payload)) { 236 throw new Exception("payload equal"); 237 } 238 if(sw.equals(this.payload)) { 239 throw new Exception("payload equal"); 240 } 241 if(ne.equals(this.payload)) { 242 throw new Exception("payload equal"); 243 } 244 if(se.equals(this.payload)) { 245 throw new Exception("payload equal"); 246 } 247 248 this.child_nw = new QuadTree(this, nw); 249 this.child_nw.setSize(new double[] {this.x[0], medx}, new double[] {medy, this.y[1]}); 250 this.child_nw.level = this.level + 1; 251 252 this.child_sw = new QuadTree(this, sw); 253 this.child_sw.setSize(new double[] {this.x[0], medx}, new double[] {this.y[0], medy}); 254 this.child_sw.level = this.level + 1; 255 256 this.child_ne = new QuadTree(this, ne); 257 this.child_ne.setSize(new double[] {medx, this.x[1]}, new double[] {medy, this.y[1]}); 258 this.child_ne.level = this.level + 1; 259 260 this.child_se = new QuadTree(this, se); 261 this.child_se.setSize(new double[] {medx, this.x[1]}, new double[] {this.y[0], medy}); 262 this.child_se.level = this.level + 1; 263 264 this.payload = null; 265 return new QuadTree[] {this.child_nw, this.child_ne, this.child_se, this.child_sw}; 266 } 267 268 /** 269 * TODO: static method 270 * 271 * @param q 272 */ 273 public void recursiveSplit(QuadTree q) { 274 if(QuadTree.verbose) { 275 System.out.println("splitting: "+ q); 276 } 277 if(q.getNumbers() < QuadTree.alpha) { 278 return; 279 }else{ 280 // exception is thrown if we would run into an endless loop (see comments in split()) 281 try { 282 QuadTree[] childs = q.split(); 283 this.recursiveSplit(childs[0]); 284 this.recursiveSplit(childs[1]); 285 this.recursiveSplit(childs[2]); 286 this.recursiveSplit(childs[3]); 287 }catch(Exception e) { 288 return; 289 } 290 } 291 } 292 293 /** 294 * returns an list of childs sorted by density 295 * 296 * @param q QuadTree 297 * @return list of QuadTrees 298 */ 299 private void generateList(QuadTree q) { 300 301 // we only have all childs or none at all 302 if(q.child_ne == null) { 303 this.l.add(q); 304 } 305 306 if(q.child_ne != null) { 307 this.generateList(q.child_ne); 308 } 309 if(q.child_nw != null) { 310 this.generateList(q.child_nw); 311 } 312 if(q.child_se != null) { 313 this.generateList(q.child_se); 314 } 315 if(q.child_sw != null) { 316 this.generateList(q.child_sw); 317 } 318 } 319 320 /** 321 * Checks if passed QuadTree is neighboring to us 322 * 323 * @param q QuadTree 324 * @return true if passed QuadTree is a neighbor 325 */ 326 public boolean isNeighbour(QuadTree q) { 327 boolean is_neighbour = false; 328 329 double[][] our_size = this.getSize(); 330 double[][] new_size = q.getSize(); 331 332 // X is i=0, Y is i=1 333 for(int i =0; i < 2; i++) { 334 // we are smaller than q 335 // -------------- q 336 // ------- we 337 if(our_size[i][0] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]) { 338 is_neighbour = true; 339 } 340 // we overlap with q at some point 341 //a) ---------------q 342 // ----------- we 343 //b) --------- q 344 // --------- we 345 if((our_size[i][0] >= new_size[i][0] && our_size[i][0] <= new_size[i][1]) || 346 (our_size[i][1] >= new_size[i][0] && our_size[i][1] <= new_size[i][1])) { 347 is_neighbour = true; 348 } 349 // we are larger than q 350 // ---- q 351 // ---------- we 352 if(our_size[i][1] >= new_size[i][1] && our_size[i][0] <= new_size[i][0]) { 353 is_neighbour = true; 354 } 355 } 356 357 if(is_neighbour && QuadTree.verbose) { 358 System.out.println(this + " neighbour of: " + q); 359 } 360 361 return is_neighbour; 362 } 363 364 /** 365 * Perform 
pruning and clustering of the quadtree 366 * 367 * Pruning according to: 368 * Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman, 369 * Forrest Shull, Burak Turhan, Thomas Zimmermann, 370 * "Local versus Global Lessons for Defect Prediction and Effort Estimation," 371 * IEEE Transactions on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013 372 * 373 * 1) get list of leaf quadrants 374 * 2) sort by their density 375 * 3) set stop_rule to 0.5 * highest Density in the list 376 * 4) merge all nodes with a density > stop_rule to the new cluster and remove all from list 377 * 5) repeat 378 * 379 * @param q List of QuadTree (children only) 380 */ 381 public void gridClustering(ArrayList<QuadTree> list) { 382 383 if(list.size() == 0) { 384 return; 385 } 386 387 double stop_rule; 388 QuadTree biggest; 389 QuadTree current; 390 391 // current clusterlist 392 ArrayList<QuadTreePayload<Instance>> current_cluster; 393 394 // remove list (for removal of items after scanning of the list) 395 ArrayList<Integer> remove = new ArrayList<Integer>(); 396 397 // 1. find biggest, and add it 398 biggest = list.get(list.size()-1); 399 stop_rule = biggest.getDensity() * 0.5; 400 401 current_cluster = new ArrayList<QuadTreePayload<Instance>>(); 402 current_cluster.addAll(biggest.getPayload()); 403 404 // remove the biggest because we are starting with it 405 remove.add(list.size()-1); 406 407 ArrayList<Double[][]> tmpSize = new ArrayList<Double[][]>(); 408 tmpSize.add(biggest.getSizeDouble()); 409 410 // check the items for their density 411 for(int i=list.size()-1; i >= 0; i--) { 412 current = list.get(i); 413 414 // 2. find neighbors with correct density 415 // if density > stop_rule and is_neighbour add to cluster and remove from list 416 if(current.getDensity() > stop_rule && !current.equals(biggest) && current.isNeighbour(biggest)) { 417 current_cluster.addAll(current.getPayload()); 418 419 // add it to remove list (we cannot remove it inside the loop because it would move the index) 420 remove.add(i); 421 422 // get the size 423 tmpSize.add(current.getSizeDouble()); 424 } 425 } 426 427 // 3. remove our removal candidates from the list 428 for(Integer item: remove) { 429 list.remove((int)item); 430 } 431 432 // 4. add to cluster 433 QuadTree.ccluster.add(current_cluster); 434 435 // 5. 
add sizes of our current (biggest) this adds a number of sizes (all QuadTree Instances belonging to this cluster) 436 // we need that to classify test instances to a cluster later 437 Integer cnumber = new Integer(QuadTree.ccluster.size()-1); 438 if(QuadTree.csize.containsKey(cnumber) == false) { 439 QuadTree.csize.put(cnumber, tmpSize); 440 } 441 442 // repeat 443 this.gridClustering(list); 444 } 445 446 public void printInfo() { 447 System.out.println("we have " + ccluster.size() + " clusters"); 448 449 for(int i=0; i < ccluster.size(); i++) { 450 System.out.println("cluster: "+i+ " size: "+ ccluster.get(i).size()); 451 } 452 } 453 454 /** 455 * Helper Method to get a sorted list (by density) for all 456 * children 457 * 458 * @param q QuadTree 459 * @return Sorted ArrayList of quadtrees 460 */ 461 public ArrayList<QuadTree> getList(QuadTree q) { 462 this.generateList(q); 463 464 Collections.sort(this.l, new Comparator<QuadTree>() { 465 @Override 466 public int compare(QuadTree x1, QuadTree x2) { 467 return Double.compare(x1.getDensity(), x2.getDensity()); 468 } 469 }); 470 471 return this.l; 472 } 32 33 /* 1 parent or null */ 34 private QuadTree parent = null; 35 36 /* 4 childs, 1 per quadrant */ 37 private QuadTree child_nw; 38 private QuadTree child_ne; 39 private QuadTree child_se; 40 private QuadTree child_sw; 41 42 /* list (only helps with generation of list of childs!) */ 43 private ArrayList<QuadTree> l = new ArrayList<QuadTree>(); 44 45 /* level only used for debugging */ 46 public int level = 0; 47 48 /* size of the quadrant */ 49 private double[] x; 50 private double[] y; 51 52 public static boolean verbose = false; 53 public static int size = 0; 54 public static double alpha = 0; 55 56 /* cluster payloads */ 57 public static ArrayList<ArrayList<QuadTreePayload<Instance>>> ccluster = 58 new ArrayList<ArrayList<QuadTreePayload<Instance>>>(); 59 60 /* cluster sizes (index is cluster number, arraylist is list of boxes (x0,y0,x1,y1) */ 61 public static HashMap<Integer, ArrayList<Double[][]>> csize = 62 new HashMap<Integer, ArrayList<Double[][]>>(); 63 64 /* payload of this instance */ 65 private ArrayList<QuadTreePayload<Instance>> payload; 66 67 public QuadTree(QuadTree parent, ArrayList<QuadTreePayload<Instance>> payload) { 68 this.parent = parent; 69 this.payload = payload; 70 } 71 72 public String toString() { 73 String n = ""; 74 if (this.parent == null) { 75 n += "rootnode "; 76 } 77 String level = new String(new char[this.level]).replace("\0", "-"); 78 n += level + " instances: " + this.getNumbers(); 79 return n; 80 } 81 82 /** 83 * Returns the payload, used for clustering in the clustering list we only have children with 84 * paylod 85 * 86 * @return payload 87 */ 88 public ArrayList<QuadTreePayload<Instance>> getPayload() { 89 return this.payload; 90 } 91 92 /** 93 * Calculate the density of this quadrant 94 * 95 * density = number of instances / global size (all instances) 96 * 97 * @return density 98 */ 99 public double getDensity() { 100 double dens = 0; 101 dens = (double) this.getNumbers() / QuadTree.size; 102 return dens; 103 } 104 105 public void setSize(double[] x, double[] y) { 106 this.x = x; 107 this.y = y; 108 } 109 110 public double[][] getSize() { 111 return new double[][] 112 { this.x, this.y }; 113 } 114 115 public Double[][] getSizeDouble() { 116 Double[] tmpX = new Double[2]; 117 Double[] tmpY = new Double[2]; 118 119 tmpX[0] = this.x[0]; 120 tmpX[1] = this.x[1]; 121 122 tmpY[0] = this.y[0]; 123 tmpY[1] = this.y[1]; 124 125 return new Double[][] 126 { 
tmpX, tmpY }; 127 } 128 129 /** 130 * TODO: DRY, median ist immer dasselbe 131 * 132 * @return median for x 133 */ 134 private double getMedianForX() { 135 double med_x = 0; 136 137 Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { 138 @Override 139 public int compare(QuadTreePayload<Instance> x1, QuadTreePayload<Instance> x2) { 140 return Double.compare(x1.x, x2.x); 141 } 142 }); 143 144 if (this.payload.size() % 2 == 0) { 145 int mid = this.payload.size() / 2; 146 med_x = (this.payload.get(mid).x + this.payload.get(mid + 1).x) / 2; 147 } 148 else { 149 int mid = this.payload.size() / 2; 150 med_x = this.payload.get(mid).x; 151 } 152 153 if (QuadTree.verbose) { 154 System.out.println("sorted:"); 155 for (int i = 0; i < this.payload.size(); i++) { 156 System.out.print("" + this.payload.get(i).x + ","); 157 } 158 System.out.println("median x: " + med_x); 159 } 160 return med_x; 161 } 162 163 private double getMedianForY() { 164 double med_y = 0; 165 166 Collections.sort(this.payload, new Comparator<QuadTreePayload<Instance>>() { 167 @Override 168 public int compare(QuadTreePayload<Instance> y1, QuadTreePayload<Instance> y2) { 169 return Double.compare(y1.y, y2.y); 170 } 171 }); 172 173 if (this.payload.size() % 2 == 0) { 174 int mid = this.payload.size() / 2; 175 med_y = (this.payload.get(mid).y + this.payload.get(mid + 1).y) / 2; 176 } 177 else { 178 int mid = this.payload.size() / 2; 179 med_y = this.payload.get(mid).y; 180 } 181 182 if (QuadTree.verbose) { 183 System.out.println("sorted:"); 184 for (int i = 0; i < this.payload.size(); i++) { 185 System.out.print("" + this.payload.get(i).y + ","); 186 } 187 System.out.println("median y: " + med_y); 188 } 189 return med_y; 190 } 191 192 /** 193 * Reurns the number of instances in the payload 194 * 195 * @return int number of instances 196 */ 197 public int getNumbers() { 198 int number = 0; 199 if (this.payload != null) { 200 number = this.payload.size(); 201 } 202 return number; 203 } 204 205 /** 206 * Calculate median values of payload for x, y and split into 4 sectors 207 * 208 * @return Array of QuadTree nodes (4 childs) 209 * @throws Exception 210 * if we would run into an recursive loop 211 */ 212 public QuadTree[] split() throws Exception { 213 214 double medx = this.getMedianForX(); 215 double medy = this.getMedianForY(); 216 217 // Payload lists for each child 218 ArrayList<QuadTreePayload<Instance>> nw = new ArrayList<QuadTreePayload<Instance>>(); 219 ArrayList<QuadTreePayload<Instance>> sw = new ArrayList<QuadTreePayload<Instance>>(); 220 ArrayList<QuadTreePayload<Instance>> ne = new ArrayList<QuadTreePayload<Instance>>(); 221 ArrayList<QuadTreePayload<Instance>> se = new ArrayList<QuadTreePayload<Instance>>(); 222 223 // sort the payloads to new payloads 224 // here we have the problem that payloads with the same values are sorted 225 // into the same slots and it could happen that medx and medy = size_x[1] and size_y[1] 226 // in that case we would have an endless loop 227 for (int i = 0; i < this.payload.size(); i++) { 228 229 QuadTreePayload<Instance> item = this.payload.get(i); 230 231 // north west 232 if (item.x <= medx && item.y >= medy) { 233 nw.add(item); 234 } 235 236 // south west 237 else if (item.x <= medx && item.y <= medy) { 238 sw.add(item); 239 } 240 241 // north east 242 else if (item.x >= medx && item.y >= medy) { 243 ne.add(item); 244 } 245 246 // south east 247 else if (item.x >= medx && item.y <= medy) { 248 se.add(item); 249 } 250 } 251 252 // if we assign one child a payload 
equal to our own (see problem above) 253 // we throw an exceptions which stops the recursion on this node 254 if (nw.equals(this.payload)) { 255 throw new Exception("payload equal"); 256 } 257 if (sw.equals(this.payload)) { 258 throw new Exception("payload equal"); 259 } 260 if (ne.equals(this.payload)) { 261 throw new Exception("payload equal"); 262 } 263 if (se.equals(this.payload)) { 264 throw new Exception("payload equal"); 265 } 266 267 this.child_nw = new QuadTree(this, nw); 268 this.child_nw.setSize(new double[] 269 { this.x[0], medx }, new double[] 270 { medy, this.y[1] }); 271 this.child_nw.level = this.level + 1; 272 273 this.child_sw = new QuadTree(this, sw); 274 this.child_sw.setSize(new double[] 275 { this.x[0], medx }, new double[] 276 { this.y[0], medy }); 277 this.child_sw.level = this.level + 1; 278 279 this.child_ne = new QuadTree(this, ne); 280 this.child_ne.setSize(new double[] 281 { medx, this.x[1] }, new double[] 282 { medy, this.y[1] }); 283 this.child_ne.level = this.level + 1; 284 285 this.child_se = new QuadTree(this, se); 286 this.child_se.setSize(new double[] 287 { medx, this.x[1] }, new double[] 288 { this.y[0], medy }); 289 this.child_se.level = this.level + 1; 290 291 this.payload = null; 292 return new QuadTree[] 293 { this.child_nw, this.child_ne, this.child_se, this.child_sw }; 294 } 295 296 /** 297 * TODO: static method 298 * 299 * @param q 300 */ 301 public void recursiveSplit(QuadTree q) { 302 if (QuadTree.verbose) { 303 System.out.println("splitting: " + q); 304 } 305 if (q.getNumbers() < QuadTree.alpha) { 306 return; 307 } 308 else { 309 // exception is thrown if we would run into an endless loop (see comments in split()) 310 try { 311 QuadTree[] childs = q.split(); 312 this.recursiveSplit(childs[0]); 313 this.recursiveSplit(childs[1]); 314 this.recursiveSplit(childs[2]); 315 this.recursiveSplit(childs[3]); 316 } 317 catch (Exception e) { 318 return; 319 } 320 } 321 } 322 323 /** 324 * returns an list of childs sorted by density 325 * 326 * @param q 327 * QuadTree 328 * @return list of QuadTrees 329 */ 330 private void generateList(QuadTree q) { 331 332 // we only have all childs or none at all 333 if (q.child_ne == null) { 334 this.l.add(q); 335 } 336 337 if (q.child_ne != null) { 338 this.generateList(q.child_ne); 339 } 340 if (q.child_nw != null) { 341 this.generateList(q.child_nw); 342 } 343 if (q.child_se != null) { 344 this.generateList(q.child_se); 345 } 346 if (q.child_sw != null) { 347 this.generateList(q.child_sw); 348 } 349 } 350 351 /** 352 * Checks if passed QuadTree is neighboring to us 353 * 354 * @param q 355 * QuadTree 356 * @return true if passed QuadTree is a neighbor 357 */ 358 public boolean isNeighbour(QuadTree q) { 359 boolean is_neighbour = false; 360 361 double[][] our_size = this.getSize(); 362 double[][] new_size = q.getSize(); 363 364 // X is i=0, Y is i=1 365 for (int i = 0; i < 2; i++) { 366 // we are smaller than q 367 // -------------- q 368 // ------- we 369 if (our_size[i][0] >= new_size[i][0] && our_size[i][1] <= new_size[i][1]) { 370 is_neighbour = true; 371 } 372 // we overlap with q at some point 373 // a) ---------------q 374 // ----------- we 375 // b) --------- q 376 // --------- we 377 if ((our_size[i][0] >= new_size[i][0] && our_size[i][0] <= new_size[i][1]) || 378 (our_size[i][1] >= new_size[i][0] && our_size[i][1] <= new_size[i][1])) 379 { 380 is_neighbour = true; 381 } 382 // we are larger than q 383 // ---- q 384 // ---------- we 385 if (our_size[i][1] >= new_size[i][1] && our_size[i][0] <= 
new_size[i][0]) { 386 is_neighbour = true; 387 } 388 } 389 390 if (is_neighbour && QuadTree.verbose) { 391 System.out.println(this + " neighbour of: " + q); 392 } 393 394 return is_neighbour; 395 } 396 397 /** 398 * Perform pruning and clustering of the quadtree 399 * 400 * Pruning according to: Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman, 401 * Forrest Shull, Burak Turhan, Thomas Zimmermann, 402 * "Local versus Global Lessons for Defect Prediction and Effort Estimation," IEEE Transactions 403 * on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013 404 * 405 * 1) get list of leaf quadrants 2) sort by their density 3) set stop_rule to 0.5 * highest 406 * Density in the list 4) merge all nodes with a density > stop_rule to the new cluster and 407 * remove all from list 5) repeat 408 * 409 * @param q 410 * List of QuadTree (children only) 411 */ 412 public void gridClustering(ArrayList<QuadTree> list) { 413 414 if (list.size() == 0) { 415 return; 416 } 417 418 double stop_rule; 419 QuadTree biggest; 420 QuadTree current; 421 422 // current clusterlist 423 ArrayList<QuadTreePayload<Instance>> current_cluster; 424 425 // remove list (for removal of items after scanning of the list) 426 ArrayList<Integer> remove = new ArrayList<Integer>(); 427 428 // 1. find biggest, and add it 429 biggest = list.get(list.size() - 1); 430 stop_rule = biggest.getDensity() * 0.5; 431 432 current_cluster = new ArrayList<QuadTreePayload<Instance>>(); 433 current_cluster.addAll(biggest.getPayload()); 434 435 // remove the biggest because we are starting with it 436 remove.add(list.size() - 1); 437 438 ArrayList<Double[][]> tmpSize = new ArrayList<Double[][]>(); 439 tmpSize.add(biggest.getSizeDouble()); 440 441 // check the items for their density 442 for (int i = list.size() - 1; i >= 0; i--) { 443 current = list.get(i); 444 445 // 2. find neighbors with correct density 446 // if density > stop_rule and is_neighbour add to cluster and remove from list 447 if (current.getDensity() > stop_rule && !current.equals(biggest) && 448 current.isNeighbour(biggest)) 449 { 450 current_cluster.addAll(current.getPayload()); 451 452 // add it to remove list (we cannot remove it inside the loop because it would move 453 // the index) 454 remove.add(i); 455 456 // get the size 457 tmpSize.add(current.getSizeDouble()); 458 } 459 } 460 461 // 3. remove our removal candidates from the list 462 for (Integer item : remove) { 463 list.remove((int) item); 464 } 465 466 // 4. add to cluster 467 QuadTree.ccluster.add(current_cluster); 468 469 // 5. 
add sizes of our current (biggest) this adds a number of sizes (all QuadTree Instances 470 // belonging to this cluster) 471 // we need that to classify test instances to a cluster later 472 Integer cnumber = new Integer(QuadTree.ccluster.size() - 1); 473 if (QuadTree.csize.containsKey(cnumber) == false) { 474 QuadTree.csize.put(cnumber, tmpSize); 475 } 476 477 // repeat 478 this.gridClustering(list); 479 } 480 481 public void printInfo() { 482 System.out.println("we have " + ccluster.size() + " clusters"); 483 484 for (int i = 0; i < ccluster.size(); i++) { 485 System.out.println("cluster: " + i + " size: " + ccluster.get(i).size()); 486 } 487 } 488 489 /** 490 * Helper Method to get a sorted list (by density) for all children 491 * 492 * @param q 493 * QuadTree 494 * @return Sorted ArrayList of quadtrees 495 */ 496 public ArrayList<QuadTree> getList(QuadTree q) { 497 this.generateList(q); 498 499 Collections.sort(this.l, new Comparator<QuadTree>() { 500 @Override 501 public int compare(QuadTree x1, QuadTree x2) { 502 return Double.compare(x1.getDensity(), x2.getDensity()); 503 } 504 }); 505 506 return this.l; 507 } 473 508 } -
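The gridClustering() Javadoc above describes a five-step pruning rule taken from Menzies et al. The following stripped-down sketch (not from the repository) shows only that stop rule: leaves are sorted by density, the densest remaining leaf seeds a cluster, and every leaf whose density exceeds half of that maximum is merged into it. The Leaf record and the density values are invented, and the isNeighbour() test is deliberately omitted to keep the example short.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class StopRuleDemo {
    /** hypothetical leaf record: only the density matters for this demo */
    static class Leaf {
        final double density;
        Leaf(double density) { this.density = density; }
    }

    /** merges leaves by the 0.5 * max-density stop rule described in gridClustering() */
    static List<List<Leaf>> cluster(List<Leaf> leaves) {
        List<Leaf> sorted = new ArrayList<>(leaves);
        sorted.sort(Comparator.comparingDouble((Leaf l) -> l.density));  // ascending by density
        List<List<Leaf>> clusters = new ArrayList<>();
        while (!sorted.isEmpty()) {
            Leaf biggest = sorted.remove(sorted.size() - 1);             // densest remaining leaf
            double stopRule = biggest.density * 0.5;
            List<Leaf> current = new ArrayList<>();
            current.add(biggest);
            // take every remaining leaf above the stop rule (neighbourhood check left out)
            sorted.removeIf(l -> l.density > stopRule && current.add(l));
            clusters.add(current);
        }
        return clusters;
    }

    public static void main(String[] args) {
        List<Leaf> leaves = new ArrayList<>();
        for (double d : new double[] { 0.40, 0.35, 0.15, 0.06, 0.04 }) {
            leaves.add(new Leaf(d));
        }
        cluster(leaves).forEach(c -> System.out.println("cluster with " + c.size() + " leaves"));
    }
}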
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/RandomClass.java
r38 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 11 25 * Assigns a random class label to the instance it is evaluated on. 12 26 * 13 * The range of class labels are hardcoded in fixedClassValues. 14 * This can later be extended to take values from the XML configuration.27 * The range of class labels are hardcoded in fixedClassValues. This can later be extended to take 28 * values from the XML configuration. 15 29 */ 16 public class RandomClass extends AbstractClassifier implements ITrainingStrategy, IWekaCompatibleTrainer { 30 public class RandomClass extends AbstractClassifier implements ITrainingStrategy, 31 IWekaCompatibleTrainer 32 { 17 33 18 34 private static final long serialVersionUID = 1L; 19 35 20 private double[] fixedClassValues = {0.0d, 1.0d}; 21 22 @Override 23 public void setParameter(String parameters) { 24 // do nothing, maybe take percentages for distribution later 25 } 36 private double[] fixedClassValues = 37 { 0.0d, 1.0d }; 26 38 27 28 public void buildClassifier(Instances arg0) throws Exception{29 // do nothing 30 39 @Override 40 public void setParameter(String parameters) { 41 // do nothing, maybe take percentages for distribution later 42 } 31 43 32 33 public Classifier getClassifier(){34 return this; 35 44 @Override 45 public void buildClassifier(Instances arg0) throws Exception { 46 // do nothing 47 } 36 48 37 38 public void apply(Instances traindata) {39 // nothing to do 40 49 @Override 50 public Classifier getClassifier() { 51 return this; 52 } 41 53 42 @Override 43 public String getName() { 44 return "RandomClass"; 45 } 46 47 @Override 48 public double classifyInstance(Instance instance) { 49 Random rand = new Random(); 50 int randomNum = rand.nextInt(this.fixedClassValues.length); 51 return this.fixedClassValues[randomNum]; 52 } 54 @Override 55 public void apply(Instances traindata) { 56 // nothing to do 57 } 58 59 @Override 60 public String getName() { 61 return "RandomClass"; 62 } 63 64 @Override 65 public double classifyInstance(Instance instance) { 66 Random rand = new Random(); 67 int randomNum = rand.nextInt(this.fixedClassValues.length); 68 return this.fixedClassValues[randomNum]; 69 } 53 70 } -
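Again only for illustration (an assumed demo, not project code): the prediction logic of RandomClass boils down to a uniform draw over the hard-coded values { 0.0, 1.0 }, exactly as in classifyInstance() above.

import java.util.Random;

public class RandomClassDemo {
    public static void main(String[] args) {
        double[] fixedClassValues = { 0.0d, 1.0d };  // same hard-coded range as RandomClass
        Random rand = new Random();
        for (int i = 0; i < 5; i++) {
            System.out.println(fixedClassValues[rand.nextInt(fixedClassValues.length)]);
        }
    }
}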
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaggingTraining.java
r25 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 18 32 /** 19 33 * Programmatic WekaBaggingTraining 20 *21 * first parameter is Trainer Name.22 * second parameter is class name23 34 * 24 * all subsequent parameters are configuration params (for example for trees) 25 * Cross Validation params always come last and are prepended with -CVPARAM 35 * first parameter is Trainer Name. second parameter is class name 36 * 37 * all subsequent parameters are configuration params (for example for trees) Cross Validation 38 * params always come last and are prepended with -CVPARAM 26 39 * 27 40 * XML Configurations for Weka Classifiers: 41 * 28 42 * <pre> 29 43 * {@code … … 37 51 public class WekaBaggingTraining extends WekaBaseTraining implements ISetWiseTrainingStrategy { 38 52 39 private final TraindatasetBagging classifier = new TraindatasetBagging(); 40 41 @Override 42 public Classifier getClassifier() { 43 return classifier; 44 } 45 46 @Override 47 public void apply(SetUniqueList<Instances> traindataSet) { 48 PrintStream errStr = System.err; 49 System.setErr(new PrintStream(new NullOutputStream())); 50 try { 51 classifier.buildClassifier(traindataSet); 52 } catch (Exception e) { 53 throw new RuntimeException(e); 54 } finally { 55 System.setErr(errStr); 56 } 57 } 58 59 public class TraindatasetBagging extends AbstractClassifier { 60 61 private static final long serialVersionUID = 1L; 53 private final TraindatasetBagging classifier = new TraindatasetBagging(); 62 54 63 private List<Instances> trainingData = null; 64 65 private List<Classifier> classifiers = null; 66 67 @Override 68 public double classifyInstance(Instance instance) { 69 if( classifiers==null ) { 70 return 0.0; 71 } 72 73 double classification = 0.0; 74 for( int i=0 ; i<classifiers.size(); i++ ) { 75 Classifier classifier = classifiers.get(i); 76 Instances traindata = trainingData.get(i); 77 78 Set<String> attributeNames = new HashSet<>(); 79 for( int j=0; j<traindata.numAttributes(); j++ ) { 80 attributeNames.add(traindata.attribute(j).name()); 81 } 82 83 double[] values = new double[traindata.numAttributes()]; 84 int index = 0; 85 for( int j=0; j<instance.numAttributes(); j++ ) { 86 if( attributeNames.contains(instance.attribute(j).name())) { 87 values[index] = instance.value(j); 88 index++; 89 } 90 } 91 92 Instances tmp = new Instances(traindata); 93 tmp.clear(); 94 Instance instCopy = new DenseInstance(instance.weight(), values); 95 instCopy.setDataset(tmp); 96 try { 97 classification += classifier.classifyInstance(instCopy); 98 } catch (Exception e) { 99 throw new RuntimeException("bagging classifier could not classify an instance", e); 100 } 101 } 102 classification /= classifiers.size(); 103 return (classification>=0.5) ? 
1.0 : 0.0; 104 } 105 106 public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception { 107 classifiers = new LinkedList<>(); 108 trainingData = new LinkedList<>(); 109 for( Instances traindata : traindataSet ) { 110 Classifier classifier = setupClassifier(); 111 classifier.buildClassifier(traindata); 112 classifiers.add(classifier); 113 trainingData.add(new Instances(traindata)); 114 } 115 } 116 117 @Override 118 public void buildClassifier(Instances traindata) throws Exception { 119 classifiers = new LinkedList<>(); 120 trainingData = new LinkedList<>(); 121 final Classifier classifier = setupClassifier(); 122 classifier.buildClassifier(traindata); 123 classifiers.add(classifier); 124 trainingData.add(new Instances(traindata)); 125 } 126 } 55 @Override 56 public Classifier getClassifier() { 57 return classifier; 58 } 59 60 @Override 61 public void apply(SetUniqueList<Instances> traindataSet) { 62 PrintStream errStr = System.err; 63 System.setErr(new PrintStream(new NullOutputStream())); 64 try { 65 classifier.buildClassifier(traindataSet); 66 } 67 catch (Exception e) { 68 throw new RuntimeException(e); 69 } 70 finally { 71 System.setErr(errStr); 72 } 73 } 74 75 public class TraindatasetBagging extends AbstractClassifier { 76 77 private static final long serialVersionUID = 1L; 78 79 private List<Instances> trainingData = null; 80 81 private List<Classifier> classifiers = null; 82 83 @Override 84 public double classifyInstance(Instance instance) { 85 if (classifiers == null) { 86 return 0.0; 87 } 88 89 double classification = 0.0; 90 for (int i = 0; i < classifiers.size(); i++) { 91 Classifier classifier = classifiers.get(i); 92 Instances traindata = trainingData.get(i); 93 94 Set<String> attributeNames = new HashSet<>(); 95 for (int j = 0; j < traindata.numAttributes(); j++) { 96 attributeNames.add(traindata.attribute(j).name()); 97 } 98 99 double[] values = new double[traindata.numAttributes()]; 100 int index = 0; 101 for (int j = 0; j < instance.numAttributes(); j++) { 102 if (attributeNames.contains(instance.attribute(j).name())) { 103 values[index] = instance.value(j); 104 index++; 105 } 106 } 107 108 Instances tmp = new Instances(traindata); 109 tmp.clear(); 110 Instance instCopy = new DenseInstance(instance.weight(), values); 111 instCopy.setDataset(tmp); 112 try { 113 classification += classifier.classifyInstance(instCopy); 114 } 115 catch (Exception e) { 116 throw new RuntimeException("bagging classifier could not classify an instance", 117 e); 118 } 119 } 120 classification /= classifiers.size(); 121 return (classification >= 0.5) ? 1.0 : 0.0; 122 } 123 124 public void buildClassifier(SetUniqueList<Instances> traindataSet) throws Exception { 125 classifiers = new LinkedList<>(); 126 trainingData = new LinkedList<>(); 127 for (Instances traindata : traindataSet) { 128 Classifier classifier = setupClassifier(); 129 classifier.buildClassifier(traindata); 130 classifiers.add(classifier); 131 trainingData.add(new Instances(traindata)); 132 } 133 } 134 135 @Override 136 public void buildClassifier(Instances traindata) throws Exception { 137 classifiers = new LinkedList<>(); 138 trainingData = new LinkedList<>(); 139 final Classifier classifier = setupClassifier(); 140 classifier.buildClassifier(traindata); 141 classifiers.add(classifier); 142 trainingData.add(new Instances(traindata)); 143 } 144 } 127 145 } -
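A small sketch of the voting step in TraindatasetBagging.classifyInstance() (the vote values and the demo class are made up): each per-training-set classifier contributes one prediction, the predictions are averaged, and the ensemble answers 1.0 once the average reaches 0.5.

public class BaggingVoteDemo {
    public static void main(String[] args) {
        double[] votes = { 1.0, 0.0, 1.0 };  // hypothetical predictions, one per training set
        double classification = 0.0;
        for (double v : votes) {
            classification += v;
        }
        classification /= votes.length;
        System.out.println((classification >= 0.5) ? 1.0 : 0.0);  // majority vote: prints 1.0
    }
}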
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaBaseTraining.java
r25 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 15 29 * Allows specification of the Weka classifier and its params in the XML experiment configuration. 16 30 * 17 * Important conventions of the XML format: 18 * Cross Validation params always come last and are prepended with -CVPARAM19 * Example: <trainer name="WekaTraining"param="RandomForestLocal weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5"/>31 * Important conventions of the XML format: Cross Validation params always come last and are 32 * prepended with -CVPARAM Example: <trainer name="WekaTraining" 33 * param="RandomForestLocal weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5"/> 20 34 */ 21 35 public abstract class WekaBaseTraining implements IWekaCompatibleTrainer { 22 23 protected Classifier classifier = null;24 protected String classifierClassName;25 protected String classifierName;26 protected String[] classifierParams;27 28 @Override29 public void setParameter(String parameters) {30 String[] params = parameters.split(" ");31 36 32 // first part of the params is the classifierName (e.g. SMORBF) 33 classifierName = params[0]; 34 35 // the following parameters can be copied from weka! 36 37 // second param is classifierClassName (e.g. weka.classifiers.functions.SMO) 38 classifierClassName = params[1]; 39 40 // rest are params to the specified classifier (e.g. 
-K weka.classifiers.functions.supportVector.RBFKernel) 41 classifierParams = Arrays.copyOfRange(params, 2, params.length); 42 43 classifier = setupClassifier(); 44 } 37 protected Classifier classifier = null; 38 protected String classifierClassName; 39 protected String classifierName; 40 protected String[] classifierParams; 45 41 46 @Override 47 public Classifier getClassifier() { 48 return classifier; 49 } 42 @Override 43 public void setParameter(String parameters) { 44 String[] params = parameters.split(" "); 50 45 51 public Classifier setupClassifier() { 52 Classifier cl = null; 53 try{ 54 @SuppressWarnings("rawtypes") 55 Class c = Class.forName(classifierClassName); 56 Classifier obj = (Classifier) c.newInstance(); 57 58 // Filter out -CVPARAM, these are special because they do not belong to the Weka classifier class as parameters 59 String[] param = Arrays.copyOf(classifierParams, classifierParams.length); 60 String[] cvparam = {}; 61 boolean cv = false; 62 for ( int i=0; i < classifierParams.length; i++ ) { 63 if(classifierParams[i].equals("-CVPARAM")) { 64 // rest of array are cvparam 65 cvparam = Arrays.copyOfRange(classifierParams, i+1, classifierParams.length); 66 67 // before this we have normal params 68 param = Arrays.copyOfRange(classifierParams, 0, i); 69 70 cv = true; 71 break; 72 } 73 } 74 75 // set classifier params 76 ((OptionHandler)obj).setOptions(param); 77 cl = obj; 78 79 // we have cross val params 80 // cant check on cvparam.length here, it may not be initialized 81 if(cv) { 82 final CVParameterSelection ps = new CVParameterSelection(); 83 ps.setClassifier(obj); 84 ps.setNumFolds(5); 85 //ps.addCVParameter("I 5 25 5"); 86 for( int i=1 ; i<cvparam.length/4 ; i++ ) { 87 ps.addCVParameter(Arrays.asList(Arrays.copyOfRange(cvparam, 0, 4*i)).toString().replaceAll(", ", " ").replaceAll("^\\[|\\]$", "")); 88 } 89 90 cl = ps; 91 } 46 // first part of the params is the classifierName (e.g. SMORBF) 47 classifierName = params[0]; 92 48 93 }catch(ClassNotFoundException e) { 94 Console.traceln(Level.WARNING, String.format("class not found: %s", e.toString())); 95 e.printStackTrace(); 96 } catch (InstantiationException e) { 97 Console.traceln(Level.WARNING, String.format("Instantiation Exception: %s", e.toString())); 98 e.printStackTrace(); 99 } catch (IllegalAccessException e) { 100 Console.traceln(Level.WARNING, String.format("Illegal Access Exception: %s", e.toString())); 101 e.printStackTrace(); 102 } catch (Exception e) { 103 Console.traceln(Level.WARNING, String.format("Exception: %s", e.toString())); 104 e.printStackTrace(); 105 } 106 107 return cl; 108 } 49 // the following parameters can be copied from weka! 109 50 110 @Override 111 public String getName() { 112 return classifierName; 113 } 114 51 // second param is classifierClassName (e.g. weka.classifiers.functions.SMO) 52 classifierClassName = params[1]; 53 54 // rest are params to the specified classifier (e.g. 
-K 55 // weka.classifiers.functions.supportVector.RBFKernel) 56 classifierParams = Arrays.copyOfRange(params, 2, params.length); 57 58 classifier = setupClassifier(); 59 } 60 61 @Override 62 public Classifier getClassifier() { 63 return classifier; 64 } 65 66 public Classifier setupClassifier() { 67 Classifier cl = null; 68 try { 69 @SuppressWarnings("rawtypes") 70 Class c = Class.forName(classifierClassName); 71 Classifier obj = (Classifier) c.newInstance(); 72 73 // Filter out -CVPARAM, these are special because they do not belong to the Weka 74 // classifier class as parameters 75 String[] param = Arrays.copyOf(classifierParams, classifierParams.length); 76 String[] cvparam = { }; 77 boolean cv = false; 78 for (int i = 0; i < classifierParams.length; i++) { 79 if (classifierParams[i].equals("-CVPARAM")) { 80 // rest of array are cvparam 81 cvparam = Arrays.copyOfRange(classifierParams, i + 1, classifierParams.length); 82 83 // before this we have normal params 84 param = Arrays.copyOfRange(classifierParams, 0, i); 85 86 cv = true; 87 break; 88 } 89 } 90 91 // set classifier params 92 ((OptionHandler) obj).setOptions(param); 93 cl = obj; 94 95 // we have cross val params 96 // cant check on cvparam.length here, it may not be initialized 97 if (cv) { 98 final CVParameterSelection ps = new CVParameterSelection(); 99 ps.setClassifier(obj); 100 ps.setNumFolds(5); 101 // ps.addCVParameter("I 5 25 5"); 102 for (int i = 1; i < cvparam.length / 4; i++) { 103 ps.addCVParameter(Arrays.asList(Arrays.copyOfRange(cvparam, 0, 4 * i)) 104 .toString().replaceAll(", ", " ").replaceAll("^\\[|\\]$", "")); 105 } 106 107 cl = ps; 108 } 109 110 } 111 catch (ClassNotFoundException e) { 112 Console.traceln(Level.WARNING, String.format("class not found: %s", e.toString())); 113 e.printStackTrace(); 114 } 115 catch (InstantiationException e) { 116 Console.traceln(Level.WARNING, 117 String.format("Instantiation Exception: %s", e.toString())); 118 e.printStackTrace(); 119 } 120 catch (IllegalAccessException e) { 121 Console.traceln(Level.WARNING, 122 String.format("Illegal Access Exception: %s", e.toString())); 123 e.printStackTrace(); 124 } 125 catch (Exception e) { 126 Console.traceln(Level.WARNING, String.format("Exception: %s", e.toString())); 127 e.printStackTrace(); 128 } 129 130 return cl; 131 } 132 133 @Override 134 public String getName() { 135 return classifierName; 136 } 137 115 138 } -
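A hedged usage sketch of the parameter convention documented in the WekaBaseTraining Javadoc, using the Javadoc's own example string. WekaBaggingTraining is used here only because it is a concrete WekaBaseTraining subclass from this changeset; any other subclass would do, and the demo class name is invented. The first token becomes the trainer name, the second the Weka classifier class to instantiate reflectively, the remaining tokens are classifier options, and everything after -CVPARAM is handed to CVParameterSelection.

import de.ugoe.cs.cpdp.training.WekaBaggingTraining;

public class WekaBaseTrainingDemo {
    public static void main(String[] args) {
        WekaBaggingTraining trainer = new WekaBaggingTraining();
        // name, Weka classifier class, then the -CVPARAM block (example from the Javadoc)
        trainer.setParameter("RandomForestLocal weka.classifiers.trees.RandomForest -CVPARAM I 5 25 5");
        System.out.println(trainer.getName());                   // "RandomForestLocal"
        System.out.println(trainer.getClassifier().getClass());  // the reflectively built classifier
    }
}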
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLocalEMTraining.java
r25 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 24 38 * WekaLocalEMTraining 25 39 * 26 * Local Trainer with EM Clustering for data partitioning. 27 * Currently supports only EM Clustering. 28 * 29 * 1. Cluster training data 30 * 2. for each cluster train a classifier with training data from cluster 40 * Local Trainer with EM Clustering for data partitioning. Currently supports only EM Clustering. 41 * 42 * 1. Cluster training data 2. for each cluster train a classifier with training data from cluster 31 43 * 3. match test data instance to a cluster, then classify with classifier from the cluster 32 44 * 33 * XML configuration: 34 * <!-- because of clustering --> 35 * <preprocessor name="Normalization" param=""/> 36 * 37 * <!-- cluster trainer --> 38 * <trainer name="WekaLocalEMTraining" param="NaiveBayes weka.classifiers.bayes.NaiveBayes" /> 45 * XML configuration: <!-- because of clustering --> <preprocessor name="Normalization" param=""/> 46 * 47 * <!-- cluster trainer --> <trainer name="WekaLocalEMTraining" 48 * param="NaiveBayes weka.classifiers.bayes.NaiveBayes" /> 39 49 */ 40 50 public class WekaLocalEMTraining extends WekaBaseTraining implements ITrainingStrategy { 41 51 42 private final TraindatasetCluster classifier = new TraindatasetCluster(); 43 44 @Override 45 public Classifier getClassifier() { 46 return classifier; 47 } 48 49 @Override 50 public void apply(Instances traindata) { 51 PrintStream errStr = System.err; 52 System.setErr(new PrintStream(new NullOutputStream())); 53 try { 54 classifier.buildClassifier(traindata); 55 } catch (Exception e) { 56 throw new RuntimeException(e); 57 } finally { 58 System.setErr(errStr); 59 } 60 } 61 62 63 public class TraindatasetCluster extends AbstractClassifier { 64 65 private static final long serialVersionUID = 1L; 66 67 private EM clusterer = null; 68 69 private HashMap<Integer, Classifier> cclassifier; 70 private HashMap<Integer, Instances> ctraindata; 71 72 73 /** 74 * Helper method that gives us a clean instance copy with 75 * the values of the instancelist of the first parameter. 
76 * 77 * @param instancelist with attributes 78 * @param instance with only values 79 * @return copy of the instance 80 */ 81 private Instance createInstance(Instances instances, Instance instance) { 82 // attributes for feeding instance to classifier 83 Set<String> attributeNames = new HashSet<>(); 84 for( int j=0; j<instances.numAttributes(); j++ ) { 85 attributeNames.add(instances.attribute(j).name()); 86 } 87 88 double[] values = new double[instances.numAttributes()]; 89 int index = 0; 90 for( int j=0; j<instance.numAttributes(); j++ ) { 91 if( attributeNames.contains(instance.attribute(j).name())) { 92 values[index] = instance.value(j); 93 index++; 94 } 95 } 96 97 Instances tmp = new Instances(instances); 98 tmp.clear(); 99 Instance instCopy = new DenseInstance(instance.weight(), values); 100 instCopy.setDataset(tmp); 101 102 return instCopy; 103 } 104 105 @Override 106 public double classifyInstance(Instance instance) { 107 double ret = 0; 108 try { 109 // 1. copy the instance (keep the class attribute) 110 Instances traindata = ctraindata.get(0); 111 Instance classInstance = createInstance(traindata, instance); 112 113 // 2. remove class attribute before clustering 114 Remove filter = new Remove(); 115 filter.setAttributeIndices("" + (traindata.classIndex() + 1)); 116 filter.setInputFormat(traindata); 117 traindata = Filter.useFilter(traindata, filter); 118 119 // 3. copy the instance (without the class attribute) for clustering 120 Instance clusterInstance = createInstance(traindata, instance); 121 122 // 4. match instance without class attribute to a cluster number 123 int cnum = clusterer.clusterInstance(clusterInstance); 124 125 // 5. classify instance with class attribute to the classifier of that cluster number 126 ret = cclassifier.get(cnum).classifyInstance(classInstance); 127 128 }catch( Exception e ) { 129 Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster!")); 130 throw new RuntimeException(e); 131 } 132 return ret; 133 } 134 135 @Override 136 public void buildClassifier(Instances traindata) throws Exception { 137 138 // 1. copy training data 139 Instances train = new Instances(traindata); 140 141 // 2. remove class attribute for clustering 142 Remove filter = new Remove(); 143 filter.setAttributeIndices("" + (train.classIndex() + 1)); 144 filter.setInputFormat(train); 145 train = Filter.useFilter(train, filter); 146 147 // new objects 148 cclassifier = new HashMap<Integer, Classifier>(); 149 ctraindata = new HashMap<Integer, Instances>(); 150 151 Instances ctrain; 152 int maxNumClusters = train.size(); 153 boolean sufficientInstancesInEachCluster; 154 do { // while(onlyTarget) 155 sufficientInstancesInEachCluster = true; 156 clusterer = new EM(); 157 clusterer.setMaximumNumberOfClusters(maxNumClusters); 158 clusterer.buildClusterer(train); 159 160 // 4. 
get cluster membership of our traindata 161 //AddCluster cfilter = new AddCluster(); 162 //cfilter.setClusterer(clusterer); 163 //cfilter.setInputFormat(train); 164 //Instances ctrain = Filter.useFilter(train, cfilter); 165 166 ctrain = new Instances(train); 167 ctraindata = new HashMap<>(); 168 169 // get traindata per cluster 170 for ( int j=0; j < ctrain.numInstances(); j++ ) { 171 // get the cluster number from the attributes, subract 1 because if we clusterInstance we get 0-n, and this is 1-n 172 //cnumber = Integer.parseInt(ctrain.get(j).stringValue(ctrain.get(j).numAttributes()-1).replace("cluster", "")) - 1; 173 174 int cnumber = clusterer.clusterInstance(ctrain.get(j)); 175 // add training data to list of instances for this cluster number 176 if ( !ctraindata.containsKey(cnumber) ) { 177 ctraindata.put(cnumber, new Instances(traindata)); 178 ctraindata.get(cnumber).delete(); 179 } 180 ctraindata.get(cnumber).add(traindata.get(j)); 181 } 182 183 for( Entry<Integer,Instances> entry : ctraindata.entrySet() ) { 184 Instances instances = entry.getValue(); 185 int[] counts = instances.attributeStats(instances.classIndex()).nominalCounts; 186 for( int count : counts ) { 187 sufficientInstancesInEachCluster &= count>0; 188 } 189 sufficientInstancesInEachCluster &= instances.numInstances()>=5; 190 } 191 maxNumClusters = clusterer.numberOfClusters()-1; 192 } while(!sufficientInstancesInEachCluster); 193 194 // train one classifier per cluster, we get the cluster number from the training data 195 Iterator<Integer> clusternumber = ctraindata.keySet().iterator(); 196 while ( clusternumber.hasNext() ) { 197 int cnumber = clusternumber.next(); 198 cclassifier.put(cnumber,setupClassifier()); 199 cclassifier.get(cnumber).buildClassifier(ctraindata.get(cnumber)); 200 201 //Console.traceln(Level.INFO, String.format("classifier in cluster "+cnumber)); 202 } 203 } 204 } 52 private final TraindatasetCluster classifier = new TraindatasetCluster(); 53 54 @Override 55 public Classifier getClassifier() { 56 return classifier; 57 } 58 59 @Override 60 public void apply(Instances traindata) { 61 PrintStream errStr = System.err; 62 System.setErr(new PrintStream(new NullOutputStream())); 63 try { 64 classifier.buildClassifier(traindata); 65 } 66 catch (Exception e) { 67 throw new RuntimeException(e); 68 } 69 finally { 70 System.setErr(errStr); 71 } 72 } 73 74 public class TraindatasetCluster extends AbstractClassifier { 75 76 private static final long serialVersionUID = 1L; 77 78 private EM clusterer = null; 79 80 private HashMap<Integer, Classifier> cclassifier; 81 private HashMap<Integer, Instances> ctraindata; 82 83 /** 84 * Helper method that gives us a clean instance copy with the values of the instancelist of 85 * the first parameter. 
86 * 87 * @param instancelist 88 * with attributes 89 * @param instance 90 * with only values 91 * @return copy of the instance 92 */ 93 private Instance createInstance(Instances instances, Instance instance) { 94 // attributes for feeding instance to classifier 95 Set<String> attributeNames = new HashSet<>(); 96 for (int j = 0; j < instances.numAttributes(); j++) { 97 attributeNames.add(instances.attribute(j).name()); 98 } 99 100 double[] values = new double[instances.numAttributes()]; 101 int index = 0; 102 for (int j = 0; j < instance.numAttributes(); j++) { 103 if (attributeNames.contains(instance.attribute(j).name())) { 104 values[index] = instance.value(j); 105 index++; 106 } 107 } 108 109 Instances tmp = new Instances(instances); 110 tmp.clear(); 111 Instance instCopy = new DenseInstance(instance.weight(), values); 112 instCopy.setDataset(tmp); 113 114 return instCopy; 115 } 116 117 @Override 118 public double classifyInstance(Instance instance) { 119 double ret = 0; 120 try { 121 // 1. copy the instance (keep the class attribute) 122 Instances traindata = ctraindata.get(0); 123 Instance classInstance = createInstance(traindata, instance); 124 125 // 2. remove class attribute before clustering 126 Remove filter = new Remove(); 127 filter.setAttributeIndices("" + (traindata.classIndex() + 1)); 128 filter.setInputFormat(traindata); 129 traindata = Filter.useFilter(traindata, filter); 130 131 // 3. copy the instance (without the class attribute) for clustering 132 Instance clusterInstance = createInstance(traindata, instance); 133 134 // 4. match instance without class attribute to a cluster number 135 int cnum = clusterer.clusterInstance(clusterInstance); 136 137 // 5. classify instance with class attribute to the classifier of that cluster 138 // number 139 ret = cclassifier.get(cnum).classifyInstance(classInstance); 140 141 } 142 catch (Exception e) { 143 Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster!")); 144 throw new RuntimeException(e); 145 } 146 return ret; 147 } 148 149 @Override 150 public void buildClassifier(Instances traindata) throws Exception { 151 152 // 1. copy training data 153 Instances train = new Instances(traindata); 154 155 // 2. remove class attribute for clustering 156 Remove filter = new Remove(); 157 filter.setAttributeIndices("" + (train.classIndex() + 1)); 158 filter.setInputFormat(train); 159 train = Filter.useFilter(train, filter); 160 161 // new objects 162 cclassifier = new HashMap<Integer, Classifier>(); 163 ctraindata = new HashMap<Integer, Instances>(); 164 165 Instances ctrain; 166 int maxNumClusters = train.size(); 167 boolean sufficientInstancesInEachCluster; 168 do { // while(onlyTarget) 169 sufficientInstancesInEachCluster = true; 170 clusterer = new EM(); 171 clusterer.setMaximumNumberOfClusters(maxNumClusters); 172 clusterer.buildClusterer(train); 173 174 // 4. 
get cluster membership of our traindata 175 // AddCluster cfilter = new AddCluster(); 176 // cfilter.setClusterer(clusterer); 177 // cfilter.setInputFormat(train); 178 // Instances ctrain = Filter.useFilter(train, cfilter); 179 180 ctrain = new Instances(train); 181 ctraindata = new HashMap<>(); 182 183 // get traindata per cluster 184 for (int j = 0; j < ctrain.numInstances(); j++) { 185 // get the cluster number from the attributes, subract 1 because if we 186 // clusterInstance we get 0-n, and this is 1-n 187 // cnumber = 188 // Integer.parseInt(ctrain.get(j).stringValue(ctrain.get(j).numAttributes()-1).replace("cluster", 189 // "")) - 1; 190 191 int cnumber = clusterer.clusterInstance(ctrain.get(j)); 192 // add training data to list of instances for this cluster number 193 if (!ctraindata.containsKey(cnumber)) { 194 ctraindata.put(cnumber, new Instances(traindata)); 195 ctraindata.get(cnumber).delete(); 196 } 197 ctraindata.get(cnumber).add(traindata.get(j)); 198 } 199 200 for (Entry<Integer, Instances> entry : ctraindata.entrySet()) { 201 Instances instances = entry.getValue(); 202 int[] counts = instances.attributeStats(instances.classIndex()).nominalCounts; 203 for (int count : counts) { 204 sufficientInstancesInEachCluster &= count > 0; 205 } 206 sufficientInstancesInEachCluster &= instances.numInstances() >= 5; 207 } 208 maxNumClusters = clusterer.numberOfClusters() - 1; 209 } 210 while (!sufficientInstancesInEachCluster); 211 212 // train one classifier per cluster, we get the cluster number from the training data 213 Iterator<Integer> clusternumber = ctraindata.keySet().iterator(); 214 while (clusternumber.hasNext()) { 215 int cnumber = clusternumber.next(); 216 cclassifier.put(cnumber, setupClassifier()); 217 cclassifier.get(cnumber).buildClassifier(ctraindata.get(cnumber)); 218 219 // Console.traceln(Level.INFO, String.format("classifier in cluster "+cnumber)); 220 } 221 } 222 } 205 223 } -
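Note: the core idea behind WekaLocalEMTraining is to cluster the training data with EM (after removing the class attribute), train one base classifier per cluster, and at prediction time route each instance to the classifier of its cluster. The following is a minimal, self-contained sketch of that pattern, not the CrossPare class itself; it assumes Weka is on the classpath, that the class index of the training data is already set, and it uses NaiveBayes as the base learner (as in the XML example above) while omitting the retry loop that shrinks the maximum number of clusters until every cluster has enough instances of each class.

// Minimal sketch of EM-based local training (illustrative, not the CrossPare implementation).
import java.util.HashMap;
import java.util.Map;

import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayes;
import weka.clusterers.EM;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class LocalEMSketch {

    private EM clusterer;
    private Instances clusterHeader; // header of the class-free data, needed at prediction time
    private final Map<Integer, Classifier> classifiers = new HashMap<>();

    public void build(Instances traindata) throws Exception {
        // remove the class attribute before clustering
        Remove remove = new Remove();
        remove.setAttributeIndices("" + (traindata.classIndex() + 1));
        remove.setInputFormat(traindata);
        Instances clusterData = Filter.useFilter(traindata, remove);
        clusterHeader = new Instances(clusterData, 0);

        clusterer = new EM();
        clusterer.buildClusterer(clusterData);

        // partition the original (labelled) data by cluster membership
        Map<Integer, Instances> partitions = new HashMap<>();
        for (int i = 0; i < clusterData.numInstances(); i++) {
            int c = clusterer.clusterInstance(clusterData.get(i));
            partitions.computeIfAbsent(c, k -> new Instances(traindata, 0)).add(traindata.get(i));
        }

        // one base classifier per cluster
        for (Map.Entry<Integer, Instances> entry : partitions.entrySet()) {
            Classifier c = new NaiveBayes();
            c.buildClassifier(entry.getValue());
            classifiers.put(entry.getKey(), c);
        }
    }

    public double classify(Instance instance) throws Exception {
        // copy all attribute values except the class so EM can assign a cluster
        double[] values = new double[instance.numAttributes() - 1];
        int k = 0;
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (j != instance.classIndex()) {
                values[k++] = instance.value(j);
            }
        }
        Instance unlabelled = new DenseInstance(1.0, values);
        unlabelled.setDataset(clusterHeader);

        int c = clusterer.clusterInstance(unlabelled);
        return classifiers.get(c).classifyInstance(instance);
    }
}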
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaLocalFQTraining.java
r25 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 24 38 25 39 /** 26 * Trainer with reimplementation of WHERE clustering algorithm from: 27 * Tim Menzies, Andrew Butcher, David Cok, Andrian Marcus, Lucas Layman, 28 * Forrest Shull, Burak Turhan, Thomas Zimmermann, 29 * "Local versus Global Lessons for Defect Prediction and Effort Estimation," 30 * IEEE Transactions on Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013 40 * Trainer with reimplementation of WHERE clustering algorithm from: Tim Menzies, Andrew Butcher, 41 * David Cok, Andrian Marcus, Lucas Layman, Forrest Shull, Burak Turhan, Thomas Zimmermann, 42 * "Local versus Global Lessons for Defect Prediction and Effort Estimation," IEEE Transactions on 43 * Software Engineering, vol. 39, no. 6, pp. 822-834, June, 2013 31 44 * 32 * With WekaLocalFQTraining we do the following: 33 * 1) Run the Fastmap algorithm on all training data, let it calculate the 2 most significant34 * dimensions and projections of each instance to these dimensions35 * 2) With these 2 dimensions we span a QuadTree which gets recursively split on median(x) and median(y) values.36 * 3) We cluster the QuadTree nodes together if they have similar density (50%)37 * 4) We save the clusters and their training data38 * 5) We only use clusters with > ALPHA instances (currently Math.sqrt(SIZE)), rest is discarded with the training data of this cluster39 * 6) We train a Weka classifier for each cluster with the clusters training data40 * 7) We recalculate Fastmap distances for a single instance with the old pivots and then try to find a cluster containing the coords of the instance.41 * 7.1.) If we can not find a cluster (due to coords outside of all clusters) we find the nearest cluster.42 * 8) We classify the Instance with theclassifier and traindata from the Cluster we found in 7.45 * With WekaLocalFQTraining we do the following: 1) Run the Fastmap algorithm on all training data, 46 * let it calculate the 2 most significant dimensions and projections of each instance to these 47 * dimensions 2) With these 2 dimensions we span a QuadTree which gets recursively split on 48 * median(x) and median(y) values. 3) We cluster the QuadTree nodes together if they have similar 49 * density (50%) 4) We save the clusters and their training data 5) We only use clusters with > 50 * ALPHA instances (currently Math.sqrt(SIZE)), rest is discarded with the training data of this 51 * cluster 6) We train a Weka classifier for each cluster with the clusters training data 7) We 52 * recalculate Fastmap distances for a single instance with the old pivots and then try to find a 53 * cluster containing the coords of the instance. 7.1.) If we can not find a cluster (due to coords 54 * outside of all clusters) we find the nearest cluster. 8) We classify the Instance with the 55 * classifier and traindata from the Cluster we found in 7. 
43 56 */ 44 57 public class WekaLocalFQTraining extends WekaBaseTraining implements ITrainingStrategy { 45 46 private final TraindatasetCluster classifier = new TraindatasetCluster(); 47 48 @Override 49 public Classifier getClassifier() { 50 return classifier; 51 } 52 53 @Override 54 public void apply(Instances traindata) { 55 PrintStream errStr = System.err; 56 System.setErr(new PrintStream(new NullOutputStream())); 57 try { 58 classifier.buildClassifier(traindata); 59 } catch (Exception e) { 60 throw new RuntimeException(e); 61 } finally { 62 System.setErr(errStr); 63 } 64 } 65 66 67 public class TraindatasetCluster extends AbstractClassifier { 68 69 private static final long serialVersionUID = 1L; 70 71 /* classifier per cluster */ 72 private HashMap<Integer, Classifier> cclassifier; 73 74 /* instances per cluster */ 75 private HashMap<Integer, Instances> ctraindata; 76 77 /* holds the instances and indices of the pivot objects of the Fastmap calculation in buildClassifier*/ 78 private HashMap<Integer, Instance> cpivots; 79 80 /* holds the indices of the pivot objects for x,y and the dimension [x,y][dimension]*/ 81 private int[][] cpivotindices; 82 83 /* holds the sizes of the cluster multiple "boxes" per cluster */ 84 private HashMap<Integer, ArrayList<Double[][]>> csize; 85 86 /* debug vars */ 87 @SuppressWarnings("unused") 88 private boolean show_biggest = true; 89 90 @SuppressWarnings("unused") 91 private int CFOUND = 0; 92 @SuppressWarnings("unused") 93 private int CNOTFOUND = 0; 94 95 96 private Instance createInstance(Instances instances, Instance instance) { 97 // attributes for feeding instance to classifier 98 Set<String> attributeNames = new HashSet<>(); 99 for( int j=0; j<instances.numAttributes(); j++ ) { 100 attributeNames.add(instances.attribute(j).name()); 101 } 102 103 double[] values = new double[instances.numAttributes()]; 104 int index = 0; 105 for( int j=0; j<instance.numAttributes(); j++ ) { 106 if( attributeNames.contains(instance.attribute(j).name())) { 107 values[index] = instance.value(j); 108 index++; 109 } 110 } 111 112 Instances tmp = new Instances(instances); 113 tmp.clear(); 114 Instance instCopy = new DenseInstance(instance.weight(), values); 115 instCopy.setDataset(tmp); 116 117 return instCopy; 118 } 119 120 /** 121 * Because Fastmap saves only the image not the values of the attributes it used 122 * we can not use the old data directly to classify single instances to clusters. 123 * 124 * To classify a single instance we do a new fastmap computation with only the instance and 125 * the old pivot elements. 126 * 127 * After that we find the cluster with our fastmap result for x and y. 
128 */ 129 @Override 130 public double classifyInstance(Instance instance) { 131 132 double ret = 0; 133 try { 134 // classinstance gets passed to classifier 135 Instances traindata = ctraindata.get(0); 136 Instance classInstance = createInstance(traindata, instance); 137 138 // this one keeps the class attribute 139 Instances traindata2 = ctraindata.get(1); 140 141 // remove class attribute before clustering 142 Remove filter = new Remove(); 143 filter.setAttributeIndices("" + (traindata.classIndex() + 1)); 144 filter.setInputFormat(traindata); 145 traindata = Filter.useFilter(traindata, filter); 146 Instance clusterInstance = createInstance(traindata, instance); 147 148 Fastmap FMAP = new Fastmap(2); 149 EuclideanDistance dist = new EuclideanDistance(traindata); 150 151 // we set our pivot indices [x=0,y=1][dimension] 152 int[][] npivotindices = new int[2][2]; 153 npivotindices[0][0] = 1; 154 npivotindices[1][0] = 2; 155 npivotindices[0][1] = 3; 156 npivotindices[1][1] = 4; 157 158 // build temp dist matrix (2 pivots per dimension + 1 instance we want to classify) 159 // the instance we want to classify comes first after that the pivot elements in the order defined above 160 double[][] distmat = new double[2*FMAP.target_dims+1][2*FMAP.target_dims+1]; 161 distmat[0][0] = 0; 162 distmat[0][1] = dist.distance(clusterInstance, this.cpivots.get((Integer)this.cpivotindices[0][0])); 163 distmat[0][2] = dist.distance(clusterInstance, this.cpivots.get((Integer)this.cpivotindices[1][0])); 164 distmat[0][3] = dist.distance(clusterInstance, this.cpivots.get((Integer)this.cpivotindices[0][1])); 165 distmat[0][4] = dist.distance(clusterInstance, this.cpivots.get((Integer)this.cpivotindices[1][1])); 166 167 distmat[1][0] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][0]), clusterInstance); 168 distmat[1][1] = 0; 169 distmat[1][2] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][0]), this.cpivots.get((Integer)this.cpivotindices[1][0])); 170 distmat[1][3] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][0]), this.cpivots.get((Integer)this.cpivotindices[0][1])); 171 distmat[1][4] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][0]), this.cpivots.get((Integer)this.cpivotindices[1][1])); 172 173 distmat[2][0] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][0]), clusterInstance); 174 distmat[2][1] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][0]), this.cpivots.get((Integer)this.cpivotindices[0][0])); 175 distmat[2][2] = 0; 176 distmat[2][3] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][0]), this.cpivots.get((Integer)this.cpivotindices[0][1])); 177 distmat[2][4] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][0]), this.cpivots.get((Integer)this.cpivotindices[1][1])); 178 179 distmat[3][0] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][1]), clusterInstance); 180 distmat[3][1] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][1]), this.cpivots.get((Integer)this.cpivotindices[0][0])); 181 distmat[3][2] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][1]), this.cpivots.get((Integer)this.cpivotindices[1][0])); 182 distmat[3][3] = 0; 183 distmat[3][4] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[0][1]), this.cpivots.get((Integer)this.cpivotindices[1][1])); 184 185 distmat[4][0] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][1]), clusterInstance); 186 distmat[4][1] = 
dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][1]), this.cpivots.get((Integer)this.cpivotindices[0][0])); 187 distmat[4][2] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][1]), this.cpivots.get((Integer)this.cpivotindices[1][0])); 188 distmat[4][3] = dist.distance(this.cpivots.get((Integer)this.cpivotindices[1][1]), this.cpivots.get((Integer)this.cpivotindices[0][1])); 189 distmat[4][4] = 0; 190 191 192 /* debug output: show biggest distance found within the new distance matrix 193 double biggest = 0; 194 for(int i=0; i < distmat.length; i++) { 195 for(int j=0; j < distmat[0].length; j++) { 196 if(biggest < distmat[i][j]) { 197 biggest = distmat[i][j]; 198 } 199 } 200 } 201 if(this.show_biggest) { 202 Console.traceln(Level.INFO, String.format(""+clusterInstance)); 203 Console.traceln(Level.INFO, String.format("biggest distances: "+ biggest)); 204 this.show_biggest = false; 205 } 206 */ 207 208 FMAP.setDistmat(distmat); 209 FMAP.setPivots(npivotindices); 210 FMAP.calculate(); 211 double[][] x = FMAP.getX(); 212 double[] proj = x[0]; 213 214 // debug output: show the calculated distance matrix, our result vektor for the instance and the complete result matrix 215 /* 216 Console.traceln(Level.INFO, "distmat:"); 217 for(int i=0; i<distmat.length; i++){ 218 for(int j=0; j<distmat[0].length; j++){ 219 Console.trace(Level.INFO, String.format("%20s", distmat[i][j])); 220 } 221 Console.traceln(Level.INFO, ""); 222 } 223 224 Console.traceln(Level.INFO, "vector:"); 225 for(int i=0; i < proj.length; i++) { 226 Console.trace(Level.INFO, String.format("%20s", proj[i])); 227 } 228 Console.traceln(Level.INFO, ""); 229 230 Console.traceln(Level.INFO, "resultmat:"); 231 for(int i=0; i<x.length; i++){ 232 for(int j=0; j<x[0].length; j++){ 233 Console.trace(Level.INFO, String.format("%20s", x[i][j])); 234 } 235 Console.traceln(Level.INFO, ""); 236 } 237 */ 238 239 // now we iterate over all clusters (well, boxes of sizes per cluster really) and save the number of the 240 // cluster in which we are 241 int cnumber; 242 int found_cnumber = -1; 243 Iterator<Integer> clusternumber = this.csize.keySet().iterator(); 244 while ( clusternumber.hasNext() && found_cnumber == -1) { 245 cnumber = clusternumber.next(); 246 247 // now iterate over the boxes of the cluster and hope we find one (cluster could have been removed) 248 // or we are too far away from any cluster because of the fastmap calculation with the initial pivot objects 249 for ( int box=0; box < this.csize.get(cnumber).size(); box++ ) { 250 Double[][] current = this.csize.get(cnumber).get(box); 251 252 if(proj[0] >= current[0][0] && proj[0] <= current[0][1] && // x 253 proj[1] >= current[1][0] && proj[1] <= current[1][1]) { // y 254 found_cnumber = cnumber; 255 } 256 } 257 } 258 259 // we want to count how often we are really inside a cluster 260 //if ( found_cnumber == -1 ) { 261 // CNOTFOUND += 1; 262 //}else { 263 // CFOUND += 1; 264 //} 265 266 // now it can happen that we do not find a cluster because we deleted it previously (too few instances) 267 // or we get bigger distance measures from weka so that we are completely outside of our clusters. 268 // in these cases we just find the nearest cluster to our instance and use it for classification. 
269 // to do that we use the EuclideanDistance again to compare our distance to all other Instances 270 // then we take the cluster of the closest weka instance 271 dist = new EuclideanDistance(traindata2); 272 if( !this.ctraindata.containsKey(found_cnumber) ) { 273 double min_distance = Double.MAX_VALUE; 274 clusternumber = ctraindata.keySet().iterator(); 275 while ( clusternumber.hasNext() ) { 276 cnumber = clusternumber.next(); 277 for(int i=0; i < ctraindata.get(cnumber).size(); i++) { 278 if(dist.distance(instance, ctraindata.get(cnumber).get(i)) <= min_distance) { 279 found_cnumber = cnumber; 280 min_distance = dist.distance(instance, ctraindata.get(cnumber).get(i)); 281 } 282 } 283 } 284 } 285 286 // here we have the cluster where an instance has the minimum distance between itself and the 287 // instance we want to classify 288 // if we still have not found a cluster we exit because something is really wrong 289 if( found_cnumber == -1 ) { 290 Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster with full search!")); 291 throw new RuntimeException("cluster not found with full search"); 292 } 293 294 // classify the passed instance with the cluster we found and its training data 295 ret = cclassifier.get(found_cnumber).classifyInstance(classInstance); 296 297 }catch( Exception e ) { 298 Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster!")); 299 throw new RuntimeException(e); 300 } 301 return ret; 302 } 303 304 @Override 305 public void buildClassifier(Instances traindata) throws Exception { 306 307 //Console.traceln(Level.INFO, String.format("found: "+ CFOUND + ", notfound: " + CNOTFOUND)); 308 this.show_biggest = true; 309 310 cclassifier = new HashMap<Integer, Classifier>(); 311 ctraindata = new HashMap<Integer, Instances>(); 312 cpivots = new HashMap<Integer, Instance>(); 313 cpivotindices = new int[2][2]; 314 315 // 1. copy traindata 316 Instances train = new Instances(traindata); 317 Instances train2 = new Instances(traindata); // this one keeps the class attribute 318 319 // 2. remove class attribute for clustering 320 Remove filter = new Remove(); 321 filter.setAttributeIndices("" + (train.classIndex() + 1)); 322 filter.setInputFormat(train); 323 train = Filter.useFilter(train, filter); 324 325 // 3. calculate distance matrix (needed for Fastmap because it starts at dimension 1) 326 double biggest = 0; 327 EuclideanDistance dist = new EuclideanDistance(train); 328 double[][] distmat = new double[train.size()][train.size()]; 329 for( int i=0; i < train.size(); i++ ) { 330 for( int j=0; j < train.size(); j++ ) { 331 distmat[i][j] = dist.distance(train.get(i), train.get(j)); 332 if( distmat[i][j] > biggest ) { 333 biggest = distmat[i][j]; 334 } 335 } 336 } 337 //Console.traceln(Level.INFO, String.format("biggest distances: "+ biggest)); 338 339 // 4. 
run fastmap for 2 dimensions on the distance matrix 340 Fastmap FMAP = new Fastmap(2); 341 FMAP.setDistmat(distmat); 342 FMAP.calculate(); 343 344 cpivotindices = FMAP.getPivots(); 345 346 double[][] X = FMAP.getX(); 347 distmat = new double[0][0]; 348 System.gc(); 349 350 // quadtree payload generation 351 ArrayList<QuadTreePayload<Instance>> qtp = new ArrayList<QuadTreePayload<Instance>>(); 352 353 // we need these for the sizes of the quadrants 354 double[] big = {0,0}; 355 double[] small = {Double.MAX_VALUE,Double.MAX_VALUE}; 356 357 // set quadtree payload values and get max and min x and y values for size 358 for( int i=0; i<X.length; i++ ){ 359 if(X[i][0] >= big[0]) { 360 big[0] = X[i][0]; 361 } 362 if(X[i][1] >= big[1]) { 363 big[1] = X[i][1]; 364 } 365 if(X[i][0] <= small[0]) { 366 small[0] = X[i][0]; 367 } 368 if(X[i][1] <= small[1]) { 369 small[1] = X[i][1]; 370 } 371 QuadTreePayload<Instance> tmp = new QuadTreePayload<Instance>(X[i][0], X[i][1], train2.get(i)); 372 qtp.add(tmp); 373 } 374 375 //Console.traceln(Level.INFO, String.format("size for cluster ("+small[0]+","+small[1]+") - ("+big[0]+","+big[1]+")")); 376 377 // 5. generate quadtree 378 QuadTree TREE = new QuadTree(null, qtp); 379 QuadTree.size = train.size(); 380 QuadTree.alpha = Math.sqrt(train.size()); 381 QuadTree.ccluster = new ArrayList<ArrayList<QuadTreePayload<Instance>>>(); 382 QuadTree.csize = new HashMap<Integer, ArrayList<Double[][]>>(); 383 384 //Console.traceln(Level.INFO, String.format("Generate QuadTree with "+ QuadTree.size + " size, Alpha: "+ QuadTree.alpha+ "")); 385 386 // set the size and then split the tree recursively at the median value for x, y 387 TREE.setSize(new double[] {small[0], big[0]}, new double[] {small[1], big[1]}); 388 389 // recursive split und grid clustering eher static 390 TREE.recursiveSplit(TREE); 391 392 // generate list of nodes sorted by density (childs only) 393 ArrayList<QuadTree> l = new ArrayList<QuadTree>(TREE.getList(TREE)); 394 395 // recursive grid clustering (tree pruning), the values are stored in ccluster 396 TREE.gridClustering(l); 397 398 // wir iterieren durch die cluster und sammeln uns die instanzen daraus 399 //ctraindata.clear(); 400 for( int i=0; i < QuadTree.ccluster.size(); i++ ) { 401 ArrayList<QuadTreePayload<Instance>> current = QuadTree.ccluster.get(i); 402 403 // i is the clusternumber 404 // we only allow clusters with Instances > ALPHA, other clusters are not considered! 
405 //if(current.size() > QuadTree.alpha) { 406 if( current.size() > 4 ) { 407 for( int j=0; j < current.size(); j++ ) { 408 if( !ctraindata.containsKey(i) ) { 409 ctraindata.put(i, new Instances(train2)); 410 ctraindata.get(i).delete(); 411 } 412 ctraindata.get(i).add(current.get(j).getInst()); 413 } 414 }else{ 415 Console.traceln(Level.INFO, String.format("drop cluster, only: " + current.size() + " instances")); 416 } 417 } 418 419 // here we keep things we need later on 420 // QuadTree sizes for later use (matching new instances) 421 this.csize = new HashMap<Integer, ArrayList<Double[][]>>(QuadTree.csize); 422 423 // pivot elements 424 //this.cpivots.clear(); 425 for( int i=0; i < FMAP.PA[0].length; i++ ) { 426 this.cpivots.put(FMAP.PA[0][i], (Instance)train.get(FMAP.PA[0][i]).copy()); 427 } 428 for( int j=0; j < FMAP.PA[0].length; j++ ) { 429 this.cpivots.put(FMAP.PA[1][j], (Instance)train.get(FMAP.PA[1][j]).copy()); 430 } 431 432 433 /* debug output 434 int pnumber; 435 Iterator<Integer> pivotnumber = cpivots.keySet().iterator(); 436 while ( pivotnumber.hasNext() ) { 437 pnumber = pivotnumber.next(); 438 Console.traceln(Level.INFO, String.format("pivot: "+pnumber+ " inst: "+cpivots.get(pnumber))); 439 } 440 */ 441 442 // train one classifier per cluster, we get the cluster number from the traindata 443 int cnumber; 444 Iterator<Integer> clusternumber = ctraindata.keySet().iterator(); 445 //cclassifier.clear(); 446 447 //int traindata_count = 0; 448 while ( clusternumber.hasNext() ) { 449 cnumber = clusternumber.next(); 450 cclassifier.put(cnumber,setupClassifier()); // this is the classifier used for the cluster 451 cclassifier.get(cnumber).buildClassifier(ctraindata.get(cnumber)); 452 //Console.traceln(Level.INFO, String.format("classifier in cluster "+cnumber)); 453 //traindata_count += ctraindata.get(cnumber).size(); 454 //Console.traceln(Level.INFO, String.format("building classifier in cluster "+cnumber +" with "+ ctraindata.get(cnumber).size() +" traindata instances")); 455 } 456 457 // add all traindata 458 //Console.traceln(Level.INFO, String.format("traindata in all clusters: " + traindata_count)); 459 } 460 } 461 462 463 /** 464 * Payload for the QuadTree. 465 * x and y are the calculated Fastmap values. 466 * T is a weka instance. 467 */ 468 public class QuadTreePayload<T> { 469 470 public double x; 471 public double y; 472 private T inst; 473 474 public QuadTreePayload(double x, double y, T value) { 475 this.x = x; 476 this.y = y; 477 this.inst = value; 478 } 479 480 public T getInst() { 481 return this.inst; 482 } 483 } 484 485 486 /** 487 * Fastmap implementation 488 * 489 * Faloutsos, C., & Lin, K. I. (1995). 490 * FastMap: A fast algorithm for indexing, data-mining and visualization of traditional and multimedia datasets 491 * (Vol. 24, No. 2, pp. 163-174). ACM. 
492 */ 493 public class Fastmap { 494 495 /*N x k Array, at the end, the i-th row will be the image of the i-th object*/ 496 private double[][] X; 497 498 /*2 x k pivot Array one pair per recursive call*/ 499 private int[][] PA; 500 501 /*Objects we got (distance matrix)*/ 502 private double[][] O; 503 504 /*column of X currently updated (also the dimension)*/ 505 private int col = 0; 506 507 /*number of dimensions we want*/ 508 private int target_dims = 0; 509 510 // if we already have the pivot elements 511 private boolean pivot_set = false; 512 513 514 public Fastmap(int k) { 515 this.target_dims = k; 516 } 517 518 /** 519 * Sets the distance matrix 520 * and params that depend on this 521 * @param O 522 */ 523 public void setDistmat(double[][] O) { 524 this.O = O; 525 int N = O.length; 526 this.X = new double[N][this.target_dims]; 527 this.PA = new int[2][this.target_dims]; 528 } 529 530 /** 531 * Set pivot elements, we need that to classify instances 532 * after the calculation is complete (because we then want to reuse 533 * only the pivot elements). 534 * 535 * @param pi 536 */ 537 public void setPivots(int[][] pi) { 538 this.pivot_set = true; 539 this.PA = pi; 540 } 541 542 /** 543 * Return the pivot elements that were chosen during the calculation 544 * 545 * @return 546 */ 547 public int[][] getPivots() { 548 return this.PA; 549 } 550 551 /** 552 * The distance function for euclidean distance 553 * 554 * Acts according to equation 4 of the fastmap paper 555 * 556 * @param x x index of x image (if k==0 x object) 557 * @param y y index of y image (if k==0 y object) 558 * @param kdimensionality 559 * @return distance 560 */ 561 private double dist(int x, int y, int k) { 562 563 // basis is object distance, we get this from our distance matrix 564 double tmp = this.O[x][y] * this.O[x][y]; 565 566 // decrease by projections 567 for( int i=0; i < k; i++ ) { 568 double tmp2 = (this.X[x][i] - this.X[y][i]); 569 tmp -= tmp2 * tmp2; 570 } 571 572 return Math.abs(tmp); 573 } 574 575 /** 576 * Find the object farthest from the given index 577 * This method is a helper Method for findDistandObjects 578 * 579 * @param index of the object 580 * @return index of the farthest object from the given index 581 */ 582 private int findFarthest(int index) { 583 double furthest = Double.MIN_VALUE; 584 int ret = 0; 585 586 for( int i=0; i < O.length; i++ ) { 587 double dist = this.dist(i, index, this.col); 588 if( i != index && dist > furthest ) { 589 furthest = dist; 590 ret = i; 591 } 592 } 593 return ret; 594 } 595 596 /** 597 * Finds the pivot objects 598 * 599 * This method is basically algorithm 1 of the fastmap paper. 600 * 601 * @return 2 indexes of the choosen pivot objects 602 */ 603 private int[] findDistantObjects() { 604 // 1. choose object randomly 605 Random r = new Random(); 606 int obj = r.nextInt(this.O.length); 607 608 // 2. find farthest object from randomly chosen object 609 int idx1 = this.findFarthest(obj); 610 611 // 3. find farthest object from previously farthest object 612 int idx2 = this.findFarthest(idx1); 613 614 return new int[] {idx1, idx2}; 615 } 616 617 /** 618 * Calculates the new k-vector values (projections) 619 * 620 * This is basically algorithm 2 of the fastmap paper. 621 * We just added the possibility to pre-set the pivot elements because 622 * we need to classify single instances after the computation is already done. 
623 * 624 * @param dims dimensionality 625 */ 626 public void calculate() { 627 628 for( int k=0; k < this.target_dims; k++ ) { 629 // 2) choose pivot objects 630 if ( !this.pivot_set ) { 631 int[] pivots = this.findDistantObjects(); 632 633 // 3) record ids of pivot objects 634 this.PA[0][this.col] = pivots[0]; 635 this.PA[1][this.col] = pivots[1]; 636 } 637 638 // 4) inter object distances are zero (this.X is initialized with 0 so we just continue) 639 if( this.dist(this.PA[0][this.col], this.PA[1][this.col], this.col) == 0 ) { 640 continue; 641 } 642 643 // 5) project the objects on the line between the pivots 644 double dxy = this.dist(this.PA[0][this.col], this.PA[1][this.col], this.col); 645 for( int i=0; i < this.O.length; i++ ) { 646 647 double dix = this.dist(i, this.PA[0][this.col], this.col); 648 double diy = this.dist(i, this.PA[1][this.col], this.col); 649 650 double tmp = (dix + dxy - diy) / (2 * Math.sqrt(dxy)); 651 652 // save the projection 653 this.X[i][this.col] = tmp; 654 } 655 656 this.col += 1; 657 } 658 } 659 660 /** 661 * returns the result matrix of the projections 662 * 663 * @return calculated result 664 */ 665 public double[][] getX() { 666 return this.X; 667 } 668 } 58 59 private final TraindatasetCluster classifier = new TraindatasetCluster(); 60 61 @Override 62 public Classifier getClassifier() { 63 return classifier; 64 } 65 66 @Override 67 public void apply(Instances traindata) { 68 PrintStream errStr = System.err; 69 System.setErr(new PrintStream(new NullOutputStream())); 70 try { 71 classifier.buildClassifier(traindata); 72 } 73 catch (Exception e) { 74 throw new RuntimeException(e); 75 } 76 finally { 77 System.setErr(errStr); 78 } 79 } 80 81 public class TraindatasetCluster extends AbstractClassifier { 82 83 private static final long serialVersionUID = 1L; 84 85 /* classifier per cluster */ 86 private HashMap<Integer, Classifier> cclassifier; 87 88 /* instances per cluster */ 89 private HashMap<Integer, Instances> ctraindata; 90 91 /* 92 * holds the instances and indices of the pivot objects of the Fastmap calculation in 93 * buildClassifier 94 */ 95 private HashMap<Integer, Instance> cpivots; 96 97 /* holds the indices of the pivot objects for x,y and the dimension [x,y][dimension] */ 98 private int[][] cpivotindices; 99 100 /* holds the sizes of the cluster multiple "boxes" per cluster */ 101 private HashMap<Integer, ArrayList<Double[][]>> csize; 102 103 /* debug vars */ 104 @SuppressWarnings("unused") 105 private boolean show_biggest = true; 106 107 @SuppressWarnings("unused") 108 private int CFOUND = 0; 109 @SuppressWarnings("unused") 110 private int CNOTFOUND = 0; 111 112 private Instance createInstance(Instances instances, Instance instance) { 113 // attributes for feeding instance to classifier 114 Set<String> attributeNames = new HashSet<>(); 115 for (int j = 0; j < instances.numAttributes(); j++) { 116 attributeNames.add(instances.attribute(j).name()); 117 } 118 119 double[] values = new double[instances.numAttributes()]; 120 int index = 0; 121 for (int j = 0; j < instance.numAttributes(); j++) { 122 if (attributeNames.contains(instance.attribute(j).name())) { 123 values[index] = instance.value(j); 124 index++; 125 } 126 } 127 128 Instances tmp = new Instances(instances); 129 tmp.clear(); 130 Instance instCopy = new DenseInstance(instance.weight(), values); 131 instCopy.setDataset(tmp); 132 133 return instCopy; 134 } 135 136 /** 137 * Because Fastmap saves only the image not the values of the attributes it used we can not 138 * use the old data 
directly to classify single instances to clusters. 139 * 140 * To classify a single instance we do a new fastmap computation with only the instance and 141 * the old pivot elements. 142 * 143 * After that we find the cluster with our fastmap result for x and y. 144 */ 145 @Override 146 public double classifyInstance(Instance instance) { 147 148 double ret = 0; 149 try { 150 // classinstance gets passed to classifier 151 Instances traindata = ctraindata.get(0); 152 Instance classInstance = createInstance(traindata, instance); 153 154 // this one keeps the class attribute 155 Instances traindata2 = ctraindata.get(1); 156 157 // remove class attribute before clustering 158 Remove filter = new Remove(); 159 filter.setAttributeIndices("" + (traindata.classIndex() + 1)); 160 filter.setInputFormat(traindata); 161 traindata = Filter.useFilter(traindata, filter); 162 Instance clusterInstance = createInstance(traindata, instance); 163 164 Fastmap FMAP = new Fastmap(2); 165 EuclideanDistance dist = new EuclideanDistance(traindata); 166 167 // we set our pivot indices [x=0,y=1][dimension] 168 int[][] npivotindices = new int[2][2]; 169 npivotindices[0][0] = 1; 170 npivotindices[1][0] = 2; 171 npivotindices[0][1] = 3; 172 npivotindices[1][1] = 4; 173 174 // build temp dist matrix (2 pivots per dimension + 1 instance we want to classify) 175 // the instance we want to classify comes first after that the pivot elements in the 176 // order defined above 177 double[][] distmat = new double[2 * FMAP.target_dims + 1][2 * FMAP.target_dims + 1]; 178 distmat[0][0] = 0; 179 distmat[0][1] = 180 dist.distance(clusterInstance, 181 this.cpivots.get((Integer) this.cpivotindices[0][0])); 182 distmat[0][2] = 183 dist.distance(clusterInstance, 184 this.cpivots.get((Integer) this.cpivotindices[1][0])); 185 distmat[0][3] = 186 dist.distance(clusterInstance, 187 this.cpivots.get((Integer) this.cpivotindices[0][1])); 188 distmat[0][4] = 189 dist.distance(clusterInstance, 190 this.cpivots.get((Integer) this.cpivotindices[1][1])); 191 192 distmat[1][0] = 193 dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]), 194 clusterInstance); 195 distmat[1][1] = 0; 196 distmat[1][2] = 197 dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]), 198 this.cpivots.get((Integer) this.cpivotindices[1][0])); 199 distmat[1][3] = 200 dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]), 201 this.cpivots.get((Integer) this.cpivotindices[0][1])); 202 distmat[1][4] = 203 dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][0]), 204 this.cpivots.get((Integer) this.cpivotindices[1][1])); 205 206 distmat[2][0] = 207 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]), 208 clusterInstance); 209 distmat[2][1] = 210 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]), 211 this.cpivots.get((Integer) this.cpivotindices[0][0])); 212 distmat[2][2] = 0; 213 distmat[2][3] = 214 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]), 215 this.cpivots.get((Integer) this.cpivotindices[0][1])); 216 distmat[2][4] = 217 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][0]), 218 this.cpivots.get((Integer) this.cpivotindices[1][1])); 219 220 distmat[3][0] = 221 dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]), 222 clusterInstance); 223 distmat[3][1] = 224 dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]), 225 this.cpivots.get((Integer) this.cpivotindices[0][0])); 226 distmat[3][2] = 227 dist.distance(this.cpivots.get((Integer) 
this.cpivotindices[0][1]), 228 this.cpivots.get((Integer) this.cpivotindices[1][0])); 229 distmat[3][3] = 0; 230 distmat[3][4] = 231 dist.distance(this.cpivots.get((Integer) this.cpivotindices[0][1]), 232 this.cpivots.get((Integer) this.cpivotindices[1][1])); 233 234 distmat[4][0] = 235 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]), 236 clusterInstance); 237 distmat[4][1] = 238 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]), 239 this.cpivots.get((Integer) this.cpivotindices[0][0])); 240 distmat[4][2] = 241 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]), 242 this.cpivots.get((Integer) this.cpivotindices[1][0])); 243 distmat[4][3] = 244 dist.distance(this.cpivots.get((Integer) this.cpivotindices[1][1]), 245 this.cpivots.get((Integer) this.cpivotindices[0][1])); 246 distmat[4][4] = 0; 247 248 /* 249 * debug output: show biggest distance found within the new distance matrix double 250 * biggest = 0; for(int i=0; i < distmat.length; i++) { for(int j=0; j < 251 * distmat[0].length; j++) { if(biggest < distmat[i][j]) { biggest = distmat[i][j]; 252 * } } } if(this.show_biggest) { Console.traceln(Level.INFO, 253 * String.format(""+clusterInstance)); Console.traceln(Level.INFO, 254 * String.format("biggest distances: "+ biggest)); this.show_biggest = false; } 255 */ 256 257 FMAP.setDistmat(distmat); 258 FMAP.setPivots(npivotindices); 259 FMAP.calculate(); 260 double[][] x = FMAP.getX(); 261 double[] proj = x[0]; 262 263 // debug output: show the calculated distance matrix, our result vektor for the 264 // instance and the complete result matrix 265 /* 266 * Console.traceln(Level.INFO, "distmat:"); for(int i=0; i<distmat.length; i++){ 267 * for(int j=0; j<distmat[0].length; j++){ Console.trace(Level.INFO, 268 * String.format("%20s", distmat[i][j])); } Console.traceln(Level.INFO, ""); } 269 * 270 * Console.traceln(Level.INFO, "vector:"); for(int i=0; i < proj.length; i++) { 271 * Console.trace(Level.INFO, String.format("%20s", proj[i])); } 272 * Console.traceln(Level.INFO, ""); 273 * 274 * Console.traceln(Level.INFO, "resultmat:"); for(int i=0; i<x.length; i++){ for(int 275 * j=0; j<x[0].length; j++){ Console.trace(Level.INFO, String.format("%20s", 276 * x[i][j])); } Console.traceln(Level.INFO, ""); } 277 */ 278 279 // now we iterate over all clusters (well, boxes of sizes per cluster really) and 280 // save the number of the 281 // cluster in which we are 282 int cnumber; 283 int found_cnumber = -1; 284 Iterator<Integer> clusternumber = this.csize.keySet().iterator(); 285 while (clusternumber.hasNext() && found_cnumber == -1) { 286 cnumber = clusternumber.next(); 287 288 // now iterate over the boxes of the cluster and hope we find one (cluster could 289 // have been removed) 290 // or we are too far away from any cluster because of the fastmap calculation 291 // with the initial pivot objects 292 for (int box = 0; box < this.csize.get(cnumber).size(); box++) { 293 Double[][] current = this.csize.get(cnumber).get(box); 294 295 if (proj[0] >= current[0][0] && proj[0] <= current[0][1] && // x 296 proj[1] >= current[1][0] && proj[1] <= current[1][1]) 297 { // y 298 found_cnumber = cnumber; 299 } 300 } 301 } 302 303 // we want to count how often we are really inside a cluster 304 // if ( found_cnumber == -1 ) { 305 // CNOTFOUND += 1; 306 // }else { 307 // CFOUND += 1; 308 // } 309 310 // now it can happen that we do not find a cluster because we deleted it previously 311 // (too few instances) 312 // or we get bigger distance measures from weka 
so that we are completely outside of 313 // our clusters. 314 // in these cases we just find the nearest cluster to our instance and use it for 315 // classification. 316 // to do that we use the EuclideanDistance again to compare our distance to all 317 // other Instances 318 // then we take the cluster of the closest weka instance 319 dist = new EuclideanDistance(traindata2); 320 if (!this.ctraindata.containsKey(found_cnumber)) { 321 double min_distance = Double.MAX_VALUE; 322 clusternumber = ctraindata.keySet().iterator(); 323 while (clusternumber.hasNext()) { 324 cnumber = clusternumber.next(); 325 for (int i = 0; i < ctraindata.get(cnumber).size(); i++) { 326 if (dist.distance(instance, ctraindata.get(cnumber).get(i)) <= min_distance) 327 { 328 found_cnumber = cnumber; 329 min_distance = 330 dist.distance(instance, ctraindata.get(cnumber).get(i)); 331 } 332 } 333 } 334 } 335 336 // here we have the cluster where an instance has the minimum distance between 337 // itself and the 338 // instance we want to classify 339 // if we still have not found a cluster we exit because something is really wrong 340 if (found_cnumber == -1) { 341 Console.traceln(Level.INFO, String 342 .format("ERROR matching instance to cluster with full search!")); 343 throw new RuntimeException("cluster not found with full search"); 344 } 345 346 // classify the passed instance with the cluster we found and its training data 347 ret = cclassifier.get(found_cnumber).classifyInstance(classInstance); 348 349 } 350 catch (Exception e) { 351 Console.traceln(Level.INFO, String.format("ERROR matching instance to cluster!")); 352 throw new RuntimeException(e); 353 } 354 return ret; 355 } 356 357 @Override 358 public void buildClassifier(Instances traindata) throws Exception { 359 360 // Console.traceln(Level.INFO, String.format("found: "+ CFOUND + ", notfound: " + 361 // CNOTFOUND)); 362 this.show_biggest = true; 363 364 cclassifier = new HashMap<Integer, Classifier>(); 365 ctraindata = new HashMap<Integer, Instances>(); 366 cpivots = new HashMap<Integer, Instance>(); 367 cpivotindices = new int[2][2]; 368 369 // 1. copy traindata 370 Instances train = new Instances(traindata); 371 Instances train2 = new Instances(traindata); // this one keeps the class attribute 372 373 // 2. remove class attribute for clustering 374 Remove filter = new Remove(); 375 filter.setAttributeIndices("" + (train.classIndex() + 1)); 376 filter.setInputFormat(train); 377 train = Filter.useFilter(train, filter); 378 379 // 3. calculate distance matrix (needed for Fastmap because it starts at dimension 1) 380 double biggest = 0; 381 EuclideanDistance dist = new EuclideanDistance(train); 382 double[][] distmat = new double[train.size()][train.size()]; 383 for (int i = 0; i < train.size(); i++) { 384 for (int j = 0; j < train.size(); j++) { 385 distmat[i][j] = dist.distance(train.get(i), train.get(j)); 386 if (distmat[i][j] > biggest) { 387 biggest = distmat[i][j]; 388 } 389 } 390 } 391 // Console.traceln(Level.INFO, String.format("biggest distances: "+ biggest)); 392 393 // 4. 
run fastmap for 2 dimensions on the distance matrix 394 Fastmap FMAP = new Fastmap(2); 395 FMAP.setDistmat(distmat); 396 FMAP.calculate(); 397 398 cpivotindices = FMAP.getPivots(); 399 400 double[][] X = FMAP.getX(); 401 distmat = new double[0][0]; 402 System.gc(); 403 404 // quadtree payload generation 405 ArrayList<QuadTreePayload<Instance>> qtp = new ArrayList<QuadTreePayload<Instance>>(); 406 407 // we need these for the sizes of the quadrants 408 double[] big = 409 { 0, 0 }; 410 double[] small = 411 { Double.MAX_VALUE, Double.MAX_VALUE }; 412 413 // set quadtree payload values and get max and min x and y values for size 414 for (int i = 0; i < X.length; i++) { 415 if (X[i][0] >= big[0]) { 416 big[0] = X[i][0]; 417 } 418 if (X[i][1] >= big[1]) { 419 big[1] = X[i][1]; 420 } 421 if (X[i][0] <= small[0]) { 422 small[0] = X[i][0]; 423 } 424 if (X[i][1] <= small[1]) { 425 small[1] = X[i][1]; 426 } 427 QuadTreePayload<Instance> tmp = 428 new QuadTreePayload<Instance>(X[i][0], X[i][1], train2.get(i)); 429 qtp.add(tmp); 430 } 431 432 // Console.traceln(Level.INFO, 433 // String.format("size for cluster ("+small[0]+","+small[1]+") - ("+big[0]+","+big[1]+")")); 434 435 // 5. generate quadtree 436 QuadTree TREE = new QuadTree(null, qtp); 437 QuadTree.size = train.size(); 438 QuadTree.alpha = Math.sqrt(train.size()); 439 QuadTree.ccluster = new ArrayList<ArrayList<QuadTreePayload<Instance>>>(); 440 QuadTree.csize = new HashMap<Integer, ArrayList<Double[][]>>(); 441 442 // Console.traceln(Level.INFO, String.format("Generate QuadTree with "+ QuadTree.size + 443 // " size, Alpha: "+ QuadTree.alpha+ "")); 444 445 // set the size and then split the tree recursively at the median value for x, y 446 TREE.setSize(new double[] 447 { small[0], big[0] }, new double[] 448 { small[1], big[1] }); 449 450 // recursive split und grid clustering eher static 451 TREE.recursiveSplit(TREE); 452 453 // generate list of nodes sorted by density (childs only) 454 ArrayList<QuadTree> l = new ArrayList<QuadTree>(TREE.getList(TREE)); 455 456 // recursive grid clustering (tree pruning), the values are stored in ccluster 457 TREE.gridClustering(l); 458 459 // wir iterieren durch die cluster und sammeln uns die instanzen daraus 460 // ctraindata.clear(); 461 for (int i = 0; i < QuadTree.ccluster.size(); i++) { 462 ArrayList<QuadTreePayload<Instance>> current = QuadTree.ccluster.get(i); 463 464 // i is the clusternumber 465 // we only allow clusters with Instances > ALPHA, other clusters are not considered! 
466 // if(current.size() > QuadTree.alpha) { 467 if (current.size() > 4) { 468 for (int j = 0; j < current.size(); j++) { 469 if (!ctraindata.containsKey(i)) { 470 ctraindata.put(i, new Instances(train2)); 471 ctraindata.get(i).delete(); 472 } 473 ctraindata.get(i).add(current.get(j).getInst()); 474 } 475 } 476 else { 477 Console.traceln(Level.INFO, 478 String.format("drop cluster, only: " + current.size() + 479 " instances")); 480 } 481 } 482 483 // here we keep things we need later on 484 // QuadTree sizes for later use (matching new instances) 485 this.csize = new HashMap<Integer, ArrayList<Double[][]>>(QuadTree.csize); 486 487 // pivot elements 488 // this.cpivots.clear(); 489 for (int i = 0; i < FMAP.PA[0].length; i++) { 490 this.cpivots.put(FMAP.PA[0][i], (Instance) train.get(FMAP.PA[0][i]).copy()); 491 } 492 for (int j = 0; j < FMAP.PA[0].length; j++) { 493 this.cpivots.put(FMAP.PA[1][j], (Instance) train.get(FMAP.PA[1][j]).copy()); 494 } 495 496 /* 497 * debug output int pnumber; Iterator<Integer> pivotnumber = 498 * cpivots.keySet().iterator(); while ( pivotnumber.hasNext() ) { pnumber = 499 * pivotnumber.next(); Console.traceln(Level.INFO, String.format("pivot: "+pnumber+ 500 * " inst: "+cpivots.get(pnumber))); } 501 */ 502 503 // train one classifier per cluster, we get the cluster number from the traindata 504 int cnumber; 505 Iterator<Integer> clusternumber = ctraindata.keySet().iterator(); 506 // cclassifier.clear(); 507 508 // int traindata_count = 0; 509 while (clusternumber.hasNext()) { 510 cnumber = clusternumber.next(); 511 cclassifier.put(cnumber, setupClassifier()); // this is the classifier used for the 512 // cluster 513 cclassifier.get(cnumber).buildClassifier(ctraindata.get(cnumber)); 514 // Console.traceln(Level.INFO, String.format("classifier in cluster "+cnumber)); 515 // traindata_count += ctraindata.get(cnumber).size(); 516 // Console.traceln(Level.INFO, 517 // String.format("building classifier in cluster "+cnumber +" with "+ 518 // ctraindata.get(cnumber).size() +" traindata instances")); 519 } 520 521 // add all traindata 522 // Console.traceln(Level.INFO, String.format("traindata in all clusters: " + 523 // traindata_count)); 524 } 525 } 526 527 /** 528 * Payload for the QuadTree. x and y are the calculated Fastmap values. T is a weka instance. 529 */ 530 public class QuadTreePayload<T> { 531 532 public double x; 533 public double y; 534 private T inst; 535 536 public QuadTreePayload(double x, double y, T value) { 537 this.x = x; 538 this.y = y; 539 this.inst = value; 540 } 541 542 public T getInst() { 543 return this.inst; 544 } 545 } 546 547 /** 548 * Fastmap implementation 549 * 550 * Faloutsos, C., & Lin, K. I. (1995). FastMap: A fast algorithm for indexing, data-mining and 551 * visualization of traditional and multimedia datasets (Vol. 24, No. 2, pp. 163-174). ACM. 
552 */ 553 public class Fastmap { 554 555 /* N x k Array, at the end, the i-th row will be the image of the i-th object */ 556 private double[][] X; 557 558 /* 2 x k pivot Array one pair per recursive call */ 559 private int[][] PA; 560 561 /* Objects we got (distance matrix) */ 562 private double[][] O; 563 564 /* column of X currently updated (also the dimension) */ 565 private int col = 0; 566 567 /* number of dimensions we want */ 568 private int target_dims = 0; 569 570 // if we already have the pivot elements 571 private boolean pivot_set = false; 572 573 public Fastmap(int k) { 574 this.target_dims = k; 575 } 576 577 /** 578 * Sets the distance matrix and params that depend on this 579 * 580 * @param O 581 */ 582 public void setDistmat(double[][] O) { 583 this.O = O; 584 int N = O.length; 585 this.X = new double[N][this.target_dims]; 586 this.PA = new int[2][this.target_dims]; 587 } 588 589 /** 590 * Set pivot elements, we need that to classify instances after the calculation is complete 591 * (because we then want to reuse only the pivot elements). 592 * 593 * @param pi 594 */ 595 public void setPivots(int[][] pi) { 596 this.pivot_set = true; 597 this.PA = pi; 598 } 599 600 /** 601 * Return the pivot elements that were chosen during the calculation 602 * 603 * @return 604 */ 605 public int[][] getPivots() { 606 return this.PA; 607 } 608 609 /** 610 * The distance function for euclidean distance 611 * 612 * Acts according to equation 4 of the fastmap paper 613 * 614 * @param x 615 * x index of x image (if k==0 x object) 616 * @param y 617 * y index of y image (if k==0 y object) 618 * @param kdimensionality 619 * @return distance 620 */ 621 private double dist(int x, int y, int k) { 622 623 // basis is object distance, we get this from our distance matrix 624 double tmp = this.O[x][y] * this.O[x][y]; 625 626 // decrease by projections 627 for (int i = 0; i < k; i++) { 628 double tmp2 = (this.X[x][i] - this.X[y][i]); 629 tmp -= tmp2 * tmp2; 630 } 631 632 return Math.abs(tmp); 633 } 634 635 /** 636 * Find the object farthest from the given index This method is a helper Method for 637 * findDistandObjects 638 * 639 * @param index 640 * of the object 641 * @return index of the farthest object from the given index 642 */ 643 private int findFarthest(int index) { 644 double furthest = Double.MIN_VALUE; 645 int ret = 0; 646 647 for (int i = 0; i < O.length; i++) { 648 double dist = this.dist(i, index, this.col); 649 if (i != index && dist > furthest) { 650 furthest = dist; 651 ret = i; 652 } 653 } 654 return ret; 655 } 656 657 /** 658 * Finds the pivot objects 659 * 660 * This method is basically algorithm 1 of the fastmap paper. 661 * 662 * @return 2 indexes of the choosen pivot objects 663 */ 664 private int[] findDistantObjects() { 665 // 1. choose object randomly 666 Random r = new Random(); 667 int obj = r.nextInt(this.O.length); 668 669 // 2. find farthest object from randomly chosen object 670 int idx1 = this.findFarthest(obj); 671 672 // 3. find farthest object from previously farthest object 673 int idx2 = this.findFarthest(idx1); 674 675 return new int[] 676 { idx1, idx2 }; 677 } 678 679 /** 680 * Calculates the new k-vector values (projections) 681 * 682 * This is basically algorithm 2 of the fastmap paper. We just added the possibility to 683 * pre-set the pivot elements because we need to classify single instances after the 684 * computation is already done. 
685 * 686 * @param dims 687 * dimensionality 688 */ 689 public void calculate() { 690 691 for (int k = 0; k < this.target_dims; k++) { 692 // 2) choose pivot objects 693 if (!this.pivot_set) { 694 int[] pivots = this.findDistantObjects(); 695 696 // 3) record ids of pivot objects 697 this.PA[0][this.col] = pivots[0]; 698 this.PA[1][this.col] = pivots[1]; 699 } 700 701 // 4) inter object distances are zero (this.X is initialized with 0 so we just 702 // continue) 703 if (this.dist(this.PA[0][this.col], this.PA[1][this.col], this.col) == 0) { 704 continue; 705 } 706 707 // 5) project the objects on the line between the pivots 708 double dxy = this.dist(this.PA[0][this.col], this.PA[1][this.col], this.col); 709 for (int i = 0; i < this.O.length; i++) { 710 711 double dix = this.dist(i, this.PA[0][this.col], this.col); 712 double diy = this.dist(i, this.PA[1][this.col], this.col); 713 714 double tmp = (dix + dxy - diy) / (2 * Math.sqrt(dxy)); 715 716 // save the projection 717 this.X[i][this.col] = tmp; 718 } 719 720 this.col += 1; 721 } 722 } 723 724 /** 725 * returns the result matrix of the projections 726 * 727 * @return calculated result 728 */ 729 public double[][] getX() { 730 return this.X; 731 } 732 } 669 733 } -
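Note: since the Fastmap projection is the piece that is reused when single instances are matched to clusters, a tiny worked example may help. The sketch below is illustrative only and is not the Fastmap class above: it computes one projection dimension from a matrix of squared pairwise distances and two given pivot indices, following the FastMap projection formula of Faloutsos & Lin (1995); pivot selection and the residual-distance recursion for further dimensions are omitted.

// Minimal sketch of one FastMap projection step (illustrative only).
public final class FastmapStep {

    /**
     * Projects every object onto the line spanned by the two pivot objects a and b.
     *
     * @param d squared pairwise distance matrix of the objects
     * @param a index of the first pivot
     * @param b index of the second pivot
     * @return one coordinate per object (a single Fastmap dimension)
     */
    static double[] project(double[][] d, int a, int b) {
        double dab = d[a][b]; // squared pivot distance
        double[] x = new double[d.length];
        if (dab == 0) {
            return x; // all objects coincide on this axis
        }
        for (int i = 0; i < d.length; i++) {
            // projection on squared distances: (d(a,i)^2 + d(a,b)^2 - d(b,i)^2) / (2 * d(a,b))
            x[i] = (d[a][i] + dab - d[b][i]) / (2 * Math.sqrt(dab));
        }
        return x;
    }

    public static void main(String[] args) {
        // three points on a line at positions 0, 1, 2; entries are squared distances
        double[][] d = {
            { 0, 1, 4 },
            { 1, 0, 1 },
            { 4, 1, 0 }
        };
        double[] proj = project(d, 0, 2);
        for (double v : proj) {
            System.out.println(v); // expected: 0.0, 1.0, 2.0
        }
    }
}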
trunk/CrossPare/src/de/ugoe/cs/cpdp/training/WekaTraining.java
r25 r41 1 // Copyright 2015 Georg-August-Universität Göttingen, Germany 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 1 15 package de.ugoe.cs.cpdp.training; 2 16 … … 11 25 /** 12 26 * Programmatic WekaTraining 13 *14 * first parameter is Trainer Name.15 * second parameter is class name16 27 * 17 * all subsequent parameters are configuration params (for example for trees) 18 * Cross Validation params always come last and are prepended with -CVPARAM 28 * first parameter is Trainer Name. second parameter is class name 29 * 30 * all subsequent parameters are configuration params (for example for trees) Cross Validation 31 * params always come last and are prepended with -CVPARAM 19 32 * 20 33 * XML Configurations for Weka Classifiers: 34 * 21 35 * <pre> 22 36 * {@code … … 30 44 public class WekaTraining extends WekaBaseTraining implements ITrainingStrategy { 31 45 32 @Override 33 public void apply(Instances traindata) { 34 PrintStream errStr = System.err; 35 System.setErr(new PrintStream(new NullOutputStream())); 36 try { 37 if(classifier == null) { 38 Console.traceln(Level.WARNING, String.format("classifier null!")); 39 } 40 classifier.buildClassifier(traindata); 41 } catch (Exception e) { 42 throw new RuntimeException(e); 43 } finally { 44 System.setErr(errStr); 45 } 46 } 46 @Override 47 public void apply(Instances traindata) { 48 PrintStream errStr = System.err; 49 System.setErr(new PrintStream(new NullOutputStream())); 50 try { 51 if (classifier == null) { 52 Console.traceln(Level.WARNING, String.format("classifier null!")); 53 } 54 classifier.buildClassifier(traindata); 55 } 56 catch (Exception e) { 57 throw new RuntimeException(e); 58 } 59 finally { 60 System.setErr(errStr); 61 } 62 } 47 63 }
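Note: the programmatic setup described in the WekaTraining Javadoc boils down to instantiating a Weka classifier from a class name plus an option string and then training it. The sketch below illustrates that pattern under stated assumptions: the parameter string, the RandomForest choice, and the training.arff file are hypothetical, and the real parameter parsing (including the -CVPARAM handling) is done by WekaBaseTraining, which is not shown here.

// Minimal sketch of instantiating and training a Weka classifier from a parameter string.
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;

public class WekaTrainingSketch {

    public static void main(String[] args) throws Exception {
        // hypothetical parameter string: "<name> <class name> <options>"
        String param = "RandomForest weka.classifiers.trees.RandomForest -I 100";
        String[] tokens = param.split(" ", 3);
        String className = tokens[1];
        String[] options = tokens.length > 2 ? Utils.splitOptions(tokens[2]) : new String[0];

        // instantiate the classifier by reflection, as Weka does internally
        Classifier classifier = AbstractClassifier.forName(className, options);

        // hypothetical ARFF file; the last attribute is assumed to be the class
        Instances traindata = DataSource.read("training.arff");
        traindata.setClassIndex(traindata.numAttributes() - 1);
        classifier.buildClassifier(traindata);

        System.out.println(classifier);
    }
}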