// Copyright 2015 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.cpdp.dataprocessing; import java.util.Arrays; import java.util.logging.Level; import org.ojalgo.matrix.PrimitiveMatrix; import org.ojalgo.matrix.jama.JamaEigenvalue; import org.ojalgo.matrix.jama.JamaEigenvalue.General; import org.ojalgo.scalar.ComplexNumber; import org.ojalgo.access.Access2D.Builder; import org.ojalgo.array.Array1D; import de.ugoe.cs.util.console.Console; import weka.core.Attribute; import weka.core.Instance; import weka.core.Instances; /** *

* TCA with a linear kernel after Pan et al. (Domain Adaptation via Transfer Component Analysis) and * used for defect prediction by Nam et al. (Transfer Defect Learning) *

* * TODO comment class * @author Steffen Herbold */ public class TransferComponentAnalysis implements IProcessesingStrategy { int reducedDimension = 5; @Override public void setParameter(String parameters) { } @Override public void apply(Instances testdata, Instances traindata) { applyTCA(testdata, traindata); } private double linearKernel(Instance x1, Instance x2) { double value = 0.0d; for (int j = 0; j < x1.numAttributes(); j++) { if (j != x1.classIndex()) { value += x1.value(j) * x2.value(j); } } return value; } private void applyTCA(Instances testdata, Instances traindata) { final int sizeTest = testdata.numInstances(); final int sizeTrain = traindata.numInstances(); final PrimitiveMatrix kernelMatrix = buildKernel(testdata, traindata); final PrimitiveMatrix kernelNormMatrix = buildKernelNormMatrix(sizeTest, sizeTrain); // L in // the // paper final PrimitiveMatrix centerMatrix = buildCenterMatrix(sizeTest, sizeTrain); // H in the // paper final double mu = 1.0; // default from the MATLAB implementation final PrimitiveMatrix muMatrix = buildMuMatrix(sizeTest, sizeTrain, mu); PrimitiveMatrix.FACTORY.makeEye(sizeTest + sizeTrain, sizeTest + sizeTrain); Console.traceln(Level.FINEST, "creating optimization matrix (dimension " + (sizeTest + sizeTrain) + ")"); final PrimitiveMatrix optimizationProblem = kernelMatrix.multiplyRight(kernelNormMatrix) .multiplyRight(kernelMatrix).add(muMatrix).invert().multiplyRight(kernelMatrix) .multiplyRight(centerMatrix).multiplyRight(kernelMatrix); Console.traceln(Level.FINEST, "optimization matrix created, now solving eigenvalue problem"); General eigenvalueDecomposition = new JamaEigenvalue.General(); eigenvalueDecomposition.compute(optimizationProblem); Console.traceln(Level.FINEST, "eigenvalue problem solved"); Array1D eigenvaluesArray = eigenvalueDecomposition.getEigenvalues(); System.out.println(eigenvaluesArray.length); final double[] eigenvalues = new double[(int) eigenvaluesArray.length]; final int[] index = new int[(int) eigenvaluesArray.length]; // create kernel transformation matrix from eigenvectors for (int i = 0; i < eigenvaluesArray.length; i++) { eigenvalues[i] = eigenvaluesArray.doubleValue(i); index[i] = i; } quicksort(eigenvalues, index); final PrimitiveMatrix transformedKernel = kernelMatrix.multiplyRight(eigenvalueDecomposition .getV().selectColumns(Arrays.copyOfRange(index, 0, reducedDimension))); // update testdata and traindata for (int j = testdata.numAttributes() - 1; j >= 0; j--) { if (j != testdata.classIndex()) { testdata.deleteAttributeAt(j); traindata.deleteAttributeAt(j); } } for (int j = 0; j < reducedDimension; j++) { testdata.insertAttributeAt(new Attribute("kerneldim" + j), 1); traindata.insertAttributeAt(new Attribute("kerneldim" + j), 1); } for (int i = 0; i < sizeTrain; i++) { for (int j = 0; j < reducedDimension; j++) { traindata.instance(i).setValue(j + 1, transformedKernel.get(i, j)); } } for (int i = 0; i < sizeTest; i++) { for (int j = 0; j < reducedDimension; j++) { testdata.instance(i).setValue(j + 1, transformedKernel.get(i + sizeTrain, j)); } } } private PrimitiveMatrix buildKernel(Instances testdata, Instances traindata) { final int kernelDim = traindata.numInstances() + testdata.numInstances(); Builder kernelBuilder = PrimitiveMatrix.getBuilder(kernelDim, kernelDim); // built upper left quadrant (source, source) for (int i = 0; i < traindata.numInstances(); i++) { for (int j = 0; j < traindata.numInstances(); j++) { kernelBuilder.set(i, j, linearKernel(traindata.get(i), traindata.get(j))); } } // built upper right quadrant (source, target) for (int i = 0; i < traindata.numInstances(); i++) { for (int j = 0; j < testdata.numInstances(); j++) { kernelBuilder.set(i, j + traindata.numInstances(), linearKernel(traindata.get(i), testdata.get(j))); } } // built lower left quadrant (target, source) for (int i = 0; i < testdata.numInstances(); i++) { for (int j = 0; j < traindata.numInstances(); j++) { kernelBuilder.set(i + traindata.numInstances(), j, linearKernel(testdata.get(i), traindata.get(j))); } } // built lower right quadrant (target, target) for (int i = 0; i < testdata.numInstances(); i++) { for (int j = 0; j < testdata.numInstances(); j++) { kernelBuilder.set(i + traindata.numInstances(), j + traindata.numInstances(), linearKernel(testdata.get(i), testdata.get(j))); } } return kernelBuilder.build(); } private PrimitiveMatrix buildKernelNormMatrix(final int dimTest, final int sizeTrain) { final double trainSquared = 1.0 / (sizeTrain * (double) sizeTrain); final double testSquared = 1.0 / (dimTest * (double) dimTest); final double trainTest = -1.0 / (sizeTrain * (double) dimTest); Builder kernelNormBuilder = PrimitiveMatrix.getBuilder(sizeTrain + dimTest, sizeTrain + dimTest); // built upper left quadrant (source, source) for (int i = 0; i < sizeTrain; i++) { for (int j = 0; j < sizeTrain; j++) { kernelNormBuilder.set(i, j, trainSquared); } } // built upper right quadrant (source, target) for (int i = 0; i < sizeTrain; i++) { for (int j = 0; j < dimTest; j++) { kernelNormBuilder.set(i, j + sizeTrain, trainTest); } } // built lower left quadrant (target, source) for (int i = 0; i < dimTest; i++) { for (int j = 0; j < sizeTrain; j++) { kernelNormBuilder.set(i + sizeTrain, j, trainTest); } } // built lower right quadrant (target, target) for (int i = 0; i < dimTest; i++) { for (int j = 0; j < dimTest; j++) { kernelNormBuilder.set(i + sizeTrain, j + sizeTrain, testSquared); } } return kernelNormBuilder.build(); } private PrimitiveMatrix buildCenterMatrix(final int sizeTest, final int sizeTrain) { Builder centerMatrix = PrimitiveMatrix.getBuilder(sizeTest + sizeTrain, sizeTest + sizeTrain); for (int i = 0; i < centerMatrix.countRows(); i++) { centerMatrix.set(i, i, -1.0 / (sizeTest + sizeTrain)); } return centerMatrix.build(); } private PrimitiveMatrix buildMuMatrix(final int sizeTest, final int sizeTrain, final double mu) { Builder muMatrix = PrimitiveMatrix.getBuilder(sizeTest + sizeTrain, sizeTest + sizeTrain); for (int i = 0; i < muMatrix.countRows(); i++) { muMatrix.set(i, i, mu); } return muMatrix.build(); } // below is from http://stackoverflow.com/a/1040503 private static void quicksort(double[] main, int[] index) { quicksort(main, index, 0, index.length - 1); } // quicksort a[left] to a[right] private static void quicksort(double[] a, int[] index, int left, int right) { if (right <= left) return; int i = partition(a, index, left, right); quicksort(a, index, left, i - 1); quicksort(a, index, i + 1, right); } // partition a[left] to a[right], assumes left < right private static int partition(double[] a, int[] index, int left, int right) { int i = left - 1; int j = right; while (true) { while (less(a[++i], a[right])) // find item on left to swap ; // a[right] acts as sentinel while (less(a[right], a[--j])) // find item on right to swap if (j == left) break; // don't go out-of-bounds if (i >= j) break; // check if pointers cross exch(a, index, i, j); // swap two elements into place } exch(a, index, i, right); // swap with partition element return i; } // is x < y ? private static boolean less(double x, double y) { return (x < y); } // exchange a[i] and a[j] private static void exch(double[] a, int[] index, int i, int j) { double swap = a[i]; a[i] = a[j]; a[j] = swap; int b = index[i]; index[i] = index[j]; index[j] = b; } }