// Copyright 2015 Georg-August-Universität Göttingen, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package de.ugoe.cs.cpdp.dataprocessing; import java.util.Arrays; import java.util.logging.Level; import org.ojalgo.matrix.PrimitiveMatrix; import org.ojalgo.matrix.jama.JamaEigenvalue; import org.ojalgo.matrix.jama.JamaEigenvalue.General; import org.ojalgo.scalar.ComplexNumber; import org.ojalgo.access.Access2D.Builder; import org.ojalgo.array.Array1D; import de.ugoe.cs.cpdp.util.SortUtils; import de.ugoe.cs.util.console.Console; import weka.core.Attribute; import weka.core.Instance; import weka.core.Instances; /** *
* TCA with a linear kernel after Pan et al. (Domain Adaptation via Transfer Component Analysis) and * used for defect prediction by Nam et al. (Transfer Defect Learning) *
* * @author Steffen Herbold */ public class TransferComponentAnalysis implements IProcessesingStrategy { /** * Dimension of the reduced data. */ int reducedDimension = 5; /* * (non-Javadoc) * * @see de.ugoe.cs.cpdp.IParameterizable#setParameter(java.lang.String) */ @Override public void setParameter(String parameters) { // dummy, paramters ignored } /* * (non-Javadoc) * * @see de.ugoe.cs.cpdp.dataprocessing.IProcessesingStrategy#apply(weka.core.Instances, * weka.core.Instances) */ @Override public void apply(Instances testdata, Instances traindata) { applyTCA(testdata, traindata); } /** ** calculates the linear kernel function between two instances *
* * @param x1 * first instance * @param x2 * second instance * @return kernel value */ private double linearKernel(Instance x1, Instance x2) { double value = 0.0d; for (int j = 0; j < x1.numAttributes(); j++) { if (j != x1.classIndex()) { value += x1.value(j) * x2.value(j); } } return value; } /** ** Applies TCA to the test and training data. *
* * @param testdata * the test data * @param traindata * the training data */ private void applyTCA(Instances testdata, Instances traindata) { final int sizeTest = testdata.numInstances(); final int sizeTrain = traindata.numInstances(); final PrimitiveMatrix kernelMatrix = buildKernel(testdata, traindata); final PrimitiveMatrix kernelNormMatrix = buildKernelNormMatrix(sizeTest, sizeTrain); // L in // the // paper final PrimitiveMatrix centerMatrix = buildCenterMatrix(sizeTest, sizeTrain); // H in the // paper final double mu = 1.0; // default from the MATLAB implementation final PrimitiveMatrix muMatrix = buildMuMatrix(sizeTest, sizeTrain, mu); PrimitiveMatrix.FACTORY.makeEye(sizeTest + sizeTrain, sizeTest + sizeTrain); Console.traceln(Level.FINEST, "creating optimization matrix (dimension " + (sizeTest + sizeTrain) + ")"); final PrimitiveMatrix optimizationProblem = kernelMatrix.multiplyRight(kernelNormMatrix) .multiplyRight(kernelMatrix).add(muMatrix).invert().multiplyRight(kernelMatrix) .multiplyRight(centerMatrix).multiplyRight(kernelMatrix); Console.traceln(Level.FINEST, "optimization matrix created, now solving eigenvalue problem"); General eigenvalueDecomposition = new JamaEigenvalue.General(); eigenvalueDecomposition.compute(optimizationProblem); Console.traceln(Level.FINEST, "eigenvalue problem solved"); Array1D* Creates the kernel matrix of the test and training data *
* * @param testdata * the test data * @param traindata * the training data * @return kernel matrix */ private PrimitiveMatrix buildKernel(Instances testdata, Instances traindata) { final int kernelDim = traindata.numInstances() + testdata.numInstances(); Builder* Calculates the kernel norm matrix, i.e., the matrix which is used for matrix multiplication * to calculate the kernel norm. *
* * @param dimTest * dimension of the test data * @param sizeTrain * number of instances of the training data * @return kernel norm matrix */ private PrimitiveMatrix buildKernelNormMatrix(final int dimTest, final int sizeTrain) { final double trainSquared = 1.0 / (sizeTrain * (double) sizeTrain); final double testSquared = 1.0 / (dimTest * (double) dimTest); final double trainTest = -1.0 / (sizeTrain * (double) dimTest); Builder* Creates the center matrix *
* * @param sizeTest * number of instances of the test data * @param sizeTrain * number of instances of the training data * @return center matrix */ private PrimitiveMatrix buildCenterMatrix(final int sizeTest, final int sizeTrain) { Builder* Builds the mu-Matrix for offsetting values. *
* * @param sizeTest * number of instances of the test data * @param sizeTrain * number of instances of the training data * @param mu * mu parameter * @return mu-Matrix */ private PrimitiveMatrix buildMuMatrix(final int sizeTest, final int sizeTrain, final double mu) { Builder