/*
*
* This Java port of Barnes Hut t-SNE is Copyright (c) Leif Jonsson 2016 and
* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Delft University of Technology.
* 4. Neither the name of the Delft University of Technology nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/
package com.jujutsu.tsne.barneshut;
import static java.lang.Math.exp;
import static java.lang.Math.log;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import com.jujutsu.tsne.PrincipalComponentAnalysis;
import com.jujutsu.utils.MatrixOps;
public class BHTSne implements BarnesHutTSne {
protected final Distance distance = new EuclideanDistance();
protected volatile boolean abort = false;
@Override
public double[][] tsne(TSneConfiguration config) {
return run(config);
}
private double[] flatten(double[][] x) {
int noCols = x[0].length;
double [] flat = new double[x.length*x[0].length];
for (int i = 0; i < x.length; i++) {
for (int j = 0; j < x[i].length; j++) {
flat[i*noCols+j] = x[i][j];
}
}
return flat;
}
private double [][] expand(double[]x, int N, int D) {
double [][] expanded = new double[N][D];
for (int row = 0; row < N; row++) {
for (int col = 0; col < D; col++) {
expanded[row][col] = x[row*D+col];
}
}
return expanded;
}
static double sign_tsne(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); }
// Perform t-SNE
double [][] run(TSneConfiguration parameterObject) {
int D = parameterObject.getXStartDim();
double[][] Xin = parameterObject.getXin();
boolean exact = (parameterObject.getTheta() == .0);
if(exact) throw new IllegalArgumentException("The Barnes Hut implementation does not support exact inference yet (theta==0.0), if you want exact t-SNE please use one of the standard t-SNE implementations (FastTSne for instance)");
if(parameterObject.usePca() && D > parameterObject.getInitialDims() && parameterObject.getInitialDims() > 0) {
PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis();
Xin = pca.pca(Xin, parameterObject.getInitialDims());
D = parameterObject.getInitialDims();
System.out.println("X:Shape after PCA is = " + Xin.length + " x " + Xin[0].length);
}
double [] X = flatten(Xin);
int N = parameterObject.getNrRows();
int no_dims = parameterObject.getOutputDims();
double [] Y = new double[N*no_dims];
System.out.println("X:Shape is = " + N + " x " + D);
// Determine whether we are using an exact algorithm
double perplexity = parameterObject.getPerplexity();
if(N - 1 < 3 * perplexity) { throw new IllegalArgumentException("Perplexity too large for the number of data points!\n"); }
System.out.printf("Using no_dims = %d, perplexity = %f, and theta = %f\n", no_dims, perplexity, parameterObject.getTheta());
// Set learning parameters
double total_time = 0;
int stop_lying_iter = 250, mom_switch_iter = 250;
double momentum = .5, final_momentum = .8;
double eta = 200.0;
// Allocate some memory
double [] dY = new double[N * no_dims];
double [] uY = new double[N * no_dims];
double [] gains = new double[N * no_dims];
for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0;
// Normalize input data (to prevent numerical problems)
System.out.println("Computing input similarities...");
long start = System.currentTimeMillis();
//zeroMean(X, N, D);
double max_X = .0;
for(int i = 0; i < N * D; i++) {
if(X[i] > max_X) max_X = X[i];
}
for(int i = 0; i < N * D; i++) X[i] /= max_X;
double [] P = null;
int K = (int) (3 * perplexity);
int [] row_P = new int[N+1];
int [] col_P = new int[N*K];
double [] val_P = new double[N*K];
/**_row_P = (int*) malloc((N + 1) * sizeof(int));
*_col_P = (int*) calloc(N * K, sizeof(int));
*_val_P = (double*) calloc(N * K, sizeof(double));*/
// Compute input similarities for exact t-SNE
if(exact) {
// Compute similarities
P = new double[N * N];
computeGaussianPerplexity(X, N, D, P, perplexity);
// Symmetrize input similarities
System.out.println("Symmetrizing...");
int nN = 0;
for(int n = 0; n < N; n++) {
int mN = 0;
for(int m = n + 1; m < N; m++) {
P[nN + m] += P[mN + n];
P[mN + n] = P[nN + m];
mN += N;
}
nN += N;
}
double sum_P = .0;
for(int i = 0; i < N * N; i++) sum_P += P[i];
for(int i = 0; i < N * N; i++) P[i] /= sum_P;
}
// Compute input similarities for approximate t-SNE
else {
// Compute asymmetric pairwise input similarities
computeGaussianPerplexity(X, N, D, row_P, col_P, val_P, perplexity, K);
// Verified that val_P,col_P,row_P is the same at this point
// Symmetrize input similarities
SymResult res = symmetrizeMatrix(row_P, col_P, val_P, N);
row_P = res.sym_row_P;
col_P = res.sym_col_P;
val_P = res.sym_val_P;
double sum_P = .0;
for(int i = 0; i < row_P[N]; i++) sum_P += val_P[i];
for(int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P;
}
long end = System.currentTimeMillis();
// Lie about the P-values
if(exact) { for(int i = 0; i < N * N; i++) P[i] *= 12.0; }
else { for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; }
// Initialize solution (randomly)
for(int i = 0; i < N * no_dims; i++) Y[i] = ThreadLocalRandom.current().nextDouble() * 0.0001;
// Perform main training loop
if(exact) System.out.printf("Done in %4.2f seconds!\nLearning embedding...\n", (end - start) / 1000.0);
else System.out.printf("Done in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (end - start) / 1000.0, (double) row_P[N] / ((double) N * (double) N));
start = System.currentTimeMillis();
for(int iter = 0; iter < parameterObject.getMaxIter() && !abort; iter++) {
if(exact) computeExactGradient(P, Y, N, no_dims, dY);
// Compute (approximate) gradient
else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, parameterObject.getTheta());
updateGradient(N, no_dims, Y, momentum, eta, dY, uY, gains);
// Make solution zero-mean
zeroMean(Y, N, no_dims);
// Stop lying about the P-values after a while, and switch momentum
if(iter == stop_lying_iter) {
if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; }
else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; }
}
if(iter == mom_switch_iter) momentum = final_momentum;
// Print out progress
if(((iter > 0 && iter % 50 == 0) || iter == parameterObject.getMaxIter() - 1) && !parameterObject.silent() ) {
end = System.currentTimeMillis();
String err_string = "not_calculated";
if(parameterObject.printError()) {
double C = .0;
if(exact) C = evaluateError(P, Y, N, no_dims);
else C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, parameterObject.getTheta()); // doing approximate computation here!
err_string = "" + C;
}
if(iter == 0)
System.out.printf("Iteration %d: error is %s\n", iter + 1, err_string);
else {
total_time += (end - start) / 1000.0;
System.out.printf("Iteration %d: error is %s (50 iterations in %4.2f seconds)\n", iter, err_string, (end - start) / 1000.0);
}
start = System.currentTimeMillis();
}
}
end = System.currentTimeMillis(); total_time += (end - start) / 1000.0;
System.out.printf("Fitting performed in %4.2f seconds.\n", total_time);
return expand(Y,N,no_dims);
}
void updateGradient(int N, int no_dims, double[] Y, double momentum, double eta, double[] dY, double[] uY,
double[] gains) {
for(int i = 0; i < N * no_dims; i++) {
// Update gains
gains[i] = (sign_tsne(dY[i]) != sign_tsne(uY[i])) ? (gains[i] + .2) : (gains[i] * .8);
if(gains[i] < .01) gains[i] = .01;
// Perform gradient update (with momentum and gains)
Y[i] = Y[i] + uY[i];
uY[i] = momentum * uY[i] - eta * gains[i] * dY[i];
}
}
// Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm)
void computeGradient(double [] P, int [] inp_row_P, int [] inp_col_P, double [] inp_val_P, double [] Y, int N, int D, double [] dC, double theta)
{
// Construct space-partitioning tree on current map
SPTree tree = new SPTree(D, Y, N);
// Compute all terms required for t-SNE gradient
double [] sum_Q = new double[1];
double [] pos_f = new double[N * D];
double [][] neg_f = new double[N][D];
tree.computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f);
for(int n = 0; n < N; n++) tree.computeNonEdgeForces(n, theta, neg_f[n], sum_Q);
// Compute final t-SNE gradient
for(int n = 0; n < N; n++) {
for(int d = 0; d < D; d++) {
dC[n*D+d] = pos_f[n*D+d] - (neg_f[n][d] / sum_Q[0]);
}
}
}
// Compute gradient of the t-SNE cost function (exact)
void computeExactGradient(double [] P, double [] Y, int N, int D, double [] dC) {
// Make sure the current gradient contains zeros
for(int i = 0; i < N * D; i++) dC[i] = 0.0;
// Compute the squared Euclidean distance matrix
double [] DD = new double[N * N];
computeSquaredEuclideanDistance(Y, N, D, DD);
// Compute Q-matrix and normalization sum
double [] Q = new double[N * N];
double sum_Q = .0;
int nN = 0;
for(int n = 0; n < N; n++) {
for(int m = 0; m < N; m++) {
if(n != m) {
Q[nN + m] = 1 / (1 + DD[nN + m]);
sum_Q += Q[nN + m];
}
}
nN += N;
}
// Perform the computation of the gradient
nN = 0;
int nD = 0;
for(int n = 0; n < N; n++) {
int mD = 0;
for(int m = 0; m < N; m++) {
if(n != m) {
double mult = (P[nN + m] - (Q[nN + m] / sum_Q)) * Q[nN + m];
for(int d = 0; d < D; d++) {
dC[nD + d] += (Y[nD + d] - Y[mD + d]) * mult;
}
}
mD += D;
}
nN += N;
nD += D;
}
}
// Evaluate t-SNE cost function (exactly)
double evaluateError(double [] P, double [] Y, int N, int D) {
// Compute the squared Euclidean distance matrix
double [] DD = new double[N * N];
double [] Q = new double[N * N];
computeSquaredEuclideanDistance(Y, N, D, DD);
// Compute Q-matrix and normalization sum
int nN = 0;
double sum_Q = Double.MIN_VALUE;
for(int n = 0; n < N; n++) {
for(int m = 0; m < N; m++) {
if(n != m) {
Q[nN + m] = 1 / (1 + DD[nN + m]);
sum_Q += Q[nN + m];
}
else Q[nN + m] = Double.MIN_VALUE;
}
nN += N;
}
for(int i = 0; i < N * N; i++) Q[i] /= sum_Q;
// Sum t-SNE error
double C = .0;
for(int n = 0; n < N * N; n++) {
C += P[n] * log((P[n] + Double.MIN_VALUE) / (Q[n] + Double.MIN_VALUE));
}
return C;
}
// Evaluate t-SNE cost function (approximately)
double evaluateError(int [] row_P, int [] col_P, double [] val_P, double [] Y, int N, int D, double theta)
{
// Get estimate of normalization term
SPTree tree = new SPTree(D, Y, N);
double [] buff = new double[D];
double [] sum_Q = new double[1];
for(int n = 0; n < N; n++) tree.computeNonEdgeForces(n, theta, buff, sum_Q);
// Loop over all edges to compute t-SNE error
int ind1, ind2;
double C = .0, Q;
for(int n = 0; n < N; n++) {
ind1 = n * D;
for(int i = row_P[n]; i < row_P[n + 1]; i++) {
Q = .0;
ind2 = col_P[i] * D;
for(int d = 0; d < D; d++) buff[d] = Y[ind1 + d];
for(int d = 0; d < D; d++) buff[d] -= Y[ind2 + d];
for(int d = 0; d < D; d++) Q += buff[d] * buff[d];
Q = (1.0 / (1.0 + Q)) / sum_Q[0];
C += val_P[i] * log((val_P[i] + Double.MIN_VALUE) / (Q + Double.MIN_VALUE));
}
}
return C;
}
// Compute input similarities with a fixed perplexity
void computeGaussianPerplexity(double [] X, int N, int D, double [] P, double perplexity) {
// Compute the squared Euclidean distance matrix
double [] DD = new double[N * N];
computeSquaredEuclideanDistance(X, N, D, DD);
// Compute the Gaussian kernel row by row
int nN = 0;
for(int n = 0; n < N; n++) {
// Initialize some variables
boolean found = false;
double beta = 1.0;
double min_beta = -Double.MAX_VALUE;
double max_beta = Double.MAX_VALUE;
double tol = 1e-5;
double sum_P = Double.MIN_VALUE;
// Iterate until we found a good perplexity
int iter = 0;
while(!found && iter < 200) {
// Compute Gaussian kernel row
for(int m = 0; m < N; m++) P[nN + m] = exp(-beta * DD[nN + m]);
P[nN + n] = Double.MIN_VALUE;
// Compute entropy of current row
sum_P = Double.MIN_VALUE;
for(int m = 0; m < N; m++) sum_P += P[nN + m];
double H = 0.0;
for(int m = 0; m < N; m++) H += beta * (DD[nN + m] * P[nN + m]);
H = (H / sum_P) + log(sum_P);
// Evaluate whether the entropy is within the tolerance level
double Hdiff = H - log(perplexity);
if(Hdiff < tol && -Hdiff < tol) {
found = true;
}
else {
if(Hdiff > 0) {
min_beta = beta;
if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
beta *= 2.0;
else
beta = (beta + max_beta) / 2.0;
}
else {
max_beta = beta;
if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
beta /= 2.0;
else
beta = (beta + min_beta) / 2.0;
}
}
// Update iteration counter
iter++;
}
// Row normalize P
for(int m = 0; m < N; m++) P[nN + m] /= sum_P;
nN += N;
}
}
// Compute input similarities with a fixed perplexity using ball trees
void computeGaussianPerplexity(double [] X, int N, int D, int [] _row_P, int [] _col_P, double [] _val_P, double perplexity, int K) {
if(perplexity > K) System.out.println("Perplexity should be lower than K!");
// Allocate the memory we need
/**_row_P = (int*) malloc((N + 1) * sizeof(int));
*_col_P = (int*) calloc(N * K, sizeof(int));
*_val_P = (double*) calloc(N * K, sizeof(double));
if(*_row_P == null || *_col_P == null || *_val_P == null) { Rcpp::stop("Memory allocation failed!\n"); }*/
int [] row_P = _row_P;
int [] col_P = _col_P;
double [] val_P = _val_P;
double [] cur_P = new double[N - 1];
row_P[0] = 0;
for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + K;
// Build ball tree on data set
VpTree<DataPoint> tree = new VpTree<DataPoint>(distance);
final DataPoint [] obj_X = new DataPoint [N];
for(int n = 0; n < N; n++) {
double [] row = MatrixOps.extractRowFromFlatMatrix(X,n,D);
obj_X[n] = new DataPoint(D, n, row);
}
tree.create(obj_X);
// VERIFIED THAT TREES LOOK THE SAME
//System.out.println("Created Tree is: ");
// AdditionalInfoProvider pp = new AdditionalInfoProvider() {
// @Override
// public String provideInfo(Node node) {
// return "" + obj_X[node.index].index();
// }
// };
// TreePrinter printer = new TreePrinter(pp);
// printer.printTreeHorizontal(tree.getRoot());
// Loop over all points to find nearest neighbors
System.out.println("Building tree...");
List<DataPoint> indices = new ArrayList<>();
List<Double> distances = new ArrayList<>();
for(int n = 0; n < N; n++) {
if(n % 10000 == 0) System.out.printf(" - point %d of %d\n", n, N);
// Find nearest neighbors
indices.clear();
distances.clear();
//System.out.println("Looking at: " + obj_X.get(n).index());
tree.search(obj_X[n], K + 1, indices, distances);
// Initialize some variables for binary search
boolean found = false;
double beta = 1.0;
double min_beta = -Double.MAX_VALUE;
double max_beta = Double.MAX_VALUE;
double tol = 1e-5;
// Iterate until we found a good perplexity
int iter = 0;
double sum_P = 0.;
while(!found && iter < 200) {
// Compute Gaussian kernel row and entropy of current row
sum_P = Double.MIN_VALUE;
double H = .0;
for(int m = 0; m < K; m++) {
cur_P[m] = exp(-beta * distances.get(m + 1));
sum_P += cur_P[m];
H += beta * (distances.get(m + 1) * cur_P[m]);
}
H = (H / sum_P) + log(sum_P);
// Evaluate whether the entropy is within the tolerance level
double Hdiff = H - log(perplexity);
if(Hdiff < tol && -Hdiff < tol) {
found = true;
}
else {
if(Hdiff > 0) {
min_beta = beta;
if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
beta *= 2.0;
else
beta = (beta + max_beta) / 2.0;
}
else {
max_beta = beta;
if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
beta /= 2.0;
else
beta = (beta + min_beta) / 2.0;
}
}
// Update iteration counter
iter++;
}
// Row-normalize current row of P and store in matrix
for(int m = 0; m < K; m++) {
cur_P[m] /= sum_P;
col_P[row_P[n] + m] = indices.get(m + 1).index();
val_P[row_P[n] + m] = cur_P[m];
}
}
}
void computeGaussianPerplexity(double [] X, int N, int D, int [] _row_P, int [] _col_P, double [] _val_P, double perplexity, double threshold) {
// Allocate some memory we need for computations
double [] buff = new double[D];
double [] DD = new double[N];
double [] cur_P = new double[N];
// Compute the Gaussian kernel row by row (to find number of elements in sparse P)
int total_count = 0;
for(int n = 0; n < N; n++) {
// Compute the squared Euclidean distance matrix
for(int m = 0; m < N; m++) {
for(int d = 0; d < D; d++) buff[d] = X[n * D + d];
for(int d = 0; d < D; d++) buff[d] -= X[m * D + d];
DD[m] = .0;
for(int d = 0; d < D; d++) DD[m] += buff[d] * buff[d];
}
// Initialize some variables
boolean found = false;
double beta = 1.0;
double min_beta = -Double.MAX_VALUE;
double max_beta = Double.MAX_VALUE;
double tol = 1e-5;
// Iterate until we found a good perplexity
int iter = 0;
double sum_P = 0.;
while(!found && iter < 200) {
// Compute Gaussian kernel row
for(int m = 0; m < N; m++) cur_P[m] = exp(-beta * DD[m]);
cur_P[n] = Double.MIN_VALUE;
// Compute entropy of current row
sum_P = Double.MIN_VALUE;
for(int m = 0; m < N; m++) sum_P += cur_P[m];
double H = 0.0;
for(int m = 0; m < N; m++) H += beta * (DD[m] * cur_P[m]);
H = (H / sum_P) + log(sum_P);
// Evaluate whether the entropy is within the tolerance level
double Hdiff = H - log(perplexity);
if(Hdiff < tol && -Hdiff < tol) {
found = true;
}
else {
if(Hdiff > 0) {
min_beta = beta;
if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
beta *= 2.0;
else
beta = (beta + max_beta) / 2.0;
}
else {
max_beta = beta;
if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
beta /= 2.0;
else
beta = (beta + min_beta) / 2.0;
}
}
// Update iteration counter
iter++;
}
// Row-normalize and threshold current row of P
for(int m = 0; m < N; m++) cur_P[m] /= sum_P;
for(int m = 0; m < N; m++) {
if(cur_P[m] > threshold / (double) N) total_count++;
}
}
// Allocate the memory we need
_row_P = new int[N + 1];
_col_P = new int[total_count];
_val_P = new double[total_count];
int [] row_P = _row_P;
int [] col_P = _col_P;
double [] val_P = _val_P;
row_P[0] = 0;
// Compute the Gaussian kernel row by row (this time, store the results)
int count = 0;
for(int n = 0; n < N; n++) {
// Compute the squared Euclidean distance matrix
for(int m = 0; m < N; m++) {
for(int d = 0; d < D; d++) buff[d] = X[n * D + d];
for(int d = 0; d < D; d++) buff[d] -= X[m * D + d];
DD[m] = .0;
for(int d = 0; d < D; d++) DD[m] += buff[d] * buff[d];
}
// Initialize some variables
boolean found = false;
double beta = 1.0;
double min_beta = -Double.MAX_VALUE;
double max_beta = Double.MAX_VALUE;
double tol = 1e-5;
// Iterate until we found a good perplexity
int iter = 0;
double sum_P = 0.;
while(!found && iter < 200) {
// Compute Gaussian kernel row
for(int m = 0; m < N; m++) cur_P[m] = exp(-beta * DD[m]);
cur_P[n] = Double.MIN_VALUE;
// Compute entropy of current row
sum_P = Double.MIN_VALUE;
for(int m = 0; m < N; m++) sum_P += cur_P[m];
double H = 0.0;
for(int m = 0; m < N; m++) H += beta * (DD[m] * cur_P[m]);
H = (H / sum_P) + log(sum_P);
// Evaluate whether the entropy is within the tolerance level
double Hdiff = H - log(perplexity);
if(Hdiff < tol && -Hdiff < tol) {
found = true;
}
else {
if(Hdiff > 0) {
min_beta = beta;
if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
beta *= 2.0;
else
beta = (beta + max_beta) / 2.0;
}
else {
max_beta = beta;
if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
beta /= 2.0;
else
beta = (beta + min_beta) / 2.0;
}
}
// Update iteration counter
iter++;
}
// Row-normalize and threshold current row of P
for(int m = 0; m < N; m++) cur_P[m] /= sum_P;
for(int m = 0; m < N; m++) {
if(cur_P[m] > threshold / (double) N) {
col_P[count] = m;
val_P[count] = cur_P[m];
count++;
}
}
row_P[n + 1] = count;
}
}
class SymResult {
int [] sym_row_P;
int [] sym_col_P;
double [] sym_val_P;
public SymResult(int[] sym_row_P, int[] sym_col_P, double[] sym_val_P) {
super();
this.sym_row_P = sym_row_P;
this.sym_col_P = sym_col_P;
this.sym_val_P = sym_val_P;
}
}
SymResult symmetrizeMatrix(int [] _row_P, int [] _col_P, double [] _val_P, int N) {
// Get sparse matrix
int [] row_P = _row_P;
int [] col_P = _col_P;
double [] val_P = _val_P;
// Count number of elements and row counts of symmetric matrix
int [] row_counts = new int[N];
for(int n = 0; n < N; n++) {
for(int i = row_P[n]; i < row_P[n + 1]; i++) {
// Check whether element (col_P[i], n) is present
boolean present = false;
for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) {
if(col_P[m] == n) present = true;
}
if(present) row_counts[n]++;
else {
row_counts[n]++;
row_counts[col_P[i]]++;
}
}
}
int no_elem = 0;
for(int n = 0; n < N; n++) no_elem += row_counts[n];
// Allocate memory for symmetrized matrix
int [] sym_row_P = new int[N + 1];
int [] sym_col_P = new int[no_elem];
double [] sym_val_P = new double[no_elem];
// Construct new row indices for symmetric matrix
sym_row_P[0] = 0;
for(int n = 0; n < N; n++) sym_row_P[n + 1] = sym_row_P[n] + row_counts[n];
// Fill the result matrix
int [] offset = new int[N];
for(int n = 0; n < N; n++) {
for(int i = row_P[n]; i < row_P[n + 1]; i++) { // considering element(n, col_P[i])
// Check whether element (col_P[i], n) is present
boolean present = false;
for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) {
if(col_P[m] == n) {
present = true;
if(n <= col_P[i]) { // make sure we do not add elements twice
sym_col_P[sym_row_P[n] + offset[n]] = col_P[i];
sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n;
sym_val_P[sym_row_P[n] + offset[n]] = val_P[i] + val_P[m];
sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i] + val_P[m];
}
}
}
// If (col_P[i], n) is not present, there is no addition involved
if(!present) {
sym_col_P[sym_row_P[n] + offset[n]] = col_P[i];
sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n;
sym_val_P[sym_row_P[n] + offset[n]] = val_P[i];
sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i];
}
// Update offsets
if(!present || (present && n <= col_P[i])) {
offset[n]++;
if(col_P[i] != n) offset[col_P[i]]++;
}
}
}
// Divide the result by two
for(int i = 0; i < no_elem; i++) sym_val_P[i] /= 2.0;
return new SymResult(sym_row_P, sym_col_P, sym_val_P);
}
// Compute squared Euclidean distance matrix (using BLAS)
void computeSquaredEuclideanDistance(double [] X, int N, int D, double [] DD) {
double [] dataSums = new double[N];
for(int n = 0; n < N; n++) {
for(int d = 0; d < D; d++) {
dataSums[n] += (X[n * D + d] * X[n * D + d]);
}
}
for(int n = 0; n < N; n++) {
for(int m = 0; m < N; m++) {
DD[n * N + m] = dataSums[n] + dataSums[m];
}
}
//double a1 = -2.0;
//double a2 = 1.0;
//dgemm(String arg0, String arg1, int arg2, int arg3, int arg4, double arg5, double[] arg6, int arg7, int arg8, double[] arg9, int arg10, int arg11, double arg12, double[] arg13, int arg14, int arg15);
// org.netlib.blas.Dgemm.dgemm("T", "N", N, N, D, a1, X, D, X, D, a2, DD, N);
/* DGEMM - perform one of the matrix-matrix operations */
/* C := alpha*op( A )*op( B ) + beta*C */
/* BLAS_extern void
* R BLAS declaration:
F77_NAME(dgemm)(const char *transa, const char *transb, const int *m,
const int *n, const int *k, const double *alpha,
const double *a, const int *lda,
const double *b, const int *ldb,
const double *beta, double *c, const int *ldc);*/
//org.netlib.blas.Dgemm.dgemm
}
// Makes data zero-mean
void zeroMean(double [] X, int N, int D) {
// Compute data mean
double [] mean = new double[D];
for(int n = 0; n < N; n++) {
for(int d = 0; d < D; d++) {
mean[d] += X[n * D + d];
}
}
for(int d = 0; d < D; d++) {
mean[d] /= (double) N;
}
// Subtract data mean
for(int n = 0; n < N; n++) {
for(int d = 0; d < D; d++) {
X[n * D + d] -= mean[d];
}
}
}
// Makes data zero-mean
void zeroMean(double [][] X, int N, int D) {
// Compute data mean
double [] mean = new double[D];
for(int n = 0; n < N; n++) {
for(int d = 0; d < D; d++) {
mean[d] += X[n][d];
}
}
for(int d = 0; d < D; d++) {
mean[d] /= (double) N;
}
// Subtract data mean
for(int n = 0; n < N; n++) {
for(int d = 0; d < D; d++) {
X[n][d] -= mean[d];
}
}
}
@Override
public void abort() {
abort = true;
}
}