BHTSne.java example

Explorer
trombone-master
- src
  - main
    - java
  - test
    - java
      - org
        voyanttools
        trombone
        document
        MetadataTest.java
        input
        expand
        ArchiveExpanderTest.java
        CompressedExpanderTest.java
        XmlExpanderTest.java
        XslExpanderTest.java
        extract
        BagItExtractorTest.java
        TikaExtractorTest.java
        XmlExtractorTest.java
        index
        LuceneIndexerTest.java
        lucene
        StoredToLuceneDocumentMapperTest.java
        analysis
        OpenNlpLemmaTokenizerTest.java
        StanfordNlpLemmaTokenizerTest.java
        search
        FieldPrefixAwareSimpleQueryParserTest.java
        model
        CorpusCollocateTest.java
        CorpusTermMinimalsDBTest.java
        CorpusTermsQueueTest.java
        DocumentTermsTest.java
        KeywordsTest.java
        TableTest.java
        storage
        file
        FileStoredDocumentSourceStorageTest.java
        TromboneMigration.java
        tool
        DocumentCollocatesTest.java
        DocumentTermsTest.java
        KwicsTest.java
        StoredResourceTest.java
        TableCorrelationsTest.java
        TableManagerTest.java
        build
        CorpusBuilderTest.java
        CorpusCreatorTest.java
        DocumentExpanderTest.java
        DocumentExtractorTest.java
        DocumentStorerTest.java
        corpus
        CorpusCollocatesTest.java
        CorpusFacetsTest.java
        CorpusManagerTest.java
        CorpusTermsTest.java
        DocumentContextsTest.java
        DocumentNgramsTest.java
        DocumentTermsTest.java
        DocumentTokensTest.java
        DocumentsMetadataTest.java
        SimpleSortedSetFacetsExample.java
        VelizaTest.java
        util
        EmbeddedWebServer.java
        TestHelper.java
/*
 *
 * This Java port of Barnes Hut t-SNE is Copyright (c) Leif Jonsson 2016 and 
 * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the Delft University of Technology.
 * 4. Neither the name of the Delft University of Technology nor the names of
 *    its contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 *
 */
package com.jujutsu.tsne.barneshut;

import static java.lang.Math.exp;
import static java.lang.Math.log;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;

import com.jujutsu.tsne.PrincipalComponentAnalysis;
import com.jujutsu.utils.MatrixOps;

public class BHTSne implements BarnesHutTSne {

	protected final Distance distance = new EuclideanDistance();
	protected volatile boolean abort = false;

	@Override
	public double[][] tsne(TSneConfiguration config) {
		return run(config);
	}

	private double[] flatten(double[][] x) {
		int noCols = x[0].length;
		double [] flat = new double[x.length*x[0].length];
		for (int i = 0; i < x.length; i++) {
			for (int j = 0; j < x[i].length; j++) {
				flat[i*noCols+j] = x[i][j];
			}
		}
		return flat;
	}

	private double [][] expand(double[]x, int N, int D) {
		double [][] expanded = new double[N][D];
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < D; col++) {
				expanded[row][col] = x[row*D+col];
			}
		}
		return expanded;
	}

	static double sign_tsne(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); }

	// Perform t-SNE
	double [][] run(TSneConfiguration parameterObject) {
		int D = parameterObject.getXStartDim();
		double[][] Xin = parameterObject.getXin();
		boolean exact = (parameterObject.getTheta() == .0);
		
		if(exact) throw new IllegalArgumentException("The Barnes Hut implementation does not support exact inference yet (theta==0.0), if you want exact t-SNE please use one of the standard t-SNE implementations (FastTSne for instance)");

		if(parameterObject.usePca() && D > parameterObject.getInitialDims() && parameterObject.getInitialDims() > 0) {
			PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis();
			Xin = pca.pca(Xin, parameterObject.getInitialDims());
			D = parameterObject.getInitialDims();
			System.out.println("X:Shape after PCA is = " + Xin.length + " x " + Xin[0].length);
		}
		
		double [] X = flatten(Xin);	
		int N = parameterObject.getNrRows();
		int no_dims = parameterObject.getOutputDims();
		
		double [] Y = new double[N*no_dims];
		System.out.println("X:Shape is = " + N + " x " + D);
		// Determine whether we are using an exact algorithm
		double perplexity = parameterObject.getPerplexity();
		if(N - 1 < 3 * perplexity) { throw new IllegalArgumentException("Perplexity too large for the number of data points!\n"); }
		System.out.printf("Using no_dims = %d, perplexity = %f, and theta = %f\n", no_dims, perplexity, parameterObject.getTheta());

		// Set learning parameters
		double total_time = 0;
		int stop_lying_iter = 250, mom_switch_iter = 250;
		double momentum = .5, final_momentum = .8;
		double eta = 200.0;

		// Allocate some memory
		double [] dY    = new double[N * no_dims];
		double [] uY    = new double[N * no_dims];
		double [] gains = new double[N * no_dims];
		for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0;

		// Normalize input data (to prevent numerical problems)
		System.out.println("Computing input similarities...");
		long start = System.currentTimeMillis();
		//zeroMean(X, N, D);
		double max_X = .0;
		for(int i = 0; i < N * D; i++) {
			if(X[i] > max_X) max_X = X[i];
		}

		for(int i = 0; i < N * D; i++) X[i] /= max_X;

		double [] P = null;
		int K  = (int) (3 * perplexity);
		int [] row_P = new int[N+1];
		int [] col_P = new int[N*K];
		double [] val_P = new double[N*K];
		/**_row_P = (int*)    malloc((N + 1) * sizeof(int));
		 *_col_P = (int*)    calloc(N * K, sizeof(int));
		 *_val_P = (double*) calloc(N * K, sizeof(double));*/
		// Compute input similarities for exact t-SNE
		if(exact) {

			// Compute similarities
			P = new double[N * N];
			computeGaussianPerplexity(X, N, D, P, perplexity);

			// Symmetrize input similarities
			System.out.println("Symmetrizing...");
			int nN = 0;
			for(int n = 0; n < N; n++) {
				int mN = 0;
				for(int m = n + 1; m < N; m++) {
					P[nN + m] += P[mN + n];
					P[mN + n]  = P[nN + m];
					mN += N;
				}
				nN += N;
			}
			double sum_P = .0;
			for(int i = 0; i < N * N; i++) sum_P += P[i];
			for(int i = 0; i < N * N; i++) P[i] /= sum_P;
		}

		// Compute input similarities for approximate t-SNE
		else {

			// Compute asymmetric pairwise input similarities
			computeGaussianPerplexity(X, N, D, row_P, col_P, val_P, perplexity, K);

			// Verified that val_P,col_P,row_P is the same at this point

			// Symmetrize input similarities
			SymResult res = symmetrizeMatrix(row_P, col_P, val_P, N);
			row_P = res.sym_row_P;
			col_P = res.sym_col_P;
			val_P = res.sym_val_P;

			double sum_P = .0;
			for(int i = 0; i < row_P[N]; i++) sum_P += val_P[i];
			for(int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P;
		}
		long end = System.currentTimeMillis();

		// Lie about the P-values
		if(exact) { for(int i = 0; i < N * N; i++)        P[i] *= 12.0; }
		else {      for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; }

		// Initialize solution (randomly)
		for(int i = 0; i < N * no_dims; i++) Y[i] = ThreadLocalRandom.current().nextDouble() * 0.0001;

		// Perform main training loop
		if(exact) System.out.printf("Done in %4.2f seconds!\nLearning embedding...\n", (end - start) / 1000.0);
		else System.out.printf("Done in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (end - start) / 1000.0, (double) row_P[N] / ((double) N * (double) N));
		start = System.currentTimeMillis();
		for(int iter = 0; iter < parameterObject.getMaxIter() && !abort; iter++) {

			if(exact) computeExactGradient(P, Y, N, no_dims, dY);
			// Compute (approximate) gradient
			else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, parameterObject.getTheta());

			updateGradient(N, no_dims, Y, momentum, eta, dY, uY, gains);

			// Make solution zero-mean
			zeroMean(Y, N, no_dims);

			// Stop lying about the P-values after a while, and switch momentum
			if(iter == stop_lying_iter) {
				if(exact) { for(int i = 0; i < N * N; i++)        P[i] /= 12.0; }
				else      { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; }
			}
			if(iter == mom_switch_iter) momentum = final_momentum;

			// Print out progress
			if(((iter > 0 && iter % 50 == 0) || iter == parameterObject.getMaxIter() - 1) && !parameterObject.silent() ) {
				end = System.currentTimeMillis();
				String err_string = "not_calculated";
				if(parameterObject.printError()) {
					double C = .0;
					if(exact) C = evaluateError(P, Y, N, no_dims);
					else      C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, parameterObject.getTheta());  // doing approximate computation here!
					err_string = "" + C;
				}
				if(iter == 0)
					System.out.printf("Iteration %d: error is %s\n", iter + 1, err_string);
				else {
					total_time += (end - start) / 1000.0;
					System.out.printf("Iteration %d: error is %s (50 iterations in %4.2f seconds)\n", iter, err_string, (end - start) / 1000.0);
				}
				start = System.currentTimeMillis();
			}
		}
		end = System.currentTimeMillis(); total_time += (end - start) / 1000.0;

		System.out.printf("Fitting performed in %4.2f seconds.\n", total_time);
		return expand(Y,N,no_dims);
	}

	void updateGradient(int N, int no_dims, double[] Y, double momentum, double eta, double[] dY, double[] uY,
			double[] gains) {
		for(int i = 0; i < N * no_dims; i++)  {
			// Update gains
			gains[i] = (sign_tsne(dY[i]) != sign_tsne(uY[i])) ? (gains[i] + .2) : (gains[i] * .8);
			if(gains[i] < .01) gains[i] = .01;

			// Perform gradient update (with momentum and gains)
			Y[i] = Y[i] + uY[i];
			uY[i] = momentum * uY[i] - eta * gains[i] * dY[i];
		}
	}

	// Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm)
	void computeGradient(double [] P, int [] inp_row_P, int [] inp_col_P, double [] inp_val_P, double [] Y, int N, int D, double [] dC, double theta)
	{
		// Construct space-partitioning tree on current map
		SPTree tree = new SPTree(D, Y, N);

		// Compute all terms required for t-SNE gradient
		double [] sum_Q = new double[1];
		double [] pos_f = new double[N * D];
		double [][] neg_f = new double[N][D];

		tree.computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f);
		for(int n = 0; n < N; n++) tree.computeNonEdgeForces(n, theta, neg_f[n], sum_Q);

		// Compute final t-SNE gradient
		for(int n = 0; n < N; n++) {
			for(int d = 0; d < D; d++) {
				dC[n*D+d] = pos_f[n*D+d] - (neg_f[n][d] / sum_Q[0]);
			}
		}
	}

	// Compute gradient of the t-SNE cost function (exact)
	void computeExactGradient(double [] P, double [] Y, int N, int D, double [] dC) {

		// Make sure the current gradient contains zeros
		for(int i = 0; i < N * D; i++) dC[i] = 0.0;

		// Compute the squared Euclidean distance matrix
		double [] DD = new double[N * N];
		computeSquaredEuclideanDistance(Y, N, D, DD);

		// Compute Q-matrix and normalization sum
		double [] Q    = new double[N * N];
		double sum_Q = .0;
		int nN = 0;
		for(int n = 0; n < N; n++) {
			for(int m = 0; m < N; m++) {
				if(n != m) {
					Q[nN + m] = 1 / (1 + DD[nN + m]);
					sum_Q += Q[nN + m];
				}
			}
			nN += N;
		}

		// Perform the computation of the gradient
		nN = 0;
		int nD = 0;
		for(int n = 0; n < N; n++) {
			int mD = 0;
			for(int m = 0; m < N; m++) {
				if(n != m) {
					double mult = (P[nN + m] - (Q[nN + m] / sum_Q)) * Q[nN + m];
					for(int d = 0; d < D; d++) {
						dC[nD + d] += (Y[nD + d] - Y[mD + d]) * mult;
					}
				}
				mD += D;
			}
			nN += N;
			nD += D;
		}
	}

	// Evaluate t-SNE cost function (exactly)
	double evaluateError(double [] P, double [] Y, int N, int D) {

		// Compute the squared Euclidean distance matrix
		double [] DD = new double[N * N];
		double [] Q = new double[N * N];
		computeSquaredEuclideanDistance(Y, N, D, DD);

		// Compute Q-matrix and normalization sum
		int nN = 0;
		double sum_Q = Double.MIN_VALUE;
		for(int n = 0; n < N; n++) {
			for(int m = 0; m < N; m++) {
				if(n != m) {
					Q[nN + m] = 1 / (1 + DD[nN + m]);
					sum_Q += Q[nN + m];
				}
				else Q[nN + m] = Double.MIN_VALUE;
			}
			nN += N;
		}
		for(int i = 0; i < N * N; i++) Q[i] /= sum_Q;

		// Sum t-SNE error
		double C = .0;
		for(int n = 0; n < N * N; n++) {
			C += P[n] * log((P[n] + Double.MIN_VALUE) / (Q[n] + Double.MIN_VALUE));
		}

		return C;
	}

	// Evaluate t-SNE cost function (approximately)
	double evaluateError(int [] row_P, int [] col_P, double [] val_P, double [] Y, int N, int D, double theta)
	{
		// Get estimate of normalization term
		SPTree tree = new SPTree(D, Y, N);
		double [] buff = new double[D];
		double [] sum_Q = new double[1];
		for(int n = 0; n < N; n++) tree.computeNonEdgeForces(n, theta, buff, sum_Q);

		// Loop over all edges to compute t-SNE error
		int ind1, ind2;
		double C = .0, Q;
		for(int n = 0; n < N; n++) {
			ind1 = n * D;
			for(int i = row_P[n]; i < row_P[n + 1]; i++) {
				Q = .0;
				ind2 = col_P[i] * D;
				for(int d = 0; d < D; d++) buff[d]  = Y[ind1 + d];
				for(int d = 0; d < D; d++) buff[d] -= Y[ind2 + d];
				for(int d = 0; d < D; d++) Q += buff[d] * buff[d];
				Q = (1.0 / (1.0 + Q)) / sum_Q[0];
				C += val_P[i] * log((val_P[i] + Double.MIN_VALUE) / (Q + Double.MIN_VALUE));
			}
		}

		return C;
	}

	// Compute input similarities with a fixed perplexity
	void computeGaussianPerplexity(double [] X, int N, int D, double [] P, double perplexity) {

		// Compute the squared Euclidean distance matrix
		double [] DD = new double[N * N];
		computeSquaredEuclideanDistance(X, N, D, DD);

		// Compute the Gaussian kernel row by row
		int nN = 0;
		for(int n = 0; n < N; n++) {

			// Initialize some variables
			boolean found = false;
			double beta = 1.0;
			double min_beta = -Double.MAX_VALUE;
			double max_beta =  Double.MAX_VALUE;
			double tol = 1e-5;
			double sum_P = Double.MIN_VALUE;

			// Iterate until we found a good perplexity
			int iter = 0;
			while(!found && iter < 200) {

				// Compute Gaussian kernel row
				for(int m = 0; m < N; m++) P[nN + m] = exp(-beta * DD[nN + m]);
				P[nN + n] = Double.MIN_VALUE;

				// Compute entropy of current row
				sum_P = Double.MIN_VALUE;
				for(int m = 0; m < N; m++) sum_P += P[nN + m];
				double H = 0.0;
				for(int m = 0; m < N; m++) H += beta * (DD[nN + m] * P[nN + m]);
				H = (H / sum_P) + log(sum_P);

				// Evaluate whether the entropy is within the tolerance level
				double Hdiff = H - log(perplexity);
				if(Hdiff < tol && -Hdiff < tol) {
					found = true;
				}
				else {
					if(Hdiff > 0) {
						min_beta = beta;
						if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
							beta *= 2.0;
						else
							beta = (beta + max_beta) / 2.0;
					}
					else {
						max_beta = beta;
						if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
							beta /= 2.0;
						else
							beta = (beta + min_beta) / 2.0;
					}
				}

				// Update iteration counter
				iter++;
			}

			// Row normalize P
			for(int m = 0; m < N; m++) P[nN + m] /= sum_P;
			nN += N;
		}
	}

	// Compute input similarities with a fixed perplexity using ball trees
	void computeGaussianPerplexity(double [] X, int N, int D, int [] _row_P, int [] _col_P, double [] _val_P, double perplexity, int K) {
		if(perplexity > K) System.out.println("Perplexity should be lower than K!");

		// Allocate the memory we need
		/**_row_P = (int*)    malloc((N + 1) * sizeof(int));
		 *_col_P = (int*)    calloc(N * K, sizeof(int));
		 *_val_P = (double*) calloc(N * K, sizeof(double));
		    if(*_row_P == null || *_col_P == null || *_val_P == null) { Rcpp::stop("Memory allocation failed!\n"); }*/
		int [] row_P = _row_P;
		int [] col_P = _col_P;
		double [] val_P = _val_P;
		double [] cur_P = new double[N - 1];

		row_P[0] = 0;
		for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + K;    

		// Build ball tree on data set
		VpTree<DataPoint> tree = new VpTree<DataPoint>(distance);
		final DataPoint [] obj_X = new DataPoint [N];
		for(int n = 0; n < N; n++) {
			double [] row = MatrixOps.extractRowFromFlatMatrix(X,n,D);
			obj_X[n] = new DataPoint(D, n, row);
		}
		tree.create(obj_X);

		// VERIFIED THAT TREES LOOK THE SAME
		//System.out.println("Created Tree is: ");
		//			AdditionalInfoProvider pp = new AdditionalInfoProvider() {			
		//				@Override
		//				public String provideInfo(Node node) {
		//					return "" + obj_X[node.index].index();
		//				}
		//			};
		//			TreePrinter printer = new TreePrinter(pp);
		//			printer.printTreeHorizontal(tree.getRoot());

		// Loop over all points to find nearest neighbors
		System.out.println("Building tree...");
		List<DataPoint> indices = new ArrayList<>();
		List<Double> distances = new ArrayList<>();
		for(int n = 0; n < N; n++) {

			if(n % 10000 == 0) System.out.printf(" - point %d of %d\n", n, N);

			// Find nearest neighbors
			indices.clear();
			distances.clear();
			//System.out.println("Looking at: " + obj_X.get(n).index());
			tree.search(obj_X[n], K + 1, indices, distances);

			// Initialize some variables for binary search
			boolean found = false;
			double beta = 1.0;
			double min_beta = -Double.MAX_VALUE;
			double max_beta =  Double.MAX_VALUE;
			double tol = 1e-5;

			// Iterate until we found a good perplexity
			int iter = 0; 
			double sum_P = 0.;
			while(!found && iter < 200) {

				// Compute Gaussian kernel row and entropy of current row
				sum_P = Double.MIN_VALUE;
				double H = .0;
				for(int m = 0; m < K; m++) {
					cur_P[m] = exp(-beta * distances.get(m + 1));
					sum_P += cur_P[m];
					H += beta * (distances.get(m + 1) * cur_P[m]);
				}
				H = (H / sum_P) + log(sum_P);

				// Evaluate whether the entropy is within the tolerance level
				double Hdiff = H - log(perplexity);
				if(Hdiff < tol && -Hdiff < tol) {
					found = true;
				}
				else {
					if(Hdiff > 0) {
						min_beta = beta;
						if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
							beta *= 2.0;
						else
							beta = (beta + max_beta) / 2.0;
					}
					else {
						max_beta = beta;
						if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
							beta /= 2.0;
						else
							beta = (beta + min_beta) / 2.0;
					}
				}

				// Update iteration counter
				iter++;
			}

			// Row-normalize current row of P and store in matrix 
			for(int m = 0; m < K; m++) {
				cur_P[m] /= sum_P;
				col_P[row_P[n] + m] = indices.get(m + 1).index();
				val_P[row_P[n] + m] = cur_P[m];
			}
		}
	}

	void computeGaussianPerplexity(double [] X, int N, int D, int [] _row_P, int [] _col_P, double [] _val_P, double perplexity, double threshold) {
		// Allocate some memory we need for computations
		double [] buff  = new double[D];
		double [] DD    = new double[N];
		double [] cur_P = new double[N];

		// Compute the Gaussian kernel row by row (to find number of elements in sparse P)
		int total_count = 0;
		for(int n = 0; n < N; n++) {

			// Compute the squared Euclidean distance matrix
			for(int m = 0; m < N; m++) {
				for(int d = 0; d < D; d++) buff[d]  = X[n * D + d];
				for(int d = 0; d < D; d++) buff[d] -= X[m * D + d];
				DD[m] = .0;
				for(int d = 0; d < D; d++) DD[m] += buff[d] * buff[d];
			}

			// Initialize some variables
			boolean found = false;
			double beta = 1.0;
			double min_beta = -Double.MAX_VALUE;
			double max_beta =  Double.MAX_VALUE;
			double tol = 1e-5;

			// Iterate until we found a good perplexity
			int iter = 0; 
			double sum_P = 0.;
			while(!found && iter < 200) {

				// Compute Gaussian kernel row
				for(int m = 0; m < N; m++) cur_P[m] = exp(-beta * DD[m]);
				cur_P[n] = Double.MIN_VALUE;

				// Compute entropy of current row
				sum_P = Double.MIN_VALUE;
				for(int m = 0; m < N; m++) sum_P += cur_P[m];
				double H = 0.0;
				for(int m = 0; m < N; m++) H += beta * (DD[m] * cur_P[m]);
				H = (H / sum_P) + log(sum_P);

				// Evaluate whether the entropy is within the tolerance level
				double Hdiff = H - log(perplexity);
				if(Hdiff < tol && -Hdiff < tol) {
					found = true;
				}
				else {
					if(Hdiff > 0) {
						min_beta = beta;
						if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
							beta *= 2.0;
						else
							beta = (beta + max_beta) / 2.0;
					}
					else {
						max_beta = beta;
						if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
							beta /= 2.0;
						else
							beta = (beta + min_beta) / 2.0;
					}
				}

				// Update iteration counter
				iter++;
			}

			// Row-normalize and threshold current row of P
			for(int m = 0; m < N; m++) cur_P[m] /= sum_P;
			for(int m = 0; m < N; m++) {
				if(cur_P[m] > threshold / (double) N) total_count++;
			}
		}

		// Allocate the memory we need
		_row_P = new int[N + 1];
		_col_P = new int[total_count];
		_val_P = new double[total_count];
		int [] row_P = _row_P;
		int [] col_P = _col_P;
		double [] val_P = _val_P;
		row_P[0] = 0;

		// Compute the Gaussian kernel row by row (this time, store the results)
		int count = 0;
		for(int n = 0; n < N; n++) {

			// Compute the squared Euclidean distance matrix
			for(int m = 0; m < N; m++) {
				for(int d = 0; d < D; d++) buff[d]  = X[n * D + d];
				for(int d = 0; d < D; d++) buff[d] -= X[m * D + d];
				DD[m] = .0;
				for(int d = 0; d < D; d++) DD[m] += buff[d] * buff[d];
			}

			// Initialize some variables
			boolean found = false;
			double beta = 1.0;
			double min_beta = -Double.MAX_VALUE;
			double max_beta =  Double.MAX_VALUE;
			double tol = 1e-5;

			// Iterate until we found a good perplexity
			int iter = 0; 
			double sum_P = 0.;
			while(!found && iter < 200) {

				// Compute Gaussian kernel row
				for(int m = 0; m < N; m++) cur_P[m] = exp(-beta * DD[m]);
				cur_P[n] = Double.MIN_VALUE;

				// Compute entropy of current row
				sum_P = Double.MIN_VALUE;
				for(int m = 0; m < N; m++) sum_P += cur_P[m];
				double H = 0.0;
				for(int m = 0; m < N; m++) H += beta * (DD[m] * cur_P[m]);
				H = (H / sum_P) + log(sum_P);

				// Evaluate whether the entropy is within the tolerance level
				double Hdiff = H - log(perplexity);
				if(Hdiff < tol && -Hdiff < tol) {
					found = true;
				}
				else {
					if(Hdiff > 0) {
						min_beta = beta;
						if(max_beta == Double.MAX_VALUE || max_beta == -Double.MAX_VALUE)
							beta *= 2.0;
						else
							beta = (beta + max_beta) / 2.0;
					}
					else {
						max_beta = beta;
						if(min_beta == -Double.MAX_VALUE || min_beta == Double.MAX_VALUE)
							beta /= 2.0;
						else
							beta = (beta + min_beta) / 2.0;
					}
				}

				// Update iteration counter
				iter++;
			}

			// Row-normalize and threshold current row of P
			for(int m = 0; m < N; m++) cur_P[m] /= sum_P;
			for(int m = 0; m < N; m++) {
				if(cur_P[m] > threshold / (double) N) {
					col_P[count] = m;
					val_P[count] = cur_P[m];
					count++;
				}
			}
			row_P[n + 1] = count;
		}

	}

	class SymResult {
		int []    sym_row_P;
		int []    sym_col_P;
		double [] sym_val_P;

		public SymResult(int[] sym_row_P, int[] sym_col_P, double[] sym_val_P) {
			super();
			this.sym_row_P = sym_row_P;
			this.sym_col_P = sym_col_P;
			this.sym_val_P = sym_val_P;
		}
	}

	SymResult symmetrizeMatrix(int [] _row_P, int [] _col_P, double [] _val_P, int N) {

		// Get sparse matrix
		int [] row_P = _row_P;
		int [] col_P = _col_P;
		double [] val_P = _val_P;

		// Count number of elements and row counts of symmetric matrix
		int [] row_counts = new int[N];
		for(int n = 0; n < N; n++) {
			for(int i = row_P[n]; i < row_P[n + 1]; i++) {

				// Check whether element (col_P[i], n) is present
				boolean present = false;
				for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) {
					if(col_P[m] == n) present = true;
				}
				if(present) row_counts[n]++;
				else {
					row_counts[n]++;
					row_counts[col_P[i]]++;
				}
			}
		}
		int no_elem = 0;
		for(int n = 0; n < N; n++) no_elem += row_counts[n];

		// Allocate memory for symmetrized matrix
		int []    sym_row_P = new int[N + 1];
		int []    sym_col_P = new int[no_elem];
		double [] sym_val_P = new double[no_elem];

		// Construct new row indices for symmetric matrix
		sym_row_P[0] = 0;
		for(int n = 0; n < N; n++) sym_row_P[n + 1] = sym_row_P[n] + row_counts[n];

		// Fill the result matrix
		int [] offset = new int[N];
		for(int n = 0; n < N; n++) {
			for(int i = row_P[n]; i < row_P[n + 1]; i++) {                                  // considering element(n, col_P[i])

				// Check whether element (col_P[i], n) is present
				boolean present = false;
				for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) {
					if(col_P[m] == n) {
						present = true;
						if(n <= col_P[i]) {                                                 // make sure we do not add elements twice
							sym_col_P[sym_row_P[n]        + offset[n]]        = col_P[i];
							sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n;
							sym_val_P[sym_row_P[n]        + offset[n]]        = val_P[i] + val_P[m];
							sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i] + val_P[m];
						}
					}
				}

				// If (col_P[i], n) is not present, there is no addition involved
				if(!present) {
					sym_col_P[sym_row_P[n]        + offset[n]]        = col_P[i];
					sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n;
					sym_val_P[sym_row_P[n]        + offset[n]]        = val_P[i];
					sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i];
				}

				// Update offsets
				if(!present || (present && n <= col_P[i])) {
					offset[n]++;
					if(col_P[i] != n) offset[col_P[i]]++;               
				}
			}
		}

		// Divide the result by two
		for(int i = 0; i < no_elem; i++) sym_val_P[i] /= 2.0;

		return new SymResult(sym_row_P, sym_col_P, sym_val_P);
	}

	// Compute squared Euclidean distance matrix (using BLAS)
	void computeSquaredEuclideanDistance(double [] X, int N, int D, double [] DD) {
		double [] dataSums = new double[N];
		for(int n = 0; n < N; n++) {
			for(int d = 0; d < D; d++) {
				dataSums[n] += (X[n * D + d] * X[n * D + d]);
			}
		}
		for(int n = 0; n < N; n++) {
			for(int m = 0; m < N; m++) {
				DD[n * N + m] = dataSums[n] + dataSums[m];
			}
		}
		//double a1 = -2.0;
		//double a2 = 1.0;
		//dgemm(String arg0, String arg1, int arg2, int arg3, int arg4, double arg5, double[] arg6, int arg7, int arg8, double[] arg9, int arg10, int arg11, double arg12, double[] arg13, int arg14, int arg15);
		//  org.netlib.blas.Dgemm.dgemm("T", "N", N, N, D, a1, X, D, X, D, a2, DD, N);

		/* DGEMM - perform one of the matrix-matrix operations    */
		/* C := alpha*op( A )*op( B ) + beta*C */
		/* BLAS_extern void
		 * R BLAS declaration:
		    F77_NAME(dgemm)(const char *transa, const char *transb, const int *m,
		    		const int *n, const int *k, const double *alpha,
		    		const double *a, const int *lda,
		    		const double *b, const int *ldb,
		    		const double *beta, double *c, const int *ldc);*/

		//org.netlib.blas.Dgemm.dgemm
	}


	// Makes data zero-mean
	void zeroMean(double [] X, int N, int D) {

		// Compute data mean
		double [] mean = new double[D];
		for(int n = 0; n < N; n++) {
			for(int d = 0; d < D; d++) {
				mean[d] += X[n * D + d];
			}
		}
		for(int d = 0; d < D; d++) {
			mean[d] /= (double) N;
		}

		// Subtract data mean
		for(int n = 0; n < N; n++) {
			for(int d = 0; d < D; d++) {
				X[n * D + d] -= mean[d];
			}
		}
	}

	// Makes data zero-mean
	void zeroMean(double [][] X, int N, int D) {

		// Compute data mean
		double [] mean = new double[D];
		for(int n = 0; n < N; n++) {
			for(int d = 0; d < D; d++) {
				mean[d] += X[n][d];
			}
		}
		for(int d = 0; d < D; d++) {
			mean[d] /= (double) N;
		}

		// Subtract data mean
		for(int n = 0; n < N; n++) {
			for(int d = 0; d < D; d++) {
				X[n][d] -= mean[d];
			}
		}
	}

	@Override
	public void abort() {
		abort = true;
	}

}