package gr.iti.mklab.visual.examples;

import gr.iti.mklab.download.ImageDownload;
import gr.iti.mklab.visual.datastructures.IVFPQ;
import gr.iti.mklab.visual.datastructures.PQ;
import gr.iti.mklab.visual.datastructures.PQ.TransformationType;
import gr.iti.mklab.visual.utilities.Answer;
import gr.iti.mklab.visual.vectorization.ImageVectorizationResult;
import gr.iti.mklab.visual.vectorization.ImageVectorizer;

import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;

/**
 * This class gives an example of how a precomputed IVFPQ index of the <a
 * href="http://labs.yahoo.com/news/yfcc100m/">YFCC100M</a> collection can be loaded and used to answer
 * queries using the <a href="https://github.com/socialsensor/multimedia-indexing">multimedia-indexing</a>
 * library that was developed within SocialSensor EU Project.
 *
 * @author Eleftherios Spyromitros-Xioufis
 */
public class YFCC100MExample {

	/**
	 * The following operations are performed:
	 * <ul>
	 * <li>The IVFPQ index is loaded in memory</li>
	 * <li>The coarse and product quantizer are loaded from files.</li>
	 * <li>An image vectorizer is initialized (i.e. codebooks and pca matrix are loaded).</li>
	 * <li>A txt file with one image url per line is parsed and for each url:
	 * <ul>
	 * <li>The image is downloaded.</li>
	 * <li>The image is vectorized and the vector is used to query the index.</li>
	 * <li>Most similar images are downloaded and their urls are printed on the console.</li>
	 * </ul>
	 * </li>
	 * </ul>
	 *
	 * Depending on the number of images that are loaded, a sufficient amount of memory should be allocated
	 * using the -Xmx command (use -Xmx16g to load the full collection).
	 *
	 * @param args
	 *            [0] Path to the folder where the IVFPQ index resides
	 * @param args
	 *            [1] Number of images (i.e. vectors) to load. This number should be equal to or smaller
	 *            than the total size of the index (95213780).
	 * @param args
	 *            [2] Path to the folder where the learning files reside
	 * @param args
	 *            [3] Path to a file that contains the URLs of the query images, one per row.
	 * @param args
	 *            [4] Number of coarse quantizer lists to be searched out of 8192. Use a small number (1 or
	 *            2) if the whole collection is loaded to obtain query times less than 1 sec.
	 * @param args
	 *            [5] The size of the cache in Megabytes (use 1024 or 2048).
	 * @throws Exception
	 *             if the index or the learning files cannot be loaded, or the URL file cannot be read
	 */
	public static void main(String[] args) throws Exception {
		String ivfPqIndexPath = args[0];
		int maxNumVectors = Integer.parseInt(args[1]);
		String learningFilesPath = args[2];
		String urlFile = args[3];
		int w = Integer.parseInt(args[4]);
		long cacheSize = Long.parseLong(args[5]);

		String coarseQuantizerFilename = learningFilesPath + "qcoarse_1024d_8192k.csv";
		String productQuantizerFilename = learningFilesPath + "pq_1024_64x8_rp_ivf_8192k.csv";
		String[] codebookFiles = new String[4];
		for (int i = 0; i < 4; i++) {
			codebookFiles[i] = learningFilesPath + "surf_l2_128c_" + i + ".csv";
		}
		String pcaFile = learningFilesPath + "pca_surf_4x128_32768to1024.txt";

		// parameters of the index
		boolean readonly = true;
		boolean countSizeOnLoad = false;
		int loadCounter = maxNumVectors;
		boolean loadIndexInMemory = true;
		int projectionLength = 1024;
		// parameters of the product quantizer
		int numCoarseCentroids = 8192;
		int numSubVectors = 64;
		int numProductCentroids = 256;
		TransformationType transformation = PQ.TransformationType.RandomPermutation;

		// Create an IVFPQ object and load the index in memory
		IVFPQ ivfpq = new IVFPQ(projectionLength, maxNumVectors, readonly, ivfPqIndexPath, numSubVectors,
				numProductCentroids, transformation, numCoarseCentroids, countSizeOnLoad, loadCounter,
				loadIndexInMemory, cacheSize);
		System.out.print("Loading coarse and product quantizer..");
		ivfpq.loadCoarseQuantizer(coarseQuantizerFilename);
		ivfpq.loadProductQuantizer(productQuantizerFilename);
		ivfpq.setW(w);
		System.out.println("..completed!");

		System.out.println("Initializing the vectorizer..");
		int[] numCentroids = { 128, 128, 128, 128 };
		ImageVectorizer vectorizer = new ImageVectorizer("surf", codebookFiles, numCentroids,
				projectionLength, pcaFile, true, 1);
		// set a max size similar to that of the indexed images!
		vectorizer.setMaxImageSizeInPixels(640 * 480);
		System.out.println("..completed!");

		String downloadFolder = "results_" + maxNumVectors + "_w=" + w + "/";
		createDownloadFolder(downloadFolder);

		// NOTE(review): the original accumulated millisecond doubles into int counters, silently
		// truncating every query's fractional part; the cpu-time total was also narrowed long->int.
		double totalSearchTime = 0;
		double totalLookupTime = 0;
		long totalQueryCpuTime = 0;
		int queryCounter = 0;
		// try-with-resources guarantees the reader is closed even if a query throws;
		// UTF-8 is used explicitly instead of the platform default charset
		try (BufferedReader in = Files.newBufferedReader(Paths.get(urlFile), StandardCharsets.UTF_8)) {
			String url;
			while ((url = in.readLine()) != null) {
				queryCounter++;
				String queryId = "query_image_" + queryCounter; // a dummy id is given
				ImageDownload imd = new ImageDownload(url.replace(" ", "%20"), queryId, downloadFolder,
						false, true, false);
				System.out.print("Downloading the image..");
				BufferedImage image = imd.downloadImage();
				System.out.println("..completed!");

				System.out.print("Computing the vector..");
				vectorizer.submitImageVectorizationTask(queryId, image);
				ImageVectorizationResult result = vectorizer.getImageVectorizationResultWait();
				double[] vector = result.getImageVector();
				System.out.println("Query vector: " + Arrays.toString(Arrays.copyOf(vector, 10)));
				System.out.println("..completed!");

				System.out.print("Computing neighbors..");
				long start = getCpuTime();
				Answer ans = ivfpq.computeNearestNeighbors(30, vector);
				long end = getCpuTime();
				long queryCpuTime = end - start;
				System.out.println("..completed!");

				// index-internal timings are reported in nanoseconds; convert to ms
				double searchTime = (double) ans.getIndexSearchTime() / 1000000.0;
				double lookupTime = (double) ans.getNameLookupTime() / 1000000.0;
				System.out.println("Search time: " + searchTime + " ms");
				System.out.println("Lookup time: " + lookupTime + " ms");
				System.out.println("Total query cpu time: " + queryCpuTime + " ms");
				totalSearchTime += searchTime;
				totalLookupTime += lookupTime;
				totalQueryCpuTime += queryCpuTime;

				String[] resultUrls = ans.getIds();
				double[] distances = ans.getDistances();
				for (int i = 0; i < resultUrls.length; i++) {
					String resultUrl = decodeUrl(resultUrls[i]);
					System.out.println(resultUrl + " " + distances[i]);
					try { // downloading the image; a single failed result should not abort the query
						String id = queryId + "_nn_" + i;
						imd = new ImageDownload(resultUrl, id, downloadFolder, false, true, false);
						System.out.print("Downloading result image..");
						imd.downloadImage();
						System.out.println("..completed!");
					} catch (Exception e) {
						System.out.println(e.getMessage());
					}
				}
			}
		}

		// guard against division by zero when the URL file is empty
		if (queryCounter > 0) {
			System.out.println("Average search time: " + totalSearchTime / queryCounter + " ms");
			System.out.println("Average lookup time: " + totalLookupTime / queryCounter + " ms");
			System.out.println("Average total query cpu time: " + (double) totalQueryCpuTime / queryCounter
					+ " ms");
		}
		vectorizer.shutDown();
	}

	/**
	 * Creates the folder where query and result images are saved, if it does not already exist.
	 * <p>
	 * BUG FIX: the original set {@code result = true} unconditionally after calling {@code mkdir()},
	 * ignoring its boolean return, so "DIR created" was printed even when creation failed; a thrown
	 * {@link SecurityException} was silently swallowed.
	 *
	 * @param downloadFolder
	 *            path of the folder to create
	 */
	private static void createDownloadFolder(String downloadFolder) {
		File theDir = new File(downloadFolder);
		if (!theDir.exists()) {
			System.out.println("creating directory to save query and result images: " + downloadFolder);
			if (theDir.mkdirs()) {
				System.out.println("DIR created");
			} else {
				System.err.println("Failed to create directory: " + downloadFolder);
			}
		}
	}

	/**
	 * This method is used to compile the Flickr URL from its parts.
	 *
	 * @param encodedUrl
	 *            the URL in an encoded form: underscore-separated {@code farmId_serverId_identifier_secret}
	 *            (an optional 5th isVideo part is ignored)
	 * @return the decoded https Flickr URL, using the "z" (medium) image size
	 * @throws IllegalArgumentException
	 *             if the encoded URL has fewer than 4 underscore-separated parts
	 */
	private static String decodeUrl(String encodedUrl) {
		String imageSize = "z"; // one of [mstzb]
		String[] parts = encodedUrl.split("_");
		if (parts.length < 4) {
			throw new IllegalArgumentException("Malformed encoded url: " + encodedUrl);
		}
		String farmId = parts[0];
		String serverId = parts[1];
		String identifier = parts[2];
		String secret = parts[3];
		// parts[4] (isVideo) is present in the encoding but not needed for image URLs
		return "https://farm" + farmId + ".staticflickr.com/" + serverId + "/" + identifier + "_" + secret
				+ "_" + imageSize + ".jpg";
	}

	/**
	 * Returns the CPU time of the current thread in milliseconds, or 0 if thread CPU time measurement is
	 * not supported by the JVM.
	 *
	 * @return current thread CPU time in milliseconds (0 if unsupported)
	 */
	public static long getCpuTime() {
		ThreadMXBean bean = ManagementFactory.getThreadMXBean();
		// getCurrentThreadCpuTime() reports nanoseconds; truncate to whole milliseconds
		return bean.isCurrentThreadCpuTimeSupported() ? bean.getCurrentThreadCpuTime() / 1000000L : 0L;
	}
}