package gr.iti.mklab.visual.examples;
import gr.iti.mklab.download.ImageDownloadResult;
import gr.iti.mklab.download.ImageDownloader;
import gr.iti.mklab.visual.datastructures.AbstractSearchStructure;
import gr.iti.mklab.visual.datastructures.Linear;
import gr.iti.mklab.visual.vectorization.ImageVectorizationResult;
import gr.iti.mklab.visual.vectorization.ImageVectorizer;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Date;
/**
* This class demonstrates multi-threaded image download, VLAD+SURF vectorization and {@link Linear} indexing.
*
* @author Eleftherios Spyromitros-Xioufis
*
*/
public class UrlIndexingMT {
public static final int maxIndexSize = 10000000;
/**
*
* @param args
* [0] folder where temporary image files and/or thumbnails are stored
* @param args
* [1] file containing the urls of the images that should indexed
* @param args
* [2] directory where the BDB index will be created
* @param args
* [3] whether to save the original images (true or false)
* @param args
* [4] number of processor threads to be used for vectorization (compute-intensive task)
* @param args
* [5] number of processor threads to be used for download
* @param args
* [6] a comma separated list with full paths to the codebook files (also works for 1 codebook)
* @param args
* [7] a comma separated list with the number of centroids in each codebook
* @param args
* [8] path to the file containing the pca projection matrix
* @param args
* [9] projection length
* @param args
* [11] minimum interval between two calls in msec (e.g. 60 ~ 1000calls/min, acts as a
* safeguard)
* @param args
* [12] start indexing at this line index inclusive
* @param args
* [13] end indexing at this line index exclusive, indexing while always stop when EOF is read
* @param args
* [14] whether the downloader should follow redirects (true or false)
* @throws Exception
*/
public static void main(String[] args) throws Exception {
String imageDownloadFolder = args[0];
String urlsFile = args[1];
String indexFolder = args[2];
boolean saveOriginal = Boolean.parseBoolean(args[3]);
// suggestion for compute-intensive tasks by
// http://codeidol.com/java/java-concurrency/Applying-Thread-Pools/Sizing-Thread-Pools/
// int numVectorizationThreads = Runtime.getRuntime().availableProcessors() + 1;
int numVectorizationThreads = Integer.parseInt(args[4]);
// if (numVectorizationThreads > 10) {
// throw new Exception("Too many vectorization threads!");
// }
int numDownloadThreads = Integer.parseInt(args[5]);
// if (numDownloadThreads > 50) {
// throw new Exception("Too many download threads!");
// }
String[] codebookFiles = args[6].split(",");
String[] numCentroidsString = args[7].split(",");
int[] numCentroids = new int[numCentroidsString.length];
for (int i = 0; i < numCentroidsString.length; i++) {
numCentroids[i] = Integer.parseInt(numCentroidsString[i]);
}
String pcaFile = args[8];
int projectionLength = Integer.parseInt(args[9]);
int minCallInterval = Integer.parseInt(args[10]);
int startLine = Integer.parseInt(args[11]);
int endLine = Integer.parseInt(args[12]);
int totalTasks = endLine - startLine;
boolean followRedirects = Boolean.parseBoolean(args[13]);
// Initialize the downloader, the vectorizer and the indexer
ImageDownloader downloader = new ImageDownloader(imageDownloadFolder, numDownloadThreads);
downloader.setSaveOriginal(saveOriginal);
downloader.setSaveThumb(false);
downloader.setFollowRedirects(followRedirects);
ImageVectorizer vectorizer = new ImageVectorizer("surf", codebookFiles, numCentroids,
projectionLength, pcaFile, true, numVectorizationThreads);
// The folder where the plain index is stored.
// String BDBEnvHome = indexFolder + "BDB_" + projectionLength + "_plain_" +
// System.currentTimeMillis();
String BDBEnvHome = indexFolder + "BDB_" + projectionLength;
AbstractSearchStructure index = new Linear(projectionLength, maxIndexSize, false, BDBEnvHome, false,
true, 0);
BufferedReader in = new BufferedReader(new FileReader(new File(urlsFile)));
// skip startLine lines
for (int i = 0; i < startLine; i++) {
in.readLine();
}
// scheduling!!!
System.out.println("Indexing started!");
long start = System.currentTimeMillis();
int submittedDownloadsCounter = 0;
int completedCounter = 0;
int failedCounter = 0;
// minimum interval between 2 download calls in msec
long lastDownLoadCall = 0;
String urlLine = "";
while (true) {
// if there are still urls to be submitted for download and the downloader's queue is not full and
// the required interval between 2 calls has passed
if (submittedDownloadsCounter < totalTasks && downloader.canAcceptMoreTasks()
&& (System.currentTimeMillis() - lastDownLoadCall) >= minCallInterval
&& (urlLine = in.readLine()) != null) {
// parse a new line from the file
String id;
String url;
// check if there is an id
if (urlLine.split("\\s+").length > 1) {
// assuming the id is first
id = urlLine.split("\\s+")[0];
url = urlLine.split("\\s+")[1];
} else {
id = String.valueOf(submittedDownloadsCounter + startLine);
url = urlLine;
}
if (index.isIndexed(id)) {
System.out.println("image:" + id + " already indexed");
completedCounter++;
} // this image has been already indexed
else {
downloader.submitImageDownloadTask(url, id);
lastDownLoadCall = System.currentTimeMillis();
}
submittedDownloadsCounter++;
System.out.println("Submitted download tasks: " + submittedDownloadsCounter + " ulr:" + url);
}
// if there is still space in the vectorizer's queue try to get an image download result and
// to submit a new image vectorization task
if (vectorizer.canAcceptMoreTasks()) {
ImageDownloadResult imdr = null;
try {
imdr = downloader.getImageDownloadResult();
} catch (Exception e) {
failedCounter++;
// e.printStackTrace();
System.out.println(e.toString());
System.out.println("" + new Date() + ": " + failedCounter + " vectors failed");
}
if (imdr != null) {
BufferedImage image = imdr.getImage();
// String url = download.getUrlStr();
String id = imdr.getImageId();
vectorizer.submitImageVectorizationTask(id, image);
} // if a download result was successfully retrieved
}
// try to get an image vectorization result and to index the vector
ImageVectorizationResult imvr = null;
try {
imvr = vectorizer.getImageVectorizationResult();
} catch (Exception e) {
failedCounter++;
e.printStackTrace();
System.out.println(e.toString());
System.out.println("" + new Date() + ": " + failedCounter + " vectors failed");
}
if (imvr != null) {
String name = imvr.getImageName();
double[] vector = imvr.getImageVector();
if (index.indexVector(name, vector)) {
completedCounter++;
} else {
failedCounter++;
}
System.out.println("" + new Date() + ": " + completedCounter + " vectors indexed");
}
// check loop termination condition
if ((completedCounter + failedCounter == totalTasks)
|| (completedCounter + failedCounter == submittedDownloadsCounter && urlLine == null)) {
System.out.println("Shutdown sequence has started!");
downloader.shutDown();
vectorizer.shutDown();
index.close();
in.close();
break;
}
}
long end = System.currentTimeMillis();
System.out.println("Indexing completed in: " + (end - start) + " ms");
System.out.println(completedCounter + " indexing tasks completed!");
System.out.println(failedCounter + " indexing tasks failed!");
}
}