AbstractSearchStructure.java example

Explorer
multimedia-indexing-master
- src
  - main
    - java
      - gr
        iti
        mklab
        download
        ImageDownload.java
        ImageDownloadResult.java
        ImageDownloader.java
        visual
        aggregation
        AbstractFeatureAggregator.java
        BowAggregator.java
        VladAggregator.java
        VladAggregatorMultipleVocabularies.java
        datastructures
        AbstractSearchStructure.java
        IVFPQ.java
        Linear.java
        PQ.java
        dimreduction
        PCA.java
        PCALearningExample.java
        PCAProjectionExample.java
        examples
        Example.java
        FeatureExtraction.java
        FolderIndexingMT.java
        IndexTransformation.java
        UrlIndexingMT.java
        YFCC100MExample.java
        extraction
        AbstractFeatureExtractor.java
        ColorSURFExtractor.java
        ImageScaling.java
        RootSIFTExtractor.java
        SIFTExtractor.java
        SURFExtractor.java
        mapreduce
        FloatArrayWritable.java
        HadoopImageDownload.java
        VisualJob.java
        VisualReducer.java
        VisualThreadedMapper.java
        quantization
        AbstractQuantizerLearning.java
        CoarseQuantizerLearning.java
        CodebookLearning.java
        ProductQuantizationLearning.java
        ResidualVectorComputation.java
        SampleLocalFeatures.java
        utilities
        Answer.java
        AnswerWithGeolocation.java
        FeatureIO.java
        ImageIOGreyScale.java
        MetaDataEntity.java
        Normalization.java
        RandomPermutation.java
        RandomRotation.java
        Result.java
        vectorization
        ImageVectorization.java
        ImageVectorizationResult.java
        ImageVectorizer.java
package gr.iti.mklab.visual.datastructures;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.Date;
import java.util.List;

import com.aliasi.util.BoundedPriorityQueue;
import com.javadocmd.simplelatlng.LatLng;
import com.sleepycat.bind.tuple.IntegerBinding;
import com.sleepycat.bind.tuple.StringBinding;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.bind.tuple.TupleInput;
import com.sleepycat.bind.tuple.TupleOutput;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentNotFoundException;
import com.sleepycat.je.ForwardCursor;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.persist.EntityStore;
import com.sleepycat.persist.PrimaryIndex;
import com.sleepycat.persist.StoreConfig;

import gr.iti.mklab.visual.utilities.Answer;
import gr.iti.mklab.visual.utilities.AnswerWithGeolocation;
import gr.iti.mklab.visual.utilities.MetaDataEntity;
import gr.iti.mklab.visual.utilities.Result;

/**
 * This class abstracts operations related to persistence and id lookup from the actual indexing structures.
 * The term id is used for the name or other identifier of the vectors being indexed while the term iid
 * (internal id) is used for the id assigned to a vector internally be each indexing structure. <br>
 * An id is of type String and is kept in disk while iid is of type int and is loaded in memory.<br>
 * Berkeley DB (BDB) is used for efficient persistent storage.
 * 
 * @author Eleftherios Spyromitros-Xioufis
 */
public abstract class AbstractSearchStructure {

	/**
	 * The total memory to be used by the BDB, 512Mb by default. Larger values will allow faster id lookup.
	 */
	protected long cacheSize = 1024 * 1024 * 512;

	/**
	 * Whether the environment will be transactional. If true, ensures that the dbs will not be corrupted.
	 * <br>
	 * For more information on what this means, refer to the BDB documentation.
	 */
	protected boolean transactional = false;

	/**
	 * The length of the raw vectors being indexed.
	 */
	protected int vectorLength;

	/**
	 * Keeps track of the total number of indexed vectors, acts as an auto-increment primary key field.
	 */
	protected int loadCounter;

	/**
	 * Whether the index will be loaded in memory. We can avoid loading the index in memory when we only want
	 * to perform indexing.
	 */
	protected boolean loadIndexInMemory;

	/**
	 * The maximum number of vectors that can be indexed.
	 */
	protected final int maxNumVectors;

	/**
	 * Whether to initialize the load counter by counting the size of the {@link #iidToIdDB}. This operation
	 * incurs a large cost when loading very large indices. It can be set to false for efficiency reasons. In
	 * that case, the load counter should be set manually.
	 */
	private boolean countSizeOnLoad;

	/**
	 * Whether the index should open only for read access. This allows multiple opens of the index.
	 */
	protected boolean readOnly;

	/**
	 * The database environment. Access to this field is needed by specific indexing structures that implement
	 * persistence.
	 */
	protected Environment dbEnv;

	/**
	 * BDB store holding id to iid mappings, required during indexing to fast check if a name is already
	 * indexed.
	 */
	protected Database idToIidDB;

	/**
	 * BDB store holding iid to id mappings, required for name look-up during nn search.
	 */
	protected Database iidToIdDB;

	/**
	 * BDB store holding iid to longitude-latitude mappings, required for geolocation look-up during nn
	 * search.
	 */
	protected Database iidToGeolocationDB;

	/**
	 * BDB store holding iid to metadata mappings, required for metadata look-up during nn search.
	 */
	protected EntityStore iidToMetadataDB;

	/**
	 * Average time taken for internal vector indexing operations.
	 */
	private long totalInternalVectorIndexingTime;

	/**
	 * Average time taken to create an id to idd and the reverse mapping.
	 */
	private long totalIdMappingTime;

	/**
	 * Average total time taken to index a vector.
	 */
	private long totalVectorIndexingTime;

	/**
	 * Whether to create/load geolocation db.
	 */
	protected final boolean useGeolocation = false;

	/**
	 * Whether to create/load metadata db.
	 */
	protected final boolean useMetaData = false;

	/**
	 * Constructor. Used when we count the size of the database when opening it.
	 * 
	 * @param vectorLength
	 *            The dimensionality of the vectors being indexed
	 * @param maxNumVectors
	 *            The maximum allowable size (number of vectors) of the index
	 * @param readOnly
	 *            If true the persistent store will opened only for read access (allows multiple opens)
	 */
	protected AbstractSearchStructure(int vectorLength, int maxNumVectors, boolean readOnly) {
		this(vectorLength, maxNumVectors, readOnly, true, 0, true);
	}

	/**
	 * Constructor. Used when we want to avoid counting the database size and to use a preset value for the
	 * load counter.
	 * 
	 * @param vectorLength
	 *            The dimensionality of the VLAD vectors being indexed
	 * @param maxNumVectors
	 *            The maximum allowable size (number of vectors) of the index
	 * @param readOnly
	 *            If true the persistent store will opened only for read access (allows multiple opens)
	 * @param countSizeOnLoad
	 *            Whether the load counter will be initialized by the size of the persistent store
	 * @param loadCounter
	 *            The initial value of the load counter
	 * @param loadIndexInMemory
	 *            Whether to load the index in memory, we can avoid loading the index in memory when we only
	 *            want to perform indexing
	 */
	protected AbstractSearchStructure(int vectorLength, int maxNumVectors, boolean readOnly,
			boolean countSizeOnLoad, int loadCounter, boolean loadIndexInMemory) {
		this.vectorLength = vectorLength;
		this.loadCounter = loadCounter;
		this.maxNumVectors = maxNumVectors;
		this.readOnly = readOnly;
		this.countSizeOnLoad = countSizeOnLoad;
		this.loadIndexInMemory = loadIndexInMemory;
	}

	/**
	 * Constructor. Used when we want to avoid counting the database size and to use a preset value for the
	 * load counter.
	 * 
	 * @param vectorLength
	 *            The dimensionality of the VLAD vectors being indexed
	 * @param maxNumVectors
	 *            The maximum allowable size (number of vectors) of the index
	 * @param readOnly
	 *            If true the persistent store will opened only for read access (allows multiple opens)
	 * @param countSizeOnLoad
	 *            Whether the load counter will be initialized by the size of the persistent store
	 * @param loadCounter
	 *            The initial value of the load counter
	 * @param loadIndexInMemory
	 *            Whether to load the index in memory, we can avoid loading the index in memory when we only
	 *            want to perform indexing
	 * @param cacheSize
	 *            The size of the cache in Megabytes
	 */
	protected AbstractSearchStructure(int vectorLength, int maxNumVectors, boolean readOnly,
			boolean countSizeOnLoad, int loadCounter, boolean loadIndexInMemory, long cachesize) {
		this.vectorLength = vectorLength;
		this.loadCounter = loadCounter;
		this.maxNumVectors = maxNumVectors;
		this.readOnly = readOnly;
		this.countSizeOnLoad = countSizeOnLoad;
		this.loadIndexInMemory = loadIndexInMemory;
		this.cacheSize = cachesize * 1024 * 1024;
	}

	/**
	 * Updates the index with the given vector. This is a synchronized method, i.e. when a thread calls this
	 * method, all other threads wait for the first thread to complete before executing the method. This
	 * ensures that the persistent BDB store will remain consistent when multiple threads call the indexVector
	 * method.
	 * 
	 * @param id
	 *            The id of the vector
	 * @param vector
	 *            The vector
	 * @return True if the vector is successfully indexed, false otherwise.
	 * @throws Exception
	 */
	public synchronized boolean indexVector(String id, double[] vector) throws Exception {
		long startIndexing = System.currentTimeMillis();
		// check if we can index more vectors
		if (loadCounter >= maxNumVectors) {
			System.out.println("Maximum index capacity reached, no more vectors can be indexed!");
			return false;
		}
		// check if name is already indexed
		if (isIndexed(id)) {
			System.out.println("Vector '" + id + "' already indexed!");
			return false;
		}
		// do the indexing
		// persist id to name and the reverse mapping
		long startMapping = System.currentTimeMillis();
		createMapping(id);
		totalIdMappingTime += System.currentTimeMillis() - startMapping;
		// method specific indexing
		long startInternalIndexing = System.currentTimeMillis();
		indexVectorInternal(vector);
		totalInternalVectorIndexingTime += System.currentTimeMillis() - startInternalIndexing;

		loadCounter++; // increase the loadCounter
		if (loadCounter % 100 == 0) { // debug message
			System.out.println(new Date() + " # indexed vectors: " + loadCounter);
		}
		totalVectorIndexingTime += System.currentTimeMillis() - startIndexing;
		return true;
	}

	/**
	 * This method should be implemented in all subclasses and do the operations required for indexing the
	 * given vector.
	 * 
	 * @param vector
	 *            The vector to be indexed
	 * @throws Exception
	 */
	protected abstract void indexVectorInternal(double[] vector) throws Exception;

	/**
	 * This method returns an {@link Answer} object, which contains the k nearest neighbors along with their
	 * ids and distances from the query vector, ordered by lowest distance. The methods calls
	 * {@link #computeNearestNeighborsInternal(int, double[])} and then performs name lookup.
	 * 
	 * @param k
	 *            The number of nearest neighbors to return
	 * @param queryVector
	 *            The query vector
	 * @return The answer
	 * @throws Exception
	 */
	public Answer computeNearestNeighbors(int k, double[] queryVector) throws Exception {
		if (!loadIndexInMemory) {
			throw new Exception("Cannot execute query because the index is not loaded in memory!");
		}
		long start = System.nanoTime();
		BoundedPriorityQueue<Result> nnQueue = computeNearestNeighborsInternal(k, queryVector);
		long indexSearchTime = System.nanoTime() - start;

		return lookUp(nnQueue, indexSearchTime);

	}

	/**
	 * This method returns a bounded priority queue of Result objects, which contains the k nearest neighbors
	 * along with their iids and distances from the query vector, ordered by lowest distance. Subclasses
	 * should implement this method.
	 * 
	 * @param k
	 *            The number of nearest neighbors to return
	 * @param queryVector
	 *            The query vector
	 * @return A bounded priority queue of Result objects
	 * @throws Exception
	 */
	protected abstract BoundedPriorityQueue<Result> computeNearestNeighborsInternal(int k,
			double[] queryVector) throws Exception;

	/**
	 * This method returns an {@link Answer} object, which contains the k nearest neighbors along with their
	 * ids and distances from the query vector, ordered by lowest distance. The methods calls
	 * {@link #computeNearestNeighborsInternal(int, int)} and then performs id lookup.
	 * 
	 * @param k
	 *            The number of nearest neighbors to return
	 * @param queryId
	 *            The id of the query vector
	 * @return The answer
	 * @throws Exception
	 */
	public Answer computeNearestNeighbors(int k, String queryId) throws Exception {
		int internalIdQuery = getInternalId(queryId);

		long start = System.nanoTime();
		BoundedPriorityQueue<Result> nnQueue = computeNearestNeighborsInternal(k, internalIdQuery);
		long indexSearchTime = System.nanoTime() - start;

		return lookUp(nnQueue, indexSearchTime);
	}

	/**
	 * This method returns a bounded priority queue of Result objects, which contains the k nearest neighbors
	 * along with their iids and distances from the query vector, ordered by lowest distance. Subclasses
	 * should implement this method.
	 * 
	 * @param k
	 *            The number of nearest neighbors to return
	 * @param iid
	 *            The internal id of the query vector
	 * @return A bounded priority queue of Result objects
	 * @throws Exception
	 */
	protected abstract BoundedPriorityQueue<Result> computeNearestNeighborsInternal(int k, int iid)
			throws Exception;

	private Answer lookUp(BoundedPriorityQueue<Result> nnQueue, long indexSearchTime) {
		Result[] nn = new Result[nnQueue.size()];
		nn = nnQueue.toArray(nn);

		String[] ids = new String[nnQueue.size()];
		double[] distances = new double[nnQueue.size()];

		long start = System.nanoTime();
		for (int i = 0; i < nn.length; i++) { // attach external ids to the results
			distances[i] = nn[i].getDistance();
			int iid = nn[i].getId();
			ids[i] = getId(iid);
		}
		long nameLookUpTime = System.nanoTime() - start;

		if (!useMetaData) {
			return new Answer(ids, distances, nameLookUpTime, indexSearchTime);
		} else {
			start = System.nanoTime();
			LatLng[] geolocations = new LatLng[nn.length];
			for (int i = 0; i < nn.length; i++) { // attach external ids to the results
				int iid = nn[i].getId();
				geolocations[i] = getGeolocation(iid);
			}
			long geolocationLookupTime = System.nanoTime() - start;
			return new AnswerWithGeolocation(ids, distances, geolocations, nameLookUpTime, indexSearchTime,
					geolocationLookupTime);
		}
	}

	/**
	 * Returns the internal id assigned to the vector with the given id or -1 if the id is not found. Accesses
	 * the BDB store!
	 * 
	 * @param id
	 *            The id of the vector
	 * @return The internal id assigned to this vector or -1 if the id is not found.
	 */
	public int getInternalId(String id) {
		DatabaseEntry key = new DatabaseEntry();
		StringBinding.stringToEntry(id, key);
		DatabaseEntry data = new DatabaseEntry();
		// check if the id already exists in id to iid database
		if ((idToIidDB.get(null, key, data, null) == OperationStatus.SUCCESS)) {
			return IntegerBinding.entryToInt(data);
		} else {
			return -1;
		}
	}

	/**
	 * Returns the id of the vector which was assigned the given internal id or null if the internal id does
	 * not exist. Accesses the BDB store!
	 * 
	 * @param iid
	 *            The internal id of the vector
	 * @return The id mapped to the given internal id or null if the internal id does not exist
	 */
	public String getId(int iid) {
		if (iid < 0 || iid > loadCounter) {
			System.out.println("Internal id " + iid + " is out of range!");
			return null;
		}
		DatabaseEntry key = new DatabaseEntry();
		IntegerBinding.intToEntry(iid, key);
		DatabaseEntry data = new DatabaseEntry();
		if ((iidToIdDB.get(null, key, data, null) == OperationStatus.SUCCESS)) {
			return StringBinding.entryToString(data);
		} else {
			System.out.println("Internal id " + iid + " is in range but id was not found..");
			System.out.println("Index is probably corrupted");
			System.exit(0);
			return null;
		}
	}

	/**
	 * Returns a {@link LatLng} object with the geolocation of the vector with the given internal id or null
	 * if the internal id does not exist. Accesses the BDB store!
	 * 
	 * @param iid
	 *            The internal id of the vector
	 * @return The geolocation mapped to the given internal id or null if the internal id does not exist
	 */
	public LatLng getGeolocation(int iid) {
		if (iid < 0 || iid > loadCounter) {
			System.out.println("Internal id " + iid + " is out of range!");
			return null;
		}
		DatabaseEntry key = new DatabaseEntry();
		IntegerBinding.intToEntry(iid, key);
		DatabaseEntry data = new DatabaseEntry();
		if ((iidToGeolocationDB.get(null, key, data, null) == OperationStatus.SUCCESS)) {
			TupleInput input = TupleBinding.entryToInput(data);
			double latitude = input.readDouble();
			double longitude = input.readDouble();
			LatLng geolocation = new LatLng(latitude, longitude);
			return geolocation;
		} else {
			System.out.println("Internal id " + iid + " is in range but gelocation was not found.");
			return null;
		}
	}

	/**
	 * Returns a {@link MetaDataEntity} object with the metadata of the vector with the given internal id or
	 * null if the internal id does not exist. Accesses the BDB store!
	 * 
	 * @param iid
	 *            The internal id of the vector
	 * @return The metadata mapped to the given internal id or null if the internal id does not exist
	 */
	public MetaDataEntity getMetadata(int iid) throws Exception {
		if (iid < 0 || iid > loadCounter) {
			System.out.println("Internal id " + iid + " is out of range!");
			return null;
		}
		PrimaryIndex<Integer, MetaDataEntity> primaryIndex = iidToMetadataDB.getPrimaryIndex(Integer.class,
				MetaDataEntity.class);
		return primaryIndex.get(null, iid, null);
	}

	/**
	 * This method is used to set the geolocation of a previously indexed vector. If the geolocation is
	 * already set, this method replaces it.
	 * 
	 * @param iid
	 *            The internal id of the vector
	 * @param latitude
	 * @param longitude
	 * @return true if geolocation is successfully set, false otherwise
	 */
	public boolean setGeolocation(int iid, double latitude, double longitude) {
		if (iid < 0 || iid > loadCounter) {
			System.out.println("Internal id " + iid + " is out of range!");
			return false;
		}
		DatabaseEntry key = new DatabaseEntry();
		DatabaseEntry data = new DatabaseEntry();

		IntegerBinding.intToEntry(iid, key);
		TupleOutput output = new TupleOutput();
		output.writeDouble(latitude);
		output.writeDouble(longitude);
		TupleBinding.outputToEntry(output, data);

		if (iidToGeolocationDB.put(null, key, data) == OperationStatus.SUCCESS) {
			return true;
		} else {
			return false;
		}
	}

	/**
	 * This method is used to set the metadata of a previously indexed vector. If the metadata is already set,
	 * this methods replaces it.
	 * 
	 * @param iid
	 *            The internal id of the vector
	 * @param metaData
	 *            A java object of any class with the @persistent annotation
	 * @return true if metadata is successfully set, false otherwise
	 */
	public boolean setMetadata(int iid, Object metaData) {
		if (iid < 0 || iid > loadCounter) {
			System.out.println("Internal id " + iid + " is out of range!");
			return false;
		}
		MetaDataEntity mde = new MetaDataEntity(iid, metaData);
		PrimaryIndex<Integer, MetaDataEntity> primaryIndex = iidToMetadataDB.getPrimaryIndex(Integer.class,
				MetaDataEntity.class);

		if (primaryIndex.contains(iid)) {
			primaryIndex.put(null, mde);
			return true;
		} else {
			return false;
		}
	}

	/**
	 * <b>{@link #getInternalId(String)} can always be called instead of this method at the same cost!</b>
	 * <br>
	 * Checks if the vector with the given id is already indexed. This method is useful to avoid re-indexing
	 * the same vector. Its convention is that if the given name is already in idToIidBDB, then the vector is
	 * indexed in all other structures e.g. iidToIdBDB. The rest of the checks are avoided for efficiency.
	 * Accesses the BDB store!
	 * 
	 * @param id
	 *            The id the vector
	 * @return true if the vector is indexed, false otherwise
	 */
	public boolean isIndexed(String id) {
		DatabaseEntry key = new DatabaseEntry();
		StringBinding.stringToEntry(id, key);
		DatabaseEntry data = new DatabaseEntry();
		if ((idToIidDB.get(null, key, data, null) == OperationStatus.SUCCESS)) {
			return true;
		} else {
			return false;
		}
	}

	/**
	 * This method is used to create a persistent mapping between the given id and an internal id (equal to
	 * the current value of {@link #loadCounter}). Should be called every time that a new vector is indexed.
	 * 
	 * @param id
	 *            The id
	 */
	protected void createMapping(String id) {
		DatabaseEntry key = new DatabaseEntry();
		DatabaseEntry data = new DatabaseEntry();
		IntegerBinding.intToEntry(loadCounter, key);
		StringBinding.stringToEntry(id, data);
		iidToIdDB.put(null, key, data); // required during name look-up
		idToIidDB.put(null, data, key); // required during indexing
	}

	/**
	 * This method creates and/or opens the BDB databases with the appropriate parameters.
	 * 
	 * @throws Exception
	 */
	private void createOrOpenBDBDbs() throws Exception {
		// configuration for the mapping dbs
		DatabaseConfig dbConfig = new DatabaseConfig();
		dbConfig.setAllowCreate(true); // db will be created if it does not exist
		dbConfig.setReadOnly(readOnly);
		dbConfig.setTransactional(transactional);
		// create/open mapping dbs using config
		iidToIdDB = dbEnv.openDatabase(null, "idToName", dbConfig);

		// if countSizeOnLoad is true, the id-name mappings are counted and the loadCounter is initialized
		if (countSizeOnLoad) {
			System.out.println(new Date() + " counting index size started ");
			int idToNameMappings = (int) iidToIdDB.count();
			loadCounter = Math.min(idToNameMappings, maxNumVectors);
			System.out.println(new Date() + " counting index size ended ");
			System.out.println("Index size: " + loadCounter);
		}

		idToIidDB = dbEnv.openDatabase(null, "nameToId", dbConfig);

		if (useGeolocation) {// create/open geolocation db using config
			iidToGeolocationDB = dbEnv.openDatabase(null, "idToGeolocation", dbConfig);
		}

		if (useMetaData) {
			StoreConfig storeConfig = new StoreConfig(); // configuration of the entity store
			storeConfig.setAllowCreate(true); // store will be created if it does not exist
			storeConfig.setReadOnly(readOnly);
			storeConfig.setTransactional(transactional);
			iidToMetadataDB = new EntityStore(dbEnv, "idToMetadata", storeConfig);
			// int nameToMetadataMappings = (int) nameToMetadataBDB.getPrimaryIndex(String.class,
			// MediaFeedData.class).count(); // counting the size of an EntityStore
		}
	}

	/**
	 * This is a utility method that can be used to dump the contents of the iidToIdDB to a txt file.
	 * 
	 * @param dumpFilename
	 *            Full path to the file where the dump will be written.
	 * @throws Exception
	 */
	public void dumpiidToIdDB(String dumpFilename) throws Exception {
		DatabaseEntry foundKey = new DatabaseEntry();
		DatabaseEntry foundData = new DatabaseEntry();

		ForwardCursor cursor = iidToIdDB.openCursor(null, null);
		BufferedWriter out = new BufferedWriter(new FileWriter(new File(dumpFilename)));
		while (cursor.getNext(foundKey, foundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
			int iid = IntegerBinding.entryToInt(foundKey);
			String id = StringBinding.entryToString(foundData);
			out.write(iid + " " + id + "\n");
		}
		cursor.close();
		out.close();
	}

	/**
	 * This is a utility method that can be used to dump the contents of the idToIidDB to a txt file.
	 * 
	 * @param dumpFilename
	 *            Full path to the file where the dump will be written.
	 * @throws Exception
	 */
	public void dumpidToIidDB(String dumpFilename) throws Exception {
		DatabaseEntry foundKey = new DatabaseEntry();
		DatabaseEntry foundData = new DatabaseEntry();

		ForwardCursor cursor = idToIidDB.openCursor(null, null);
		BufferedWriter out = new BufferedWriter(new FileWriter(new File(dumpFilename)));
		while (cursor.getNext(foundKey, foundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
			int iid = IntegerBinding.entryToInt(foundData);
			String id = StringBinding.entryToString(foundKey);
			out.write(id + " " + iid + "\n");
		}
		cursor.close();
		out.close();
	}

	/**
	 * This method creates and/or opens the BDB environment in the supplied directory. <br>
	 * TODO: The configuration can be tuned for being more efficient / less persistent!
	 * 
	 * @param BDBEnvHome
	 *            The directory where the BDB environment will be created.
	 * @throws Exception
	 */
	private void createOrOpenBDBEnv(String BDBEnvHome) throws Exception {
		// create the BDBEnvHome directory if it does not exist
		File BDBEnvHomeDir = new File(BDBEnvHome);
		if (!BDBEnvHomeDir.isDirectory()) {
			boolean success = BDBEnvHomeDir.mkdir();
			if (success) {
				System.out.println(BDBEnvHome + " directory created.");
			}
		} else {
			System.out.println(BDBEnvHome + " directory exists.");
		}
		// configuration of the bdb environment, applies to all dbs in this environment
		EnvironmentConfig envConf = new EnvironmentConfig();
		envConf.setAllowCreate(false); // initially we do not allow create
		envConf.setReadOnly(readOnly);
		envConf.setTransactional(transactional);
		envConf.setCacheSize(cacheSize);
		// Instantiate the Environment. This opens it and also possibly creates it.
		try {
			dbEnv = new Environment(BDBEnvHomeDir, envConf);
			System.out.println("An existing BDB environment was found.");
		} catch (EnvironmentNotFoundException e) {
			envConf.setAllowCreate(true);
			dbEnv = new Environment(BDBEnvHomeDir, envConf);
			System.out.println("A new BDB environment was created.");
		}

		// printing information about the BDB environment
		System.out.println("== BDB environment configuration ===");
		System.out.println(dbEnv.getConfig());
		System.out.println("== BDB environment database names ===");
		List<String> dbNames = dbEnv.getDatabaseNames();
		for (String dbName : dbNames) {
			System.out.println(dbName);
		}
		System.out.println("");
	}

	/**
	 * This method creates or opens (if it already exists) the BDB environment and dbs.
	 * 
	 * @param BDBEnvHome
	 *            The directory where the BDB environment will be created
	 * @throws Exception
	 */
	protected void createOrOpenBDBEnvAndDbs(String BDBEnvHome) throws Exception {
		createOrOpenBDBEnv(BDBEnvHome);
		createOrOpenBDBDbs();
	}

	/**
	 * Returns the current value of the loadCounter.
	 * 
	 * @return
	 */
	public int getLoadCounter() {
		return loadCounter;
	}

	/**
	 * This method can be called to output indexing time measurements.
	 */
	public void outputIndexingTimes() {
		System.out.println(
				(double) totalInternalVectorIndexingTime / loadCounter + " ms => internal indexing time");
		System.out.println((double) totalIdMappingTime / loadCounter + " ms => id mapping time");
		System.out.println((double) totalVectorIndexingTime / loadCounter + " ms => total indexing time");
		outputIndexingTimesInternal();
	}

	/**
	 * Should output index specific time measurements.
	 */
	protected abstract void outputIndexingTimesInternal();

	/**
	 * This method closes the open BDB environment and databases.
	 */
	public void close() {
		if (dbEnv != null) {
			// closing dbs
			iidToIdDB.close();
			idToIidDB.close();
			if (useGeolocation) {
				iidToGeolocationDB.close();
			}
			if (useMetaData) {
				iidToMetadataDB.close();
			}
			closeInternal();
			dbEnv.close(); // closing env
		} else {
			System.out.println("BDB environment is null!");
		}
	}

	/**
	 * Each subclass should implement this method to close the BDB databases that it uses.
	 */
	protected abstract void closeInternal();
}