package gr.iti.mklab.visual.quantization;

import gr.iti.mklab.visual.utilities.Normalization;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import weka.core.Instances;
import weka.core.converters.CSVLoader;

/**
 * This class uses the {@link AbstractQuantizerLearning} class to create a codebook from a set of local
 * features that are stored in an arff or csv formatted file. It supports application of l2 or power+l2
 * normalization of the local features prior to clustering.
 * 
 * @author Eleftherios Spyromitros-Xioufis
 */
public class CodebookLearning {

	/**
	 * The power to use when power-normalization is applied on the local features.
	 */
	public static final double power = 0.5;

	/**
	 * @param args
	 *            [0] path to the arff or csv formatted (headerless) file containing the local features
	 * @param args
	 *            [1] the number of clusters to create (e.g. 64)
	 * @param args
	 *            [2] the maximum number of k-means iterations (e.g. 100)
	 * @param args
	 *            [3] the seed given to k-means (e.g. 1)
	 * @param args
	 *            [4] the number of execution slots to use (>1 = parallel execution)
	 * @param args
	 *            [5] the type of normalization to apply on the local features (no/l2/power+l2)
	 * @param args
	 *            [6] whether to use kmeans++ for the initialization of the centroids (true/false)
	 * @throws Exception
	 */
	public static void main(String[] args) throws Exception {
		String filepath = args[0];
		int numClusters = Integer.parseInt(args[1]);
		int maxIterations = Integer.parseInt(args[2]);
		int seed = Integer.parseInt(args[3]);
		int numSlots = Integer.parseInt(args[4]);
		String normalization = args[5];
		boolean kMeansPlusPlus = Boolean.parseBoolean(args[6]);

		Instances data;
		System.out.println("--Loading descriptors--");
		if (filepath.endsWith(".arff")) { // loading instances from an arff file
			// using the Instances class of WEKA to read the dataset
			BufferedReader reader = new BufferedReader(new FileReader(filepath));
			data = new Instances(reader);
			reader.close();
		} else if (filepath.endsWith(".csv")) { // loading instances from a headerless csv file
			CSVLoader loader = new CSVLoader();
			loader.setNoHeaderRowPresent(true);
			loader.setSource(new File(filepath));
			data = loader.getDataSet();
		} else {
			throw new Exception("Wrong dataset format!");
		}

		if (!normalization.equals("no")) { // apply normalization on the features
			System.out.println("--Normalizing descriptors--");
			for (int i = 0; i < data.numInstances(); i++) {
				double[] vector = data.instance(i).toDoubleArray();
				if (normalization.equals("l2")) {
					vector = Normalization.normalizeL2(vector);
				}
				if (normalization.equals("power+l2")) {
					vector = Normalization.normalizePower(vector, power);
					vector = Normalization.normalizeL2(vector);
				}
				// write the normalized values back into the instance
				for (int j = 0; j < vector.length; j++) {
					data.instance(i).setValue(j, vector[j]);
				}
			}
		}

		// output filename encodes the number of attributes (A), clusters (C), iterations (I), seed (S)
		// and the normalization type
		String outFilename = filepath + "_codebook-" + data.numAttributes() + "A-" + numClusters + "C-"
				+ maxIterations + "I-" + seed + "S" + "_" + normalization + ".csv";

		AbstractQuantizerLearning.learnAndWriteQuantizer(outFilename, data, numClusters, maxIterations,
				seed, numSlots, kMeansPlusPlus);
	}
}
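// Example invocation (a hypothetical sketch: the jar name and the feature file path are assumptions,
// not part of this class):
//
//   java -cp multimedia-indexing.jar gr.iti.mklab.visual.quantization.CodebookLearning \
//       features/descriptors.csv 64 100 1 4 power+l2 true
//
// This would power+l2 normalize the descriptors in features/descriptors.csv, cluster them into 64
// centroids using at most 100 k-means iterations (seed 1, 4 execution slots, kmeans++ initialization),
// and write the codebook to features/descriptors.csv_codebook-<#attributes>A-64C-100I-1S_power+l2.csv.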