package gr.iti.mklab.visual.quantization;

import gr.iti.mklab.visual.utilities.Normalization;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import weka.core.Instances;
import weka.core.converters.CSVLoader;

/**
 * This class uses the {@link AbstractQuantizerLearning} class to create a codebook from a set of local
 * features that are stored in an arff or csv formatted file. It supports application of l2 or power+l2
 * normalization of the local features prior to clustering.
 * 
 * @author Eleftherios Spyromitros-Xioufis
 */
public class CodebookLearning {

	/**
	 * The power to use when power-normalization is applied on the local features.
	 */
	public static final double power = 0.5;

	/**
	 * @param args
	 *            [0] path to the arff or csv formatted (headerless) file containing the local features
	 * @param args
	 *            [1] the number of clusters to create (e.g. 64)
	 * @param args
	 *            [2] the maximum number of k-means iterations (e.g. 100)
	 * @param args
	 *            [3] the seed given to k-means (e.g. 1)
	 * @param args
	 *            [4] the number of execution slots to use (>1 = parallel execution)
	 * @param args
	 *            [5] the type of normalization to apply on the local features (no/l2/power+l2)
	 * @param args
	 *            [6] whether to use kmeans++ for the initialization of the centroids (true/false)
	 * @throws Exception
	 */
	public static void main(String[] args) throws Exception {
		String filepath = args[0];
		int numClusters = Integer.parseInt(args[1]);
		int maxIterations = Integer.parseInt(args[2]);
		int seed = Integer.parseInt(args[3]);
		int numSlots = Integer.parseInt(args[4]);
		String normalization = args[5];
		boolean kMeansPlusPlus = Boolean.parseBoolean(args[6]);

		Instances data;
		System.out.println("--Loading descriptors--");
		if (filepath.endsWith(".arff")) { // loading instances from an arff file
			// using the Instances class of WEKA to read the dataset
			BufferedReader reader = new BufferedReader(new FileReader(filepath));
			data = new Instances(reader);
			reader.close();
		} else if (filepath.endsWith(".csv")) { // loading instances from a headerless csv file
			CSVLoader loader = new CSVLoader();
			loader.setNoHeaderRowPresent(true);
			loader.setSource(new File(filepath));
			data = loader.getDataSet();
		} else {
			throw new Exception("Wrong dataset format!");
		}

		if (!normalization.equals("no")) { // apply normalization on the features
			System.out.println("--Normalizing descriptors--");
			for (int i = 0; i < data.numInstances(); i++) {
				double[] vector = data.instance(i).toDoubleArray();
				if (normalization.equals("l2")) {
					vector = Normalization.normalizeL2(vector);
				}
				if (normalization.equals("power+l2")) {
					vector = Normalization.normalizePower(vector, power);
					vector = Normalization.normalizeL2(vector);
				}
				// write the normalized values back into the instance
				for (int j = 0; j < vector.length; j++) {
					data.instance(i).setValue(j, vector[j]);
				}
			}
		}

		// output filename encodes the number of attributes (A), clusters (C), iterations (I), seed (S)
		// and the normalization type
		String outFilename = filepath + "_codebook-" + data.numAttributes() + "A-" + numClusters + "C-"
				+ maxIterations + "I-" + seed + "S" + "_" + normalization + ".csv";

		AbstractQuantizerLearning.learnAndWriteQuantizer(outFilename, data, numClusters, maxIterations,
				seed, numSlots, kMeansPlusPlus);
	}
}
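// Example invocation (a hypothetical sketch: the jar name and the feature file path are assumptions,
// not part of this class):
//
//   java -cp multimedia-indexing.jar gr.iti.mklab.visual.quantization.CodebookLearning \
//       features/descriptors.csv 64 100 1 4 power+l2 true
//
// This would power+l2 normalize the descriptors in features/descriptors.csv, cluster them into 64
// centroids using at most 100 k-means iterations (seed 1, 4 execution slots, kmeans++ initialization),
// and write the codebook to features/descriptors.csv_codebook-<#attributes>A-64C-100I-1S_power+l2.csv.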