package gr.iti.mklab.visual.quantization; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.FilenameFilter; import java.io.IOException; import java.util.Random; /** * This class is used for taking samples of local features (in csv format) which can then be used for learning * visual vocabularies. The local features of each image should be stored in a separate csv file (binary files * are currently not supported). * * @author Eleftherios Spyromitros-Xioufis */ public class SampleLocalFeatures { /** * * @param args * [0] full path to the folder containing the local feature files. * @param args * [1] the target number of features to be retained (e.g. 100000). * @param args * [2] the extension of the feature files (surf or sift). * @param args * [3] the number of samples to produce (>=1). * @throws IOException */ public static void main(String[] args) throws IOException { String path = args[0]; int targetNumFeatures = Integer.parseInt(args[1]); final String extension = args[2]; int numSamples = Integer.parseInt(args[3]); File dir = new File(path); // return only files that have the specified extension FilenameFilter filter = new FilenameFilter() { public boolean accept(File dir, String name) { return name.endsWith("." + extension); } }; String[] files = dir.list(filter); // count the total number of local features in all files long lineCounter = 0; long start = System.currentTimeMillis(); for (int i = 0; i < files.length; i++) { if (i % 500 == 0) { // print progress information System.out.println("Reading file: " + i); System.out.println("Time elapsed: " + (System.currentTimeMillis() - start) + " ms"); } BufferedReader in = new BufferedReader(new FileReader(path + files[i])); while ((in.readLine()) != null) { lineCounter++; } in.close(); } System.out.println("Total number of local features: " + lineCounter); // compute the per image sampling ratio double samplingRatio = (double) targetNumFeatures / lineCounter; System.out.println("Sampling ratio: " + samplingRatio); System.out.println("Generating " + numSamples + " samples.."); // create k output files where the samples will be written and k Random samples BufferedWriter[] outs = new BufferedWriter[numSamples]; Random[] rands = new Random[numSamples]; for (int k = 0; k < numSamples; k++) { outs[k] = new BufferedWriter(new FileWriter(path + "sample" + targetNumFeatures + "s" + k + ".csv")); rands[k] = new Random(k); } // perform the random rejection sampling for (int i = 0; i < files.length; i++) { if (i % 500 == 0) { // print progress information System.out.println("Sampling from file: " + i); System.out.println("Time elapsed: " + (System.currentTimeMillis() - start) + " ms"); } BufferedReader in = new BufferedReader(new FileReader(path + files[i])); String line; while ((line = in.readLine()) != null) { // for each line of the file randomly decide if it will be written in a sample file for (int k = 0; k < numSamples; k++) { if (rands[k].nextDouble() <= samplingRatio) { outs[k].write(line + "\n"); } } } in.close(); } // closing output files for (int k = 0; k < numSamples; k++) { outs[k].close(); } } }