package gr.iti.mklab.visual.quantization;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import weka.clusterers.SimpleKMeans;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SelectedTag;
/**
* This class contains a static method that learns a k-means quantizer using a slightly modified (to produce
* some additional output) version of Weka's SimpleKMeans class and writes the learned quantizer to a file. It
* supports parallel execution!
*
* @author Eleftherios Spyromitros-Xioufis
*/
public class AbstractQuantizerLearning {

    /**
     * Runs k-means clustering on the given data with a {@link SimpleKMeans} clusterer and writes the
     * resulting cluster centroids to a file in CSV format: one centroid per line, attribute values
     * separated by commas. The clustering settings and the elapsed clustering time are printed to
     * standard output.
     *
     * @param outFilePath
     *            full path to the output file
     * @param data
     *            the instances object containing the data on which the quantizer is learned
     * @param numClusters
     *            the number of clusters in k-means
     * @param maxIterations
     *            the maximum number of k-means iterations
     * @param seed
     *            the seed given to k-means
     * @param numSlots
     *            the number of execution slots to use (>1 = parallel execution)
     * @param kMeansPlusPlus
     *            whether to use k-means++ for the initialization of the centroids (true/false)
     * @throws Exception
     *             if building the clusterer fails or the output file cannot be written
     */
    public static void learnAndWriteQuantizer(String outFilePath, Instances data, int numClusters,
            int maxIterations, int seed, int numSlots, boolean kMeansPlusPlus) throws Exception {
        System.out.println("--" + data.numInstances() + " vectors loaded--");
        System.out.println("Vector dimensionality: " + data.numAttributes());
        System.out.println("Clustering settings:");
        System.out.println("Num clusters: " + numClusters);
        System.out.println("Max iterations: " + maxIterations);
        System.out.println("Seed: " + seed);
        System.out.println("Clustering started");
        long start = System.currentTimeMillis();

        // create a new Clusterer and initialize appropriately
        SimpleKMeans clusterer = new SimpleKMeans();
        if (kMeansPlusPlus) {
            clusterer.setInitializationMethod(new SelectedTag(SimpleKMeans.KMEANS_PLUS_PLUS,
                    SimpleKMeans.TAGS_SELECTION));
        }
        clusterer.setDebug(true);
        clusterer.setSeed(seed);
        clusterer.setNumClusters(numClusters);
        clusterer.setMaxIterations(maxIterations);
        clusterer.setNumExecutionSlots(numSlots);
        clusterer.setFastDistanceCalc(true);

        // build the clusterer
        clusterer.buildClusterer(data);
        long end = System.currentTimeMillis();
        System.out.println("Clustering completed in " + (end - start) + " ms");

        System.out.println("Writing quantizer in file");
        writeCentroids(outFilePath, clusterer.getClusterCentroids());
    }

    /**
     * Writes each centroid as one comma-separated line to the given file, creating or overwriting it.
     */
    private static void writeCentroids(String outFilePath, Instances clusterCentroids)
            throws java.io.IOException {
        // try-with-resources guarantees the file handle is released even if a write fails midway
        try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(outFilePath)))) {
            for (int j = 0; j < clusterCentroids.numInstances(); j++) {
                Instance centroid = clusterCentroids.instance(j);
                int numAttributes = centroid.numAttributes();
                // separator-first loop: no trailing comma and no out-of-range index when a
                // centroid has zero attributes
                for (int k = 0; k < numAttributes; k++) {
                    if (k > 0) {
                        out.write(",");
                    }
                    out.write(Double.toString(centroid.value(k)));
                }
                out.write("\n");
            }
        }
    }
}