/** * Copyright (c) 2013 Oculus Info Inc. * http://www.oculusinfo.com/ * * Released under the MIT License. * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package spimedb.cluster.unsupervised; import spimedb.cluster.DataSet; import spimedb.cluster.Instance; import spimedb.cluster.feature.string.StringFeature; import spimedb.cluster.feature.string.centroid.StringMedianCentroid; import spimedb.cluster.feature.string.distance.EditDistance; import spimedb.cluster.unsupervised.cluster.Cluster; import spimedb.cluster.unsupervised.cluster.ClusterResult; import spimedb.cluster.unsupervised.cluster.dpmeans.DPMeans; public class TestStringClusteringWithDPMeans { private static final String FEATURE_NAME1 = "tokens"; /** * @param args */ public static void main(String[] args) { DataSet ds = new DataSet(); Instance inst = new Instance("1"); StringFeature feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("2"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("3"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack"); inst.addFeature(feature); ds.add(inst); inst = new Instance("4"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack l. black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("5"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("j. black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("6"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("j black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("7"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("8"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jackie black"); inst.addFeature(feature); ds.add(inst); inst = new Instance("9"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jack brown"); inst.addFeature(feature); ds.add(inst); inst = new Instance("10"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jackie green"); inst.addFeature(feature); ds.add(inst); inst = new Instance("11"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("bob"); inst.addFeature(feature); ds.add(inst); inst = new Instance("12"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("bobbie"); inst.addFeature(feature); ds.add(inst); inst = new Instance("13"); feature = new StringFeature(FEATURE_NAME1); feature.setValue("jackie"); inst.addFeature(feature); ds.add(inst); DPMeans clusterer = new DPMeans(5, false); clusterer.setThreshold(0.6); clusterer.registerFeatureType( FEATURE_NAME1, StringMedianCentroid.class, new EditDistance(1.0)); ClusterResult clusters = clusterer.doCluster(ds); for (Cluster c : clusters) { System.out.println(c.toString(true)); } clusterer.terminate(); } }