/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.clustering.clusterer;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.clustering.CentroidClusterModel;
import com.rapidminer.operator.clustering.ClusterModel;
import com.rapidminer.operator.learner.CapabilityProvider;
import com.rapidminer.operator.ports.metadata.CapabilityPrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.math.similarity.DistanceMeasure;
import com.rapidminer.tools.math.similarity.DistanceMeasureHelper;
import com.rapidminer.tools.math.similarity.DistanceMeasures;
import de.dfki.madm.operator.ClusteringAlgorithms;
import de.dfki.madm.operator.KMeanspp;
import de.dfki.madm.operator.clustering.XMeansCore;
/**
 * This operator represents an implementation of the X-Means algorithm. It will create a cluster attribute if not
 * present yet.
 *
 * The implementation follows the paper of Dan Pelleg and Andrew Moore:
 * - X-means: Extending K-means with Efficient Estimation of the Number of Clusters
 *
 * The operator itself only collects the user parameters; the actual clustering is delegated to
 * {@link XMeansCore}.
 *
 * @author Patrick Kalka
 */
public class XMeans extends RMAbstractClusterer implements CapabilityProvider {

    /** Maximal number of Clusters */
    public static final String PARAMETER_K_Max = "k_max";

    /** Minimal number of Clusters */
    public static final String PARAMETER_K_Min = "k_min";

    /** The parameter name for "the maximal number of runs of k-Means with random initialization that are performed" */
    public static final String PARAMETER_MAX_RUNS = "max_runs";

    /** The parameter name for "the maximal number of iterations performed for one run of k-Means" */
    public static final String PARAMETER_MAX_OPTIMIZATION_STEPS = "max_optimization_steps";

    /** Resolves the distance measure configured on this operator for a concrete example set. */
    private final DistanceMeasureHelper measureHelper = new DistanceMeasureHelper(this);

    /**
     * The description this operator was created with; it is handed on to {@link XMeansCore}.
     * NOTE(review): the field is package-visible and non-final, so its name and modifiers are kept unchanged in
     * case other classes of this package access it.
     */
    OperatorDescription Description = null;

    /**
     * Creates the operator and registers a capability precondition on the example set input port.
     *
     * @param description
     *            the operator description, stored for later use by the clustering core
     */
    public XMeans(OperatorDescription description) {
        super(description);
        Description = description;
        getExampleSetInputPort().addPrecondition(new CapabilityPrecondition(this, getExampleSetInputPort()));
    }

    /**
     * Reads all operator parameters, initializes the distance measure for the given example set and delegates the
     * clustering to {@link XMeansCore#doXMean()}.
     *
     * @param eSet
     *            the example set to cluster
     * @return the cluster model produced by the X-Means core
     * @throws OperatorException
     *             if a parameter cannot be read, the measure cannot be initialized or the clustering fails
     */
    @Override
    public ClusterModel generateClusterModel(ExampleSet eSet) throws OperatorException {
        DistanceMeasure measure = measureHelper.getInitializedMeasure(eSet);

        int kMax = getParameterAsInt(PARAMETER_K_Max);
        int kMin = getParameterAsInt(PARAMETER_K_Min);
        boolean useKpp = getParameterAsBoolean(KMeanspp.PARAMETER_USE_KPP);
        String clusteringAlgorithm = getParameterAsString(ClusteringAlgorithms.PARAMETER_CLUSTERING_ALGORITHM);
        int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
        int maxRuns = getParameterAsInt(PARAMETER_MAX_RUNS);

        XMeansCore core = new XMeansCore(eSet, kMin, kMax, useKpp, maxOptimizationSteps, maxRuns, Description,
                measure, clusteringAlgorithm);
        return core.doXMean();
    }

    /**
     * @return {@link CentroidClusterModel}, the model class declared for this operator
     */
    @Override
    public Class<? extends ClusterModel> getClusterModelClass() {
        return CentroidClusterModel.class;
    }

    /**
     * Declares which data characteristics this operator accepts: binominal and polynominal attributes are
     * rejected, every other capability is reported as supported.
     *
     * @param capability
     *            the capability to check
     * @return {@code false} for binominal or polynominal attributes, {@code true} otherwise
     */
    @Override
    public boolean supportsCapability(OperatorCapability capability) {
        switch (capability) {
            case BINOMINAL_ATTRIBUTES:
            case POLYNOMINAL_ATTRIBUTES:
                return false;
            default:
                return true;
        }
    }

    /**
     * Builds the parameter list of the operator: the inherited parameters, the cluster count bounds, the k-means++
     * switch, the distance measure parameters, the clustering algorithm parameters, the run/iteration limits and
     * the random generator parameters, in that order.
     *
     * @return the parameter types of this operator
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        types.add(new ParameterTypeInt(PARAMETER_K_Min, "The minimal number of clusters which should be detected.",
                2, Integer.MAX_VALUE, 2, false));
        // The lower bound matches the one of k_min. It used to equal the default of 60, which made it impossible
        // to request fewer than 60 clusters as the upper limit. The default itself is unchanged.
        types.add(new ParameterTypeInt(PARAMETER_K_Max, "The maximal number of clusters which should be detected.",
                2, Integer.MAX_VALUE, 60, false));

        ParameterType kppType = new ParameterTypeBoolean(KMeanspp.PARAMETER_USE_KPP, KMeanspp.SHORT_DESCRIPTION,
                false);
        kppType.setExpert(false);
        types.add(kppType);

        for (ParameterType measureParameter : DistanceMeasures.getParameterTypes(this)) {
            // Keys are strings and must be compared by value; a reference comparison only matches when both
            // sides happen to be the very same String instance.
            if (DistanceMeasures.PARAMETER_MEASURE_TYPES.equals(measureParameter.getKey())) {
                // NOTE(review): 2 is presumably the index of the numerical measure types - confirm against
                // DistanceMeasures before replacing it with a named constant.
                measureParameter.setDefaultValue(2);
            }
            types.add(measureParameter);
        }

        types.addAll(ClusteringAlgorithms.getParameterTypes(this));

        types.add(new ParameterTypeInt(PARAMETER_MAX_RUNS,
                "The maximal number of runs of k-Means with random initialization that are performed.", 1,
                Integer.MAX_VALUE, 10, false));
        types.add(new ParameterTypeInt(PARAMETER_MAX_OPTIMIZATION_STEPS,
                "The maximal number of iterations performed for one run of k-Means.", 1, Integer.MAX_VALUE, 100,
                false));

        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
        return types;
    }
}