/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math.similarity;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.parameter.ParameterHandler;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.math.kernels.Kernel;
import com.rapidminer.tools.math.similarity.divergences.GeneralizedIDivergence;
import com.rapidminer.tools.math.similarity.divergences.ItakuraSaitoDistance;
import com.rapidminer.tools.math.similarity.divergences.KLDivergence;
import com.rapidminer.tools.math.similarity.divergences.LogarithmicLoss;
import com.rapidminer.tools.math.similarity.divergences.LogisticLoss;
import com.rapidminer.tools.math.similarity.divergences.MahalanobisDistance;
import com.rapidminer.tools.math.similarity.divergences.SquaredEuclideanDistance;
import com.rapidminer.tools.math.similarity.divergences.SquaredLoss;
import com.rapidminer.tools.math.similarity.mixed.MixedEuclideanDistance;
import com.rapidminer.tools.math.similarity.nominal.DiceNominalSimilarity;
import com.rapidminer.tools.math.similarity.nominal.JaccardNominalSimilarity;
import com.rapidminer.tools.math.similarity.nominal.KulczynskiNominalSimilarity;
import com.rapidminer.tools.math.similarity.nominal.NominalDistance;
import com.rapidminer.tools.math.similarity.nominal.RogersTanimotoNominalSimilarity;
import com.rapidminer.tools.math.similarity.nominal.RussellRaoNominalSimilarity;
import com.rapidminer.tools.math.similarity.nominal.SimpleMatchingNominalSimilarity;
import com.rapidminer.tools.math.similarity.numerical.CamberraNumericalDistance;
import com.rapidminer.tools.math.similarity.numerical.ChebychevNumericalDistance;
import com.rapidminer.tools.math.similarity.numerical.CorrelationSimilarity;
import com.rapidminer.tools.math.similarity.numerical.CosineSimilarity;
import com.rapidminer.tools.math.similarity.numerical.DTWDistance;
import com.rapidminer.tools.math.similarity.numerical.DiceNumericalSimilarity;
import com.rapidminer.tools.math.similarity.numerical.EuclideanDistance;
import com.rapidminer.tools.math.similarity.numerical.InnerProductSimilarity;
import com.rapidminer.tools.math.similarity.numerical.JaccardNumericalSimilarity;
import com.rapidminer.tools.math.similarity.numerical.KernelEuclideanDistance;
import com.rapidminer.tools.math.similarity.numerical.ManhattanDistance;
import com.rapidminer.tools.math.similarity.numerical.MaxProductSimilarity;
import com.rapidminer.tools.math.similarity.numerical.OverlapNumericalSimilarity;
/**
 * Convenience class for working with {@link DistanceMeasure}s. It keeps a registry of the
 * available measures, grouped into four measure types, and offers helper methods for
 * integrating the measure classes into operators: building the parameter types that let a
 * user select a measure and instantiating the selected measure.
 *
 * <p>The registry is held in static arrays that {@link #registerMeasure(int, String, Class)}
 * replaces without any synchronization.</p>
 *
 * @author Sebastian Land
 */
public class DistanceMeasures {

    /** Key of the parameter selecting the measure type (an index into {@link #MEASURE_TYPES}). */
    public static final String PARAMETER_MEASURE_TYPES = "measure_types";

    /** Key of the parameter selecting one of the nominal measures. */
    public static final String PARAMETER_NOMINAL_MEASURE = "nominal_measure";

    /** Key of the parameter selecting one of the numerical measures. */
    public static final String PARAMETER_NUMERICAL_MEASURE = "numerical_measure";

    /** Key of the parameter selecting one of the mixed measures. */
    public static final String PARAMETER_MIXED_MEASURE = "mixed_measure";

    /** Key of the parameter selecting one of the divergences. */
    public static final String PARAMETER_DIVERGENCE = "divergence";

    /** Display names of the measure types, indexed by the {@code *_TYPE} constants below. */
    public static final String[] MEASURE_TYPES = new String[] {
        "MixedMeasures",
        "NominalMeasures",
        "NumericalMeasures",
        "BregmanDivergences"
    };

    /** Measure type index of the mixed measures. */
    public static final int MIXED_MEASURES_TYPE = 0;

    /** Measure type index of the nominal measures. */
    public static final int NOMINAL_MEASURES_TYPE = 1;

    /** Measure type index of the numerical measures. */
    public static final int NUMERICAL_MEASURES_TYPE = 2;

    /** Measure type index of the divergences. */
    public static final int DIVERGENCES_TYPE = 3;

    /*
     * For every measure type there is a pair of parallel arrays: the i-th name belongs to
     * the i-th class. The category parameters deliver an index into these arrays.
     */

    private static final String[] NOMINAL_MEASURES = new String[] {
        "NominalDistance",
        "DiceSimilarity",
        "JaccardSimilarity",
        "KulczynskiSimilarity",
        "RogersTanimotoSimilarity",
        "RussellRaoSimilarity",
        "SimpleMatchingSimilarity"
    };

    private static final Class<?>[] NOMINAL_MEASURE_CLASSES = new Class<?>[] {
        NominalDistance.class,
        DiceNominalSimilarity.class,
        JaccardNominalSimilarity.class,
        KulczynskiNominalSimilarity.class,
        RogersTanimotoNominalSimilarity.class,
        RussellRaoNominalSimilarity.class,
        SimpleMatchingNominalSimilarity.class
    };

    private static final String[] MIXED_MEASURES = new String[] {
        "MixedEuclideanDistance"
    };

    private static final Class<?>[] MIXED_MEASURE_CLASSES = new Class<?>[] {
        MixedEuclideanDistance.class
    };

    /*
     * The kernel parameters depend on the position of KernelEuclideanDistance in the
     * numerical measures. That position is looked up in the class table (see
     * getParameterTypes), so the names and classes below only have to stay parallel.
     */
    private static final String[] NUMERICAL_MEASURES = new String[] {
        "EuclideanDistance",
        "CamberraDistance",
        "ChebychevDistance",
        "CorrelationSimilarity",
        "CosineSimilarity",
        "DiceSimilarity",
        "DynamicTimeWarpingDistance",
        "InnerProductSimilarity",
        "JaccardSimilarity",
        "KernelEuclideanDistance",
        "ManhattanDistance",
        "MaxProductSimilarity",
        "OverlapSimilarity"
    };

    private static final Class<?>[] NUMERICAL_MEASURE_CLASSES = new Class<?>[] {
        EuclideanDistance.class,
        CamberraNumericalDistance.class,
        ChebychevNumericalDistance.class,
        CorrelationSimilarity.class,
        CosineSimilarity.class,
        DiceNumericalSimilarity.class,
        DTWDistance.class,
        InnerProductSimilarity.class,
        JaccardNumericalSimilarity.class,
        KernelEuclideanDistance.class,
        ManhattanDistance.class,
        MaxProductSimilarity.class,
        OverlapNumericalSimilarity.class
    };

    private static final String[] DIVERGENCES = new String[] {
        "GeneralizedIDivergence",
        "ItakuraSaitoDistance",
        "KLDivergence",
        "LogarithmicLoss",
        "LogisticLoss",
        "MahalanobisDistance",
        "SquaredEuclideanDistance",
        "SquaredLoss",
    };

    private static final Class<?>[] DIVERGENCE_CLASSES = new Class<?>[] {
        GeneralizedIDivergence.class,
        ItakuraSaitoDistance.class,
        KLDivergence.class,
        LogarithmicLoss.class,
        LogisticLoss.class,
        MahalanobisDistance.class,
        SquaredEuclideanDistance.class,
        SquaredLoss.class,
    };

    /** Measure names per measure type; the rows are replaced by {@link #registerMeasure}. */
    private static final String[][] MEASURE_ARRAYS = new String[][] {
        MIXED_MEASURES,
        NOMINAL_MEASURES,
        NUMERICAL_MEASURES,
        DIVERGENCES
    };

    /** Measure classes per measure type; the rows are replaced by {@link #registerMeasure}. */
    private static final Class<?>[][] MEASURE_CLASS_ARRAYS = new Class<?>[][] {
        MIXED_MEASURE_CLASSES,
        NOMINAL_MEASURE_CLASSES,
        NUMERICAL_MEASURE_CLASSES,
        DIVERGENCE_CLASSES
    };

    /** Key of the measure selection parameter for each measure type, indexed by type. */
    private static final String[] MEASURE_PARAMETERS = new String[] {
        PARAMETER_MIXED_MEASURE,
        PARAMETER_NOMINAL_MEASURE,
        PARAMETER_NUMERICAL_MEASURE,
        PARAMETER_DIVERGENCE
    };

    /**
     * This method allows registering distance or similarity measures defined in plugins.
     * There are four different types of measures: Mixed measures cope with examples containing
     * nominal and numerical values. Numerical and nominal measures work only on their respective
     * type of attribute. Divergences are a less restricted mathematical concept than distances but
     * might be used by algorithms that do not need these restrictions. The type has to be
     * specified using the first parameter.
     *
     * <p>The new measure is appended to the end of the list of its type, so the indices of the
     * measures registered before stay unchanged.</p>
     *
     * @param measureType  the type, available as one of the static {@code *_TYPE} constants of
     *                     this class
     * @param measureName  the name of the measure to register
     * @param measureClass the class of the measure, which needs to extend DistanceMeasure
     * @throws IllegalArgumentException if the type is unknown or the name or class is
     *                                  {@code null}
     */
    public static void registerMeasure(int measureType, String measureName, Class<? extends DistanceMeasure> measureClass) {
        // Validate everything up front so that a rejected call leaves both tables untouched.
        if (measureType < 0 || measureType >= MEASURE_ARRAYS.length) {
            throw new IllegalArgumentException("Unknown measure type: " + measureType);
        }
        if (measureName == null) {
            throw new IllegalArgumentException("The measure name must not be null");
        }
        if (measureClass == null) {
            throw new IllegalArgumentException("The measure class must not be null");
        }

        String[] oldNames = MEASURE_ARRAYS[measureType];
        String[] newNames = new String[oldNames.length + 1];
        System.arraycopy(oldNames, 0, newNames, 0, oldNames.length);
        newNames[oldNames.length] = measureName;

        Class<?>[] oldClasses = MEASURE_CLASS_ARRAYS[measureType];
        Class<?>[] newClasses = new Class<?>[oldClasses.length + 1];
        System.arraycopy(oldClasses, 0, newClasses, 0, oldClasses.length);
        newClasses[oldClasses.length] = measureClass;

        MEASURE_ARRAYS[measureType] = newNames;
        MEASURE_CLASS_ARRAYS[measureType] = newClasses;
    }

    /**
     * Creates an uninitialized distance measure. Initialize the distance measure by calling
     * {@link DistanceMeasure#init(ExampleSet, ParameterHandler)}.
     *
     * @param parameterHandler the handler providing the measure selection parameters
     * @return the newly created, uninitialized measure
     * @throws UndefinedParameterError if a needed parameter is not defined
     * @throws OperatorException if the selection is invalid or the measure cannot be instantiated
     */
    public static DistanceMeasure createMeasure(ParameterHandler parameterHandler) throws UndefinedParameterError, OperatorException {
        return createMeasure(parameterHandler, null, null);
    }

    /**
     * Creates the distance measure selected by the parameters of the given handler and, if an
     * example set is given, initializes it with that example set.
     *
     * <p>If {@link #PARAMETER_MEASURE_TYPES} is not set, the measure type is derived from the
     * first measure parameter that is set, tested in the order mixed, nominal, numerical,
     * divergence.</p>
     *
     * @param parameterHandler the handler providing the measure selection parameters
     * @param exampleSet       the example set used for initialization; if {@code null} the
     *                         measure is returned uninitialized
     * @param ioContainer      not used
     * @return the newly created measure
     * @throws UndefinedParameterError if a needed parameter is not defined
     * @throws OperatorException if the selected type or measure index is out of range or the
     *                           measure class cannot be instantiated
     * @deprecated ioContainer is not used. Use a {@link DistanceMeasureHelper} to obtain distance measures.
     */
    @Deprecated
    public static DistanceMeasure createMeasure(ParameterHandler parameterHandler, ExampleSet exampleSet, IOContainer ioContainer) throws UndefinedParameterError, OperatorException {
        int measureType = resolveMeasureType(parameterHandler);
        if (measureType < 0 || measureType >= MEASURE_CLASS_ARRAYS.length) {
            throw new OperatorException("Unknown distance measure type: " + measureType);
        }

        Class<?>[] classes = MEASURE_CLASS_ARRAYS[measureType];
        int measureIndex = parameterHandler.getParameterAsInt(MEASURE_PARAMETERS[measureType]);
        if (measureIndex < 0 || measureIndex >= classes.length) {
            throw new OperatorException("Unknown distance measure index " + measureIndex + " for measure type " + measureType);
        }

        Class<?> measureClass = classes[measureIndex];
        DistanceMeasure measure;
        try {
            measure = (DistanceMeasure) measureClass.newInstance();
        } catch (InstantiationException e) {
            // keep the cause so that the actual reason of the failure is not lost
            throw new OperatorException("Could not instanciate distance measure " + measureClass, e);
        } catch (IllegalAccessException e) {
            throw new OperatorException("Could not instanciate distance measure " + measureClass, e);
        }
        if (exampleSet != null) {
            measure.init(exampleSet, parameterHandler);
        }
        return measure;
    }

    /**
     * Returns the value of the {@link #PARAMETER_MEASURE_TYPES} parameter of the given handler.
     *
     * @param parameterHandler the handler providing the parameter
     * @return the selected measure type as delivered by the handler
     * @throws UndefinedParameterError if the parameter is not defined
     */
    public static int getSelectedMeasureType(ParameterHandler parameterHandler) throws UndefinedParameterError {
        return parameterHandler.getParameterAsInt(PARAMETER_MEASURE_TYPES);
    }

    /**
     * Builds the parameter types for choosing a distance measure: the measure type, one
     * category parameter per measure type (each depending on the matching measure type being
     * selected) and the kernel parameters, which depend on the kernel Euclidean distance
     * being selected as numerical measure.
     *
     * @param parameterHandler the operator the dependency conditions are registered for
     * @return the list of parameter types
     */
    public static List<ParameterType> getParameterTypes(Operator parameterHandler) {
        List<ParameterType> list = new LinkedList<ParameterType>();
        list.add(new ParameterTypeCategory(PARAMETER_MEASURE_TYPES, "The measure type", MEASURE_TYPES, 0, false));

        ParameterType type = new ParameterTypeCategory(PARAMETER_MIXED_MEASURE, "Select measure", MEASURE_ARRAYS[MIXED_MEASURES_TYPE], 0, false);
        type.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_MEASURE_TYPES, MEASURE_TYPES, false, MIXED_MEASURES_TYPE));
        list.add(type);

        type = new ParameterTypeCategory(PARAMETER_NOMINAL_MEASURE, "Select measure", MEASURE_ARRAYS[NOMINAL_MEASURES_TYPE], 0, false);
        type.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_MEASURE_TYPES, MEASURE_TYPES, false, NOMINAL_MEASURES_TYPE));
        list.add(type);

        type = new ParameterTypeCategory(PARAMETER_NUMERICAL_MEASURE, "Select measure", MEASURE_ARRAYS[NUMERICAL_MEASURES_TYPE], 0, false);
        type.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_MEASURE_TYPES, MEASURE_TYPES, false, NUMERICAL_MEASURES_TYPE));
        list.add(type);

        type = new ParameterTypeCategory(PARAMETER_DIVERGENCE, "Select divergence", MEASURE_ARRAYS[DIVERGENCES_TYPE], 0, false);
        type.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_MEASURE_TYPES, MEASURE_TYPES, false, DIVERGENCES_TYPE));
        list.add(type);

        // The kernel parameters are only relevant for the kernel Euclidean distance. Its index
        // is derived from the class table instead of being hard coded.
        int kernelMeasureIndex = indexOfNumericalMeasure(KernelEuclideanDistance.class);
        list.addAll(registerDependency(Kernel.getParameters(parameterHandler), kernelMeasureIndex, parameterHandler));
        return list;
    }

    /**
     * This method provides the parameters to choose only from numerical measures.
     *
     * @param handler not used
     * @return a list containing only the {@link #PARAMETER_NUMERICAL_MEASURE} parameter type
     */
    public static List<ParameterType> getParameterTypesForNumericals(ParameterHandler handler) {
        List<ParameterType> list = new LinkedList<ParameterType>();
        list.add(new ParameterTypeCategory(PARAMETER_NUMERICAL_MEASURE, "Select measure", MEASURE_ARRAYS[NUMERICAL_MEASURES_TYPE], 0));
        return list;
    }

    /**
     * Determines the measure type to use: the value of {@link #PARAMETER_MEASURE_TYPES} if it
     * is set, otherwise the type belonging to the first measure parameter that is set.
     */
    private static int resolveMeasureType(ParameterHandler parameterHandler) throws UndefinedParameterError {
        if (parameterHandler.isParameterSet(PARAMETER_MEASURE_TYPES)) {
            return parameterHandler.getParameterAsInt(PARAMETER_MEASURE_TYPES);
        }
        // if type is not set, then there might be no type selection: test if one definition is present
        if (parameterHandler.isParameterSet(PARAMETER_MIXED_MEASURE)) {
            return MIXED_MEASURES_TYPE;
        }
        if (parameterHandler.isParameterSet(PARAMETER_NOMINAL_MEASURE)) {
            return NOMINAL_MEASURES_TYPE;
        }
        if (parameterHandler.isParameterSet(PARAMETER_NUMERICAL_MEASURE)) {
            return NUMERICAL_MEASURES_TYPE;
        }
        if (parameterHandler.isParameterSet(PARAMETER_DIVERGENCE)) {
            return DIVERGENCES_TYPE;
        }
        // if nothing fits: try to access the type parameter to get a proper exception
        return parameterHandler.getParameterAsInt(PARAMETER_MEASURE_TYPES);
    }

    /**
     * Returns the index of the first entry of the numerical measure classes that is exactly
     * the given class, or -1 if there is none.
     */
    private static int indexOfNumericalMeasure(Class<?> measureClass) {
        Class<?>[] classes = MEASURE_CLASS_ARRAYS[NUMERICAL_MEASURES_TYPE];
        for (int i = 0; i < classes.length; i++) {
            if (classes[i] == measureClass) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Registers two dependency conditions on each of the given parameter types: the numerical
     * measure parameter must have the given value and the measure type must be the numerical
     * one. The given collection itself is returned.
     */
    private static Collection<ParameterType> registerDependency(Collection<ParameterType> sourceTypeList, int selectedValue, Operator handler) {
        for (ParameterType type : sourceTypeList) {
            type.registerDependencyCondition(new EqualTypeCondition(handler, PARAMETER_NUMERICAL_MEASURE, MEASURE_ARRAYS[NUMERICAL_MEASURES_TYPE], false, selectedValue));
            type.registerDependencyCondition(new EqualTypeCondition(handler, PARAMETER_MEASURE_TYPES, MEASURE_TYPES, false, NUMERICAL_MEASURES_TYPE));
        }
        return sourceTypeList;
    }
}