/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.features;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import com.rapidminer.datatable.SimpleDataTable;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.gui.dialog.IndividualSelector;
import com.rapidminer.gui.dialog.StopDialog;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.ValueString;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.PortPairExtender;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule;
import com.rapidminer.operator.ports.metadata.GenerateNewMDRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.PassThroughRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.operator.ports.metadata.SubprocessTransformRule;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
/**
* This class is the superclass of all feature selection and generation
* operators. It provides an easy to use plug-in interface for operators that
* modify populations. Subclasses just have to supply lists of
* <tt>PopulationOperators</tt> by overriding
* <tt>getPreEvalutaionPopulationOperators()</tt> and
* <tt>getPostEvalutaionPopulationOperators()</tt> during a loop which will
* terminate if <tt>solutionGoodEnough()</tt> returns true.
*
* @author Simon Fischer, Ingo Mierswa
* <br>
*/
public abstract class FeatureOperator extends OperatorChain {
public static final String PARAMETER_NORMALIZE_WEIGHTS = "normalize_weights";
public static final String PARAMETER_SHOW_STOP_DIALOG = "show_stop_dialog";
public static final String PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION = "user_result_individual_selection";
public static final String PARAMETER_SHOW_POPULATION_PLOTTER = "show_population_plotter";
public static final String PARAMETER_PLOT_GENERATIONS = "plot_generations";
public static final String PARAMETER_CONSTRAINT_DRAW_RANGE = "constraint_draw_range";
public static final String PARAMETER_DRAW_DOMINATED_POINTS = "draw_dominated_points";
public static final String PARAMETER_POPULATION_CRITERIA_DATA_FILE = "population_criteria_data_file";
public static final String PARAMETER_MAXIMAL_FITNESS = "maximal_fitness";
private final InputPort exampleSetInput = getInputPorts().createPort("example set in");
private final OutputPort exampleSetOutput = getOutputPorts().createPort("example set out");
private final OutputPort attributeWeightsOutput = getOutputPorts().createPort("weights");
private final OutputPort performanceOutput = getOutputPorts().createPort("performance");
private final OutputPort subprocessExampleOutput = getSubprocess(0).getInnerSources().createPort("example set");
private final InputPort subprocessPerformanceInput = getSubprocess(0).getInnerSinks().createPort("performance");
private final PortPairExtender throughExtender = new PortPairExtender("through", getInputPorts(), getSubprocess(0).getInnerSources());
private ExampleSet exampleSet;
private Population population;
private PopulationEvaluator populationEvaluator;
/** The optimization stops if this maximal fitness was reached. */
private double maximalFitness = Double.POSITIVE_INFINITY;
private boolean checkForMaximalFitness = true;
private RandomGenerator random;
public FeatureOperator(OperatorDescription description) {
super(description, "Evaluation Process");
throughExtender.start();
exampleSetInput.addPrecondition(new SimplePrecondition(exampleSetInput, new ExampleSetMetaData()));
subprocessPerformanceInput.addPrecondition(new SimplePrecondition(subprocessPerformanceInput, new MetaData(PerformanceVector.class)));
getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, subprocessExampleOutput, SetRelation.EQUAL) {
@Override
public ExampleSetMetaData modifyExampleSet(ExampleSetMetaData metaData) {
return modifyInnerOutputExampleSet(metaData);
}
});
getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.EQUAL) {
@Override
public ExampleSetMetaData modifyExampleSet(ExampleSetMetaData metaData) {
return modifyOutputExampleSet(metaData);
}
});
getTransformer().addRule(throughExtender.makePassThroughRule());
getTransformer().addRule(new SubprocessTransformRule(getSubprocess(0)));
getTransformer().addRule(new GenerateNewMDRule(attributeWeightsOutput, new MetaData(AttributeWeights.class)));
getTransformer().addRule(new PassThroughRule(subprocessPerformanceInput, performanceOutput, false));
addValue(new ValueDouble("generation", "The number of the current generation.") {
@Override
public double getDoubleValue() {
if (population == null)
return 0;
return population.getGeneration();
}
});
addValue(new ValueDouble("performance", "The performance of the current generation (main criterion).") {
@Override
public double getDoubleValue() {
if (population == null)
return Double.NaN;
if (population.getCurrentBestPerformance() == null)
return Double.NaN;
PerformanceVector pv = population.getCurrentBestPerformance();
if (pv == null)
return Double.NaN;
return pv.getMainCriterion().getAverage();
}
});
addValue(new ValueDouble("best", "The performance of the best individual ever (main criterion).") {
@Override
public double getDoubleValue() {
if (population == null)
return Double.NaN;
PerformanceVector pv = population.getBestPerformanceEver();
if (pv == null)
return Double.NaN;
return pv.getMainCriterion().getAverage();
}
});
addValue(new ValueDouble("average_length", "The average number of attributes.") {
@Override
public double getDoubleValue() {
if (population == null)
return Double.NaN;
else {
double lengthSum = 0.0d;
for (int i = 0; i < population.getNumberOfIndividuals(); i++)
lengthSum += population.get(i).getNumberOfUsedAttributes();
return lengthSum / population.getNumberOfIndividuals();
}
}
});
addValue(new ValueDouble("best_length", "The number of attributes of the best example set.") {
@Override
public double getDoubleValue() {
if (population == null)
return Double.NaN;
Individual individual = population.getBestIndividualEver();
if (individual != null) {
return individual.getNumberOfUsedAttributes();
} else {
return Double.NaN;
}
}
});
addValue(new ValueString("feature_names", "The names of the features of the best individual so far.") {
@Override
public String getStringValue() {
if (population == null)
return "?";
Individual individual = population.getBestIndividualEver();
if (individual != null) {
double[] weights = individual.getWeights();
String[] names = com.rapidminer.example.Tools.getRegularAttributeNames(exampleSet);
StringBuffer result = new StringBuffer();
boolean first = true;
for (int i = 0; i < weights.length; i++) {
if (weights[i] > 0) {
if (!first) {
result.append(", ");
}
result.append(names[i]);
first = false;
}
}
return result.toString();
} else {
return "?";
}
}
});
}
/**
* Subclasses might override this method in order to change the meta data delivered
* to the inner operators.
*/
protected ExampleSetMetaData modifyInnerOutputExampleSet(ExampleSetMetaData metaData) {
return metaData;
}
/**
* Subclasses might override this method in order to change the final outputed meta data
*/
protected ExampleSetMetaData modifyOutputExampleSet(ExampleSetMetaData metaData) {
return metaData;
}
/**
* Create an initial population. The example set will be cloned before the
* method is invoked. This method is invoked after the pre- and
* postevaluation population operators were collected.
*/
public abstract Population createInitialPopulation(ExampleSet es) throws OperatorException;
/**
* Must return a list of <tt>PopulationOperator</tt>s. All operators are
* applied to the population in their order within the list before the
* population is evaluated. Since this methode is invoked only once the list
* cannot by dynamically changed during runtime.
*/
public abstract List<PopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException;
/**
* Must return a list of <tt>PopulationOperator</tt>s. All operators are
* applied to the population in their order within the list after the
* population is evaluated. Since this methode is invoked only once the list
* cannot by dynamically changed during runtime.
*/
public abstract List<PopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input) throws OperatorException;
/**
* Has to return true if the main loop can be stopped because a solution is
* considered to be good enough according to some criterion.
*/
public abstract boolean solutionGoodEnough(Population pop) throws OperatorException;
protected RandomGenerator getRandom() {
return random;
}
protected Population getPopulation() {
return population;
}
/**
* Applies the feature operator:
* <ol>
* <li>collects the pre- and postevaluation operators
* <li>create an initial population
* <li>evaluate the initial population
* <li>loop as long as solution is not good enough
* <ol>
* <li>apply all pre evaluation operators
* <li>evaluate the population
* <li>update the population's best individual
* <li>apply all post evaluation operators
* </ol>
* <li>return all generation's best individual
* </ol>
*/
@Override
public void doWork() throws OperatorException {
// init
this.random = RandomGenerator.getRandomGenerator(this);
this.maximalFitness = getParameterAsDouble(PARAMETER_MAXIMAL_FITNESS);
ExampleSet es = exampleSetInput.getData(ExampleSet.class);
// creating population evaluator
this.populationEvaluator = getPopulationEvaluator(es);
if (es.getAttributes().size() == 0) {
throw new UserError(this, 125, 0, 1);
}
this.exampleSet = es;
List preOps = getPreEvaluationPopulationOperators(es);
List postOps = getPostEvaluationPopulationOperators(es);
// stop dialog
boolean userDialogOk = true;
StopDialog stopDialog = null;
if (getParameterAsBoolean(PARAMETER_SHOW_STOP_DIALOG)) {
stopDialog = new StopDialog("Stop Dialog", "<html>Press the stop button to abort the search for best feature space.<br>" + "The best individual found so far is returned.</html>");
stopDialog.setVisible(true);
}
// create initial population
population = createInitialPopulation(es);
log("Initial population has " + population.getNumberOfIndividuals() + " individuals.");
evaluate(population, exampleSet);
// population plotter
PopulationPlotter popPlotter = null;
// Check must be made to ensure inner operators did not exite prematurely leaving null performance
checkForStop();
population.updateEvaluation();
if (getParameterAsBoolean(PARAMETER_SHOW_POPULATION_PLOTTER)) {
popPlotter = new PopulationPlotter(exampleSet, getParameterAsInt(PARAMETER_PLOT_GENERATIONS), getParameterAsBoolean(PARAMETER_CONSTRAINT_DRAW_RANGE), getParameterAsBoolean(PARAMETER_DRAW_DOMINATED_POINTS));
popPlotter.operate(population);
}
inApplyLoop();
// optimization loop
while (userDialogOk && !solutionGoodEnough(population) && !isMaximumReached()) {
population.nextGeneration();
applyOpList(preOps, population);
log(Tools.ordinalNumber(population.getGeneration()) + " generation has " + population.getNumberOfIndividuals() + " individuals.");
log("Evaluating " + Tools.ordinalNumber(population.getGeneration()) + " population.");
evaluate(population, exampleSet);
checkForStop();
population.updateEvaluation();
applyOpList(postOps, population);
if (popPlotter != null) {
popPlotter.operate(population);
}
userDialogOk = stopDialog == null ? true : stopDialog.isStillRunning();
inApplyLoop();
}
if (stopDialog != null) {
stopDialog.setVisible(false);
stopDialog.dispose();
}
// optimization finished: Check must be made to ensure inner operators did not exite prematurely leaving null performance
checkForStop();
applyOpList(postOps, population);
// write criteria data of the final population into a file
if (isParameterSet(PARAMETER_POPULATION_CRITERIA_DATA_FILE)) {
SimpleDataTable finalStatistics = PopulationPlotter.createDataTable(population);
PopulationPlotter.fillDataTable(finalStatistics, new HashMap<String, double[]>(), population, getParameterAsBoolean(PARAMETER_DRAW_DOMINATED_POINTS));
File outFile = getParameterAsFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE, true);
PrintWriter out = null;
try {
out = new PrintWriter(new FileWriter(outFile));
finalStatistics.write(out);
} catch (IOException e) {
throw new UserError(this, e, 303, new Object[] { outFile, e.getMessage() });
} finally {
if (out != null) {
out.close();
}
}
}
// create result example set
Individual bestEver = null;
if (getParameterAsBoolean(PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION)) {
IndividualSelector selector = new IndividualSelector(exampleSet, population);
selector.setVisible(true);
bestEver = selector.getSelectedIndividual();
if (bestEver == null)
logWarning("No individual selected. Using individual with highest fitness for main criterion...");
}
if (bestEver == null) {
bestEver = population.getBestIndividualEver();
}
// create resulting weights
double[] weights = bestEver.getWeights();
int a = 0;
AttributeWeights attributeWeights = new AttributeWeights();
for (Attribute attribute : exampleSet.getAttributes()) {
double weight = weights[a];
if (Double.isNaN(weight))
weight = 1.0d;
attributeWeights.setWeight(attribute.getName(), weight);
a++;
}
// normalize weights
if (getParameterAsBoolean(PARAMETER_NORMALIZE_WEIGHTS))
attributeWeights.normalize();
// clean up
exampleSetOutput.deliver(createCleanClone(exampleSet, weights));
attributeWeightsOutput.deliver(attributeWeights);
performanceOutput.deliver(bestEver.getPerformance());
this.population.clear();
this.population = null;
this.exampleSet = null;
}
public static ExampleSet createCleanClone(ExampleSet exampleSet, double[] weights) {
AttributeWeightedExampleSet clone = new AttributeWeightedExampleSet(exampleSet, null);
int a = 0;
for (Attribute attribute : clone.getAttributes()) {
clone.setWeight(attribute, weights[a++]);
}
return clone.createCleanClone();
}
/** Applies all PopulationOperators in opList to the population. */
void applyOpList(List opList, Population population) throws OperatorException {
Iterator i = opList.listIterator();
while (i.hasNext()) {
PopulationOperator op = (PopulationOperator) i.next();
if (op.performOperation(population.getGeneration())) {
try {
op.operate(population);
for (int k = 0; k < population.getNumberOfIndividuals(); k++) {
if (population.get(k).getNumberOfUsedAttributes() <= 0) {
logError("Population operator " + op + " has produced an example set without attributes!");
}
}
} catch (Exception e) {
throw new UserError(this, e, 108, e.toString());
}
}
}
}
/**
* Evaluates all individuals in the population by applying the inner
* operators.
*/
private void evaluate(Population population, ExampleSet originalExampleSet) throws OperatorException {
populationEvaluator.evaluate(population);
}
/**
* This method gives access to the subprocess for evaluating an example set
* @param exampleSet a weighted exampleSet
* @return
* @throws OperatorException
*/
public final PerformanceVector executeEvaluationProcess(ExampleSet exampleSet) throws OperatorException {
subprocessExampleOutput.deliver(exampleSet);
throughExtender.passDataThrough();
runEvaluationProcess();
return subprocessPerformanceInput.getData(PerformanceVector.class);
}
protected void runEvaluationProcess() throws OperatorException {
getSubprocess(0).execute();
}
/** This method checks if the maximum was reached for the main criterion. */
private boolean isMaximumReached() {
if (checkForMaximalFitness) {
PerformanceVector pv = population.getBestPerformanceEver();
if (pv == null) {
return false;
} else {
if (pv.getMainCriterion().getFitness() == Double.POSITIVE_INFINITY)
return true;
else if (pv.getMainCriterion().getMaxFitness() == pv.getMainCriterion().getFitness())
return true;
else
return pv.getMainCriterion().getFitness() >= maximalFitness;
}
} else {
return false;
}
}
/**
* Sets if the operator should check if the maximum was reached for the main
* criterion. Subclasses may want to set this to false, e.g. for
* multiobjective optimization.
*/
protected void setCheckForMaximum(boolean checkForMaximalFitness) {
this.checkForMaximalFitness = checkForMaximalFitness;
}
/**
* Returns if the operator should check if the maximum was reached for the
* main criterion. Subclasses may want to set this to false, e.g. for
* multiobjective optimization.
*/
protected boolean getCheckForMaximum() {
return this.checkForMaximalFitness;
}
public InputPort getExampleSetInput() {
return exampleSetInput;
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
types.add(new ParameterTypeBoolean(PARAMETER_NORMALIZE_WEIGHTS, "Indicates if the final weights should be normalized.", true));
types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
ParameterType type = new ParameterTypeBoolean(PARAMETER_SHOW_STOP_DIALOG, "Determines if a dialog with a button should be displayed which stops the run: the best individual is returned.", false);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeBoolean(PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION, "Determines if the user wants to select the final result individual from the last population.", false));
// generation plot parameters
types.add(new ParameterTypeBoolean(PARAMETER_SHOW_POPULATION_PLOTTER, "Determines if the current population should be displayed in performance space.", false));
type = new ParameterTypeInt(PARAMETER_PLOT_GENERATIONS, "Update the population plotter in these generations.", 1, Integer.MAX_VALUE, 10);
type.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_SHOW_POPULATION_PLOTTER, false, true));
types.add(type);
type = new ParameterTypeBoolean(PARAMETER_CONSTRAINT_DRAW_RANGE, "Determines if the draw range of the population plotter should be constrained between 0 and 1.", false);
type.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_SHOW_POPULATION_PLOTTER, false, true));
types.add(type);
type = new ParameterTypeBoolean(PARAMETER_DRAW_DOMINATED_POINTS, "Determines if only points which are not Pareto dominated should be painted.", true);
type.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_SHOW_POPULATION_PLOTTER, false, true));
types.add(type);
types.add(new ParameterTypeFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE, "The path to the file in which the criteria data of the final population should be saved.", "cri", true));
types.add(new ParameterTypeDouble(PARAMETER_MAXIMAL_FITNESS, "The optimization will stop if the fitness reaches the defined maximum.", 0.0d, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
return types;
}
protected PopulationEvaluator getPopulationEvaluator(ExampleSet exampleSet) throws OperatorException {
return new SimplePopulationEvaluator(this, exampleSet);
}
}