/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.features.construction;
import java.util.Iterator;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.gui.dialog.StopDialog;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SubprocessTransformRule;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
/**
 * This class is the superclass of all feature selection and generation
 * operators. It provides an easy to use plug-in interface for operators that
 * modify populations. Subclasses just have to supply lists of
 * <tt>ExampleSetBasedPopulationOperator</tt>s by overriding
 * <tt>getPreEvaluationPopulationOperators()</tt> and
 * <tt>getPostEvaluationPopulationOperators()</tt>. These operators are applied
 * during a loop which will terminate if <tt>solutionGoodEnough()</tt> returns
 * true, if the maximal fitness was reached, or if the user pressed the button
 * of the (optional) stop dialog.
 *
 * @author Ingo Mierswa
 * <br>
 */
public abstract class ExampleSetBasedFeatureOperator extends OperatorChain {

    /** Parameter key: show a dialog which allows the user to stop the search. */
    public static final String PARAMETER_SHOW_STOP_DIALOG = "show_stop_dialog";

    /** Parameter key: fitness value at which the optimization stops. */
    public static final String PARAMETER_MAXIMAL_FITNESS = "maximal_fitness";

    /** The population of the current run; <code>null</code> until {@link #doWork()} created it. */
    private ExampleSetBasedPopulation population;

    /** The optimization stops if this maximal fitness was reached. */
    private double maximalFitness = Double.POSITIVE_INFINITY;

    /** Indicates if the loop should stop as soon as the main criterion reached its maximum. */
    private boolean checkForMaximalFitness = true;

    /** Number of evaluations for which the inner subprocess was actually executed. */
    private int evaluationCounter = 0;

    /** Number of all evaluation requests, including those answered by an already known performance. */
    private int totalEvaluations = 0;

    /** The random generator of the current run, (re-)initialized in {@link #doWork()}. */
    private RandomGenerator random;

    private final InputPort exampleSetInput = getInputPorts().createPort("example set in", ExampleSet.class);
    private final OutputPort innerExampleSetSource = getSubprocess(0).getInnerSources().createPort("example set source");
    private final InputPort innerPerformanceSink = getSubprocess(0).getInnerSinks().createPort("performance sink", PerformanceVector.class);
    private final OutputPort exampleSetOutput = getOutputPorts().createPort("example set out");
    private final OutputPort attributeWeightsOutput = getOutputPorts().createPort("attribute weights out");
    private final OutputPort performanceOutput = getOutputPorts().createPort("performance out");

    /**
     * Creates the operator with a single subprocess named "Evaluation Process",
     * registers the meta data transformation rules and the logging values
     * <tt>generation</tt>, <tt>performance</tt>, <tt>best</tt>,
     * <tt>average_length</tt> and <tt>best_length</tt>.
     */
    public ExampleSetBasedFeatureOperator(OperatorDescription description) {
        super(description, "Evaluation Process");

        getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, innerExampleSetSource, SetRelation.SUBSET));
        getTransformer().addRule(new SubprocessTransformRule(getSubprocess(0)));
        getTransformer().addPassThroughRule(innerPerformanceSink, performanceOutput);
        getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.SUBSET));
        getTransformer().addGenerationRule(attributeWeightsOutput, AttributeWeights.class);

        addValue(new ValueDouble("generation", "The number of the current generation.") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return 0;
                }
                return population.getGeneration();
            }
        });
        addValue(new ValueDouble("performance", "The performance of the current generation (main criterion).") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                // fetch the vector only once so that check and usage refer to the same object
                PerformanceVector pv = population.getCurrentBestPerformance();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });
        addValue(new ValueDouble("best", "The performance of the best individual ever (main criterion).") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                PerformanceVector pv = population.getBestPerformanceEver();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });
        addValue(new ValueDouble("average_length", "The average number of attributes.") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                double lengthSum = 0.0d;
                for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
                    lengthSum += population.get(i).getExampleSet().getNumberOfUsedAttributes();
                }
                // floating point division: an empty population yields NaN instead of an exception
                return lengthSum / population.getNumberOfIndividuals();
            }
        });
        addValue(new ValueDouble("best_length", "The number of attributes of the best example set.") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                ExampleSetBasedIndividual individual = population.getBestIndividualEver();
                if (individual == null) {
                    return Double.NaN;
                }
                AttributeWeightedExampleSet eSet = individual.getExampleSet();
                if (eSet == null) {
                    return Double.NaN;
                }
                return eSet.getNumberOfUsedAttributes();
            }
        });
    }

    /**
     * Create an initial population. This method is invoked after the pre- and
     * post-evaluation population operators were collected. Note that
     * {@link #doWork()} hands over the example set exactly as it was received
     * from the input port; this class does not create an additional clone.
     */
    public abstract ExampleSetBasedPopulation createInitialPopulation(ExampleSet es) throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are
     * applied to the population in their order within the list before the
     * population is evaluated. Since this method is invoked only once the list
     * cannot be dynamically changed during runtime.
     */
    public abstract List<ExampleSetBasedPopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are
     * applied to the population in their order within the list after the
     * population is evaluated. Since this method is invoked only once the list
     * cannot be dynamically changed during runtime.
     */
    public abstract List<ExampleSetBasedPopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input) throws OperatorException;

    /**
     * Has to return true if the main loop can be stopped because a solution is
     * considered to be good enough according to some criterion.
     */
    public abstract boolean solutionGoodEnough(ExampleSetBasedPopulation pop) throws OperatorException;

    /** Returns the random generator of the current run (<code>null</code> before the first run). */
    protected RandomGenerator getRandom() {
        return random;
    }

    /** Returns the current population (<code>null</code> before the first run). */
    protected ExampleSetBasedPopulation getPopulation() {
        return population;
    }

    /**
     * Applies the feature operator:
     * <ol>
     * <li>collects the pre- and postevaluation operators
     * <li>create an initial population
     * <li>evaluate the initial population
     * <li>loop as long as solution is not good enough
     * <ol>
     * <li>apply all pre evaluation operators
     * <li>evaluate the population
     * <li>update the population's best individual
     * <li>apply all post evaluation operators
     * </ol>
     * <li>return all generation's best individual
     * </ol>
     * The best individual is delivered as example set together with its
     * normalized attribute weights and its performance.
     *
     * @throws OperatorException
     *             if the input example set has no attributes (user error 125)
     *             or if the evaluation or one of the population operators
     *             fails
     */
    @Override
    public void doWork() throws OperatorException {
        // init
        this.random = RandomGenerator.getRandomGenerator(this);
        this.evaluationCounter = 0;
        this.totalEvaluations = 0;
        this.maximalFitness = getParameterAsDouble(PARAMETER_MAXIMAL_FITNESS);

        ExampleSet es = exampleSetInput.getData(ExampleSet.class);
        if (es.getAttributes().size() == 0) {
            throw new UserError(this, 125, 0, 1);
        }

        List<ExampleSetBasedPopulationOperator> preOps = getPreEvaluationPopulationOperators(es);
        List<ExampleSetBasedPopulationOperator> postOps = getPostEvaluationPopulationOperators(es);

        // stop dialog
        StopDialog stopDialog = null;
        if (getParameterAsBoolean(PARAMETER_SHOW_STOP_DIALOG)) {
            stopDialog = new StopDialog("Stop Dialog", "<html>Press the stop button to abort the search for best feature space.<br>" + "The best individual found so far is returned.</html>");
            stopDialog.setVisible(true);
        }

        try {
            // create initial population
            population = createInitialPopulation(es);
            log("Initial population has " + population.getNumberOfIndividuals() + " individuals.");
            evaluate(population);

            // optimization loop
            boolean userDialogOk = true;
            while (userDialogOk && !solutionGoodEnough(population) && !isMaximumReached()) {
                population.nextGeneration();

                applyOpList(preOps, population);

                log(Tools.ordinalNumber(population.getGeneration()) + " generation has " + population.getNumberOfIndividuals() + " individuals.");
                log("Evaluating " + Tools.ordinalNumber(population.getGeneration()) + " population.");

                evaluate(population);
                population.updateEvaluation();
                applyOpList(postOps, population);

                userDialogOk = stopDialog == null || stopDialog.isStillRunning();
                inApplyLoop();
            }
        } finally {
            // close the dialog on every exit path, also if the search failed with an exception
            if (stopDialog != null) {
                stopDialog.setVisible(false);
                stopDialog.dispose();
            }
        }

        // If the loop body was never entered, updateEvaluation() was not called
        // and no best individual has been recorded yet. Record it from the
        // (already evaluated) initial population instead of failing with a
        // NullPointerException below.
        if (population.getBestIndividualEver() == null) {
            population.updateEvaluation();
        }

        // optimization finished
        applyOpList(postOps, population);
        log("Optimization finished. " + evaluationCounter + " / " + totalEvaluations + " evaluations performed.");

        // create result example set
        ExampleSetBasedIndividual bestEver = population.getBestIndividualEver();

        // create resulting weights: attributes without a defined weight count as fully used
        AttributeWeightedExampleSet weightedResultSet = bestEver.getExampleSet();
        for (Attribute attribute : weightedResultSet.getAttributes()) {
            if (Double.isNaN(weightedResultSet.getWeight(attribute))) {
                weightedResultSet.setWeight(attribute, 1.0d);
            }
        }

        // weight names which are not part of the result set get a zero weight
        AttributeWeights weights = weightedResultSet.getAttributeWeights();
        for (String name : weights.getAttributeNames()) {
            if (weightedResultSet.getAttributes().get(name) == null) {
                weights.setWeight(name, 0.0d);
            }
        }

        // normalize weights
        weights.normalize();

        exampleSetOutput.deliver(weightedResultSet.createCleanClone());
        attributeWeightsOutput.deliver(weights);
        performanceOutput.deliver(bestEver.getPerformance());
    }

    /**
     * Applies all PopulationOperators in opList to the population. Operators
     * whose <tt>performOperation()</tt> returns false for the current
     * generation are skipped. A warning is logged if an operator leaves an
     * individual without used attributes.
     *
     * @param opList
     *            the operators to apply; each element must be an
     *            {@link ExampleSetBasedPopulationOperator}
     * @param population
     *            the population to modify
     * @throws OperatorException
     *             operator exceptions raised by a population operator are
     *             passed on unchanged so that their own error information is
     *             not masked; every other exception is wrapped into user
     *             error 108
     */
    void applyOpList(List<?> opList, ExampleSetBasedPopulation population) throws OperatorException {
        for (Object element : opList) {
            ExampleSetBasedPopulationOperator op = (ExampleSetBasedPopulationOperator) element;
            if (!op.performOperation(population.getGeneration())) {
                continue;
            }
            try {
                op.operate(population);
                for (int k = 0; k < population.getNumberOfIndividuals(); k++) {
                    if (population.get(k).getExampleSet().getNumberOfUsedAttributes() <= 0) {
                        getLogger().warning("Population operator " + op + " has produced an example set without attributes!");
                    }
                }
            } catch (Exception e) {
                // instanceof check instead of a separate catch clause: compiles regardless of
                // which checked exceptions operate() declares
                if (e instanceof OperatorException) {
                    throw (OperatorException) e;
                }
                throw new UserError(this, e, 108, e.toString());
            }
        }
    }

    /**
     * Evaluates all individuals in the population by applying the inner
     * operators.
     */
    protected void evaluate(ExampleSetBasedPopulation population) throws OperatorException {
        for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
            evaluate(population.get(i));
        }
    }

    /**
     * Evaluates the given individual. The performance is stored in the
     * individual and also returned by this method. Individuals which already
     * carry a performance are not evaluated again; they only increase the
     * counter of total evaluations.
     */
    protected PerformanceVector evaluate(ExampleSetBasedIndividual individual) throws OperatorException {
        totalEvaluations++;

        PerformanceVector knownPerformance = individual.getPerformance();
        if (knownPerformance != null) {
            return knownPerformance;
        }

        evaluationCounter++;
        AttributeWeightedExampleSet clone = individual.getExampleSet().createCleanClone();
        innerExampleSetSource.deliver(clone);
        getSubprocess(0).execute();
        PerformanceVector performanceVector = innerPerformanceSink.getData(PerformanceVector.class);
        individual.setPerformance(performanceVector);
        return performanceVector;
    }

    /**
     * This method checks if the maximum was reached for the main criterion,
     * i.e. if the fitness of the best performance ever is infinite, equal to
     * the maximal fitness of the criterion, or at least the value of the
     * parameter <tt>maximal_fitness</tt>. Always returns false if the check is
     * disabled or if no best performance is known yet.
     */
    private boolean isMaximumReached() {
        if (!checkForMaximalFitness) {
            return false;
        }
        PerformanceVector pv = population.getBestPerformanceEver();
        if (pv == null) {
            return false;
        }
        double fitness = pv.getMainCriterion().getFitness();
        return fitness == Double.POSITIVE_INFINITY
                || pv.getMainCriterion().getMaxFitness() == fitness
                || fitness >= maximalFitness;
    }

    /**
     * Sets if the operator should check if the maximum was reached for the main
     * criterion. Subclasses may want to set this to false, e.g. for
     * multiobjective optimization.
     */
    protected void setCheckForMaximum(boolean checkForMaximalFitness) {
        this.checkForMaximalFitness = checkForMaximalFitness;
    }

    /**
     * Returns if the operator should check if the maximum was reached for the
     * main criterion. Subclasses may want to set this to false, e.g. for
     * multiobjective optimization.
     */
    protected boolean getCheckForMaximum() {
        return this.checkForMaximalFitness;
    }

    /**
     * Extends the parameters of the superclass by the random generator
     * parameters, the stop dialog switch and the maximal fitness.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));

        ParameterType type = new ParameterTypeBoolean(PARAMETER_SHOW_STOP_DIALOG, "Determines if a dialog with a button should be displayed which stops the run: the best individual is returned.", false);
        types.add(type);
        types.add(new ParameterTypeDouble(PARAMETER_MAXIMAL_FITNESS, "The optimization will stop if the fitness reaches the defined maximum.", 0.0d, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
        return types;
    }
}