/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing;
import java.util.Collection;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.NonSpecialAttributesExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.GenerateModelTransformationRule;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.container.Pair;
/**
* Superclass for all preprocessing operators. Classes which extend this class
* must implement the method {@link #createPreprocessingModel(ExampleSet)}. The
* model created by this method can also be returned by this operator and will
* be combined with other models.
*
* @author Ingo Mierswa
*/
public abstract class PreprocessingOperator extends AbstractDataProcessing {
private final OutputPort modelOutput = getOutputPorts().createPort("preprocessing model");
protected final AttributeSubsetSelector attributeSelector = new AttributeSubsetSelector(this, getExampleSetInputPort(), getFilterValueTypes());
/** The parameter name for "Indicates if the preprocessing model should also be returned" */
public static final String PARAMETER_RETURN_PREPROCESSING_MODEL = "return_preprocessing_model";
/**
* Indicates if this operator should create a view (new example set on the view stack) instead of directly changing
* the data.
*/
public static final String PARAMETER_CREATE_VIEW = "create_view";
public PreprocessingOperator(OperatorDescription description) {
super(description);
getTransformer().addRule(new GenerateModelTransformationRule(getExampleSetInputPort(), modelOutput, getPreprocessingModelClass()));
getExampleSetInputPort().addPrecondition(attributeSelector.makePrecondition());
}
/**
* Subclasses might override this method to define the meta data transformation performed by this operator.
* The default implementation takes all attributes specified by the {@link AttributeSubsetSelector}
* and passes them to {@link #modifyAttributeMetaData(ExampleSetMetaData, AttributeMetaData)} and replaces them accordingly.
* @throws UndefinedParameterError
*/
@Override
protected ExampleSetMetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData) throws UndefinedParameterError {
ExampleSetMetaData subsetMetaData = attributeSelector.getMetaDataSubset(exampleSetMetaData, isSupportingAttributeRoles());
checkSelectedSubsetMetaData(subsetMetaData);
for (AttributeMetaData amd : subsetMetaData.getAllAttributes()) {
Collection<AttributeMetaData> replacement = null;
replacement = modifyAttributeMetaData(exampleSetMetaData, amd);
if (replacement != null) {
if (replacement.size() == 1) {
AttributeMetaData replacementAttribute = replacement.iterator().next();
replacementAttribute.setRole(exampleSetMetaData.getAttributeByName(amd.getName()).getRole());
}
exampleSetMetaData.removeAttribute(amd);
exampleSetMetaData.addAllAttributes(replacement);
}
}
return exampleSetMetaData;
}
/** Can be overridden to check the selected attributes for compatibility. */
protected void checkSelectedSubsetMetaData(ExampleSetMetaData subsetMetaData) {
}
/** If this preprocessing operator generates new attributes, the corresponding meta data should be
* returned by this method. The attribute will be replaced by the collection.
* If this operator modifies a single one, amd itself should be modified as a side effect
* and null should be returned. Note: If an empty collection is returned, amd will be removed,
* but no new attribute will be added.
**/
protected abstract Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd, AttributeMetaData amd) throws UndefinedParameterError ;
public abstract PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException;
/**
* This method allows subclasses to easily get a collection of the affected attributes.
*
* @throws UndefinedParameterError
* @throws UserError
*/
protected final ExampleSet getSelectedAttributes(ExampleSet exampleSet) throws UndefinedParameterError, UserError {
return attributeSelector.getSubset(exampleSet, isSupportingAttributeRoles());
}
@Override
public final ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
ExampleSet workingSet = (isSupportingAttributeRoles()) ? getSelectedAttributes(exampleSet) : new NonSpecialAttributesExampleSet(getSelectedAttributes(exampleSet));
Model model = createPreprocessingModel(workingSet);
model.setParameter(PARAMETER_CREATE_VIEW, getParameterAsBoolean(PARAMETER_CREATE_VIEW));
if (getExampleSetOutputPort().isConnected())
exampleSet = model.apply(exampleSet);
modelOutput.deliver(model);
return exampleSet;
}
/**
* Helper wrapper for {@link #exampleSetInput that can be called by other operators to apply this operator when it
* is created anonymously.
*/
public ExampleSet doWork(ExampleSet exampleSet) throws OperatorException {
ExampleSet workingSet = (isSupportingAttributeRoles()) ? getSelectedAttributes(exampleSet) : new NonSpecialAttributesExampleSet(getSelectedAttributes(exampleSet));
Model model = createPreprocessingModel(workingSet);
model.setParameter(PARAMETER_CREATE_VIEW, getParameterAsBoolean(PARAMETER_CREATE_VIEW));
return model.apply(exampleSet);
}
public Pair<ExampleSet, Model> doWorkModel(ExampleSet exampleSet) throws OperatorException {
exampleSet = apply(exampleSet);
Model model = modelOutput.getData(Model.class);
return new Pair<ExampleSet, Model>(exampleSet, model);
}
@Override
public boolean writesIntoExistingData() {
return !getParameterAsBoolean(PARAMETER_CREATE_VIEW);
}
@Override
public boolean shouldAutoConnect(OutputPort outputPort) {
if (outputPort == modelOutput) {
return getParameterAsBoolean(PARAMETER_RETURN_PREPROCESSING_MODEL);
} else {
return super.shouldAutoConnect(outputPort);
}
}
/**
* Defines the value types of the attributes which are processed or
* affected by this operator. Has to be overridden to restrict
* the attributes which can be chosen by an {@link AttributeSubsetSelector}.
* @return array of value types
*/
protected abstract int[] getFilterValueTypes();
public abstract Class<? extends PreprocessingModel> getPreprocessingModelClass();
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
ParameterType type = new ParameterTypeBoolean(PARAMETER_RETURN_PREPROCESSING_MODEL, "Indicates if the preprocessing model should also be returned", false);
type.setHidden(true);
types.add(type);
type = new ParameterTypeBoolean(PARAMETER_CREATE_VIEW, "Create View to apply preprocessing instead of changing the data", false);
type.setHidden(!isSupportingView());
types.add(type);
types.addAll(attributeSelector.getParameterTypes());
return types;
}
/**
* Subclasses which need to have the attribute roles must return true. Otherwise all selected attributes are
* converted into regular and afterwards given their old roles.
*/
public boolean isSupportingAttributeRoles() {
return false;
}
/**
* Subclasses might overwrite this in order to hide the create_view parameter
*
* @return
*/
public boolean isSupportingView() {
return true;
}
public OutputPort getPreprocessingModelOutputPort() {
return modelOutput;
}
}