/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.neuralnet;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.RandomGenerator;
/**
* <p>This operator learns a model by means of a feed-forward neural network trained by a
* backpropagation algorithm (multi-layer perceptron). The user can define the structure
* of the neural network with the parameter list "hidden_layers". Each list entry
* describes a new hidden layer. The key of each entry must correspond to the layer name.
* The value of each entry must be a number defining the size of the hidden layer. A size value
* of -1 indicates that the layer size should be calculated from the numbers of attributes
* and classes of the input example set. In this case, the layer size will be set to
* (number of attributes + number of classes) / 2 + 1.</p>
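*
* <p>For example, a list entry with key "Hidden 1" and value 10 adds a hidden layer named
* "Hidden 1" with 10 nodes. For an example set with 10 attributes and a 2-class label, a
* size value of -1 would yield a layer size of (10 + 2) / 2 + 1 = 7.</p>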
*
* <p>If the user does not specify any hidden layers, a default hidden layer with
* sigmoid type and size (number of attributes + number of classes) / 2 + 1 will be created and
* added to the net. If only a single layer without nodes is specified, the input nodes are
* directly connected to the output nodes and no hidden layer will be used.</p>
*
* <p>The activation function used is the usual sigmoid function. Therefore, the value ranges
* of the attributes should be scaled to the range between -1 and +1. This is also done by this operator if
* not specified otherwise by the corresponding parameter setting. The type of the output node
* is sigmoid if the learning data describes a classification task and linear for numerical
* regression tasks.</p>
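*
* <p>For illustration only (a sketch of the standard logistic function, not code taken from
* this class), such a sigmoid activation could be written in Java as:</p>
* <pre>{@code
* // logistic sigmoid: maps any real input into the open interval (0, 1)
* public static double sigmoid(double x) {
*     return 1.0d / (1.0d + Math.exp(-x));
* }
* }</pre>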
*
* @rapidminer.index Neural Net
*
* @author Ingo Mierswa
*/
public class ImprovedNeuralNetLearner extends AbstractLearner {
/** The parameter name for "The number of hidden layers. Only used if no layers are defined by the list hidden_layer_types." */
public static final String PARAMETER_HIDDEN_LAYERS = "hidden_layers";
/** The parameter name for "The number of training cycles used for the neural network training." */
public static final String PARAMETER_TRAINING_CYCLES = "training_cycles";
/** The parameter name for "The optimization is stopped if the training error gets below this epsilon value." */
public static final String PARAMETER_ERROR_EPSILON = "error_epsilon";
/** The parameter name for "The learning rate determines by how much we change the weights at each step." */
public static final String PARAMETER_LEARNING_RATE = "learning_rate";
/** The parameter name for "The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions)." */
public static final String PARAMETER_MOMENTUM = "momentum";
/** Indicates if the learning rate should be decreased during learning. */
public static final String PARAMETER_DECAY = "decay";
/** Indicates if the input data should be shuffled before learning. */
public static final String PARAMETER_SHUFFLE = "shuffle";
/** Indicates if the input data should be normalized between -1 and 1 before learning. */
public static final String PARAMETER_NORMALIZE = "normalize";
public ImprovedNeuralNetLearner(OperatorDescription description) {
super(description);
}
@Override
public Model learn(ExampleSet exampleSet) throws OperatorException {
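// neural nets cannot handle missing values, so reject example sets that contain any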
com.rapidminer.example.Tools.onlyNonMissingValues(exampleSet, "Neural Net");
ImprovedNeuralNetModel model = new ImprovedNeuralNetModel(exampleSet);
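// fetch the user-defined network structure and the training parameters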
List<String[]> hiddenLayers = getParameterList(PARAMETER_HIDDEN_LAYERS);
int maxCycles = getParameterAsInt(PARAMETER_TRAINING_CYCLES);
double maxError = getParameterAsDouble(PARAMETER_ERROR_EPSILON);
double learningRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
double momentum = getParameterAsDouble(PARAMETER_MOMENTUM);
boolean decay = getParameterAsBoolean(PARAMETER_DECAY);
boolean shuffle = getParameterAsBoolean(PARAMETER_SHUFFLE);
boolean normalize = getParameterAsBoolean(PARAMETER_NORMALIZE);
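// the random generator honors the random seed parameters registered in getParameterTypes()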
RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(this);
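// run the actual backpropagation training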
model.train(exampleSet, hiddenLayers, maxCycles, maxError, learningRate, momentum, decay, shuffle, normalize, randomGenerator);
return model;
}
@Override
public Class<? extends PredictionModel> getModelClass() {
return ImprovedNeuralNetModel.class;
}
/**
* Returns true for numerical attributes, weighted examples, and polynominal, binominal,
* and numerical labels.
*/
@Override
public boolean supportsCapability(OperatorCapability lc) {
switch (lc) {
case NUMERICAL_ATTRIBUTES:
case POLYNOMINAL_LABEL:
case BINOMINAL_LABEL:
case NUMERICAL_LABEL:
case WEIGHTED_EXAMPLES:
return true;
default:
return false;
}
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
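// network structure: one list entry per hidden layer, mapping the layer name to its size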
ParameterType type = new ParameterTypeList(PARAMETER_HIDDEN_LAYERS, "Describes the name and the size of all hidden layers.",
new ParameterTypeString("hidden_layer_name", "The name of the hidden layer."),
new ParameterTypeInt("hidden_layer_sizes", "The size of the hidden layers. A size of < 0 leads to a layer size of (number_of_attributes + number of classes) / 2 + 1.", -1, Integer.MAX_VALUE, -1));
type.setExpert(false);
types.add(type);
type = new ParameterTypeInt(PARAMETER_TRAINING_CYCLES, "The number of training cycles used for the neural network training.", 1, Integer.MAX_VALUE, 500);
type.setExpert(false);
types.add(type);
type = new ParameterTypeDouble(PARAMETER_LEARNING_RATE, "The learning rate determines by how much we change the weights at each step. May not be 0.", Double.MIN_VALUE, 1.0d, 0.3d);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeDouble(PARAMETER_MOMENTUM, "The momentum simply adds a fraction of the previous weight update to the current one (this helps to escape local minima and smooths the optimization direction).", 0.0d, 1.0d, 0.2d));
types.add(new ParameterTypeBoolean(PARAMETER_DECAY, "Indicates if the learning rate should be decreased during learning.", false));
types.add(new ParameterTypeBoolean(PARAMETER_SHUFFLE, "Indicates if the input data should be shuffled before learning (increases memory usage but is recommended if the data is sorted beforehand).", true));
types.add(new ParameterTypeBoolean(PARAMETER_NORMALIZE, "Indicates if the input data should be normalized between -1 and +1 before learning (increases runtime but is necessary in most cases).", true));
types.add(new ParameterTypeDouble(PARAMETER_ERROR_EPSILON, "The optimization is stopped if the training error gets below this epsilon value.", 0.0d, Double.POSITIVE_INFINITY, 0.00001d));
types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
return types;
}
}