/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.jep.function;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import org.nfunk.jep.JEP;
import org.nfunk.jep.ParseException;
import org.nfunk.jep.SymbolTable;
import org.nfunk.jep.Variable;
import org.nfunk.jep.function.PostfixMathCommand;
import org.nfunk.jep.function.PostfixMathCommandI;
import org.nfunk.jep.type.Complex;
import com.rapidminer.Process;
import com.rapidminer.operator.preprocessing.filter.ChangeAttributeName;
import com.rapidminer.tools.expression.parser.AbstractExpressionParser;
import com.rapidminer.tools.expression.parser.Function;
import com.rapidminer.tools.expression.parser.JEPFunctionException;
import com.rapidminer.tools.jep.function.expressions.Average;
import com.rapidminer.tools.jep.function.expressions.BitwiseAnd;
import com.rapidminer.tools.jep.function.expressions.BitwiseNot;
import com.rapidminer.tools.jep.function.expressions.BitwiseOr;
import com.rapidminer.tools.jep.function.expressions.BitwiseXor;
import com.rapidminer.tools.jep.function.expressions.Constant;
import com.rapidminer.tools.jep.function.expressions.LogarithmDualis;
import com.rapidminer.tools.jep.function.expressions.MacroValue;
import com.rapidminer.tools.jep.function.expressions.Maximum;
import com.rapidminer.tools.jep.function.expressions.Minimum;
import com.rapidminer.tools.jep.function.expressions.Missing;
import com.rapidminer.tools.jep.function.expressions.ParameterValue;
import com.rapidminer.tools.jep.function.expressions.Random;
import com.rapidminer.tools.jep.function.expressions.Signum;
import com.rapidminer.tools.jep.function.expressions.date.Date2String;
import com.rapidminer.tools.jep.function.expressions.date.Date2StringCustom;
import com.rapidminer.tools.jep.function.expressions.date.Date2StringWithLocale;
import com.rapidminer.tools.jep.function.expressions.date.DateAdd;
import com.rapidminer.tools.jep.function.expressions.date.DateAfter;
import com.rapidminer.tools.jep.function.expressions.date.DateBefore;
import com.rapidminer.tools.jep.function.expressions.date.DateCreate;
import com.rapidminer.tools.jep.function.expressions.date.DateDiff;
import com.rapidminer.tools.jep.function.expressions.date.DateGet;
import com.rapidminer.tools.jep.function.expressions.date.DateParse;
import com.rapidminer.tools.jep.function.expressions.date.DateParseCustom;
import com.rapidminer.tools.jep.function.expressions.date.DateParseWithLocale;
import com.rapidminer.tools.jep.function.expressions.date.DateSet;
import com.rapidminer.tools.jep.function.expressions.number.Str;
import com.rapidminer.tools.jep.function.expressions.text.CharAt;
import com.rapidminer.tools.jep.function.expressions.text.Compare;
import com.rapidminer.tools.jep.function.expressions.text.Concat;
import com.rapidminer.tools.jep.function.expressions.text.Contains;
import com.rapidminer.tools.jep.function.expressions.text.EndsWith;
import com.rapidminer.tools.jep.function.expressions.text.Equals;
import com.rapidminer.tools.jep.function.expressions.text.EscapeHTML;
import com.rapidminer.tools.jep.function.expressions.text.Finds;
import com.rapidminer.tools.jep.function.expressions.text.IndexOf;
import com.rapidminer.tools.jep.function.expressions.text.Length;
import com.rapidminer.tools.jep.function.expressions.text.LowerCase;
import com.rapidminer.tools.jep.function.expressions.text.Matches;
import com.rapidminer.tools.jep.function.expressions.text.ParseNumber;
import com.rapidminer.tools.jep.function.expressions.text.Prefix;
import com.rapidminer.tools.jep.function.expressions.text.Replace;
import com.rapidminer.tools.jep.function.expressions.text.ReplaceRegex;
import com.rapidminer.tools.jep.function.expressions.text.StartsWith;
import com.rapidminer.tools.jep.function.expressions.text.Substring;
import com.rapidminer.tools.jep.function.expressions.text.Suffix;
import com.rapidminer.tools.jep.function.expressions.text.Trim;
import com.rapidminer.tools.jep.function.expressions.text.UpperCase;
/**
* <p>
* This class can be used as expression parser in order to generate new
* attributes. The parser constructs new attributes from the attributes of the
* input example set.
* </p>
*
* <p>
* The following <em>operators</em> are supported:
* <ul>
* <li>Addition: +</li>
* <li>Subtraction: -</li>
* <li>Multiplication: *</li>
* <li>Division: /</li>
* <li>Power: ^</li>
* <li>Modulus: %</li>
* <li>Less Than: &lt;</li>
* <li>Greater Than: &gt;</li>
* <li>Less or Equal: &lt;=</li>
* <li>Greater or Equal: &gt;=</li>
* <li>Equal: ==</li>
* <li>Not Equal: !=</li>
* <li>Boolean Not: !</li>
* <li>Boolean And: &amp;&amp;</li>
* <li>Boolean Or: ||</li>
* </ul>
* </p>
*
* <p>
* The following <em>log and exponential functions</em> are supported:
* <ul>
* <li>Natural Logarithm: ln(x)</li>
* <li>Logarithm Base 10: log(x)</li>
* <li>Logarithm Dualis (Base 2): ld(x)</li>
* <li>Exponential (e^x): exp(x)</li>
* <li>Power: pow(x,y)</li>
* </ul>
* </p>
*
* <p>
* The following <em>trigonometric functions</em> are supported:
* <ul>
* <li>Sine: sin(x)</li>
* <li>Cosine: cos(x)</li>
* <li>Tangent: tan(x)</li>
* <li>Arc Sine: asin(x)</li>
* <li>Arc Cosine: acos(x)</li>
* <li>Arc Tangent: atan(x)</li>
* <li>Arc Tangent (with 2 parameters): atan2(x,y)</li>
* <li>Hyperbolic Sine: sinh(x)</li>
* <li>Hyperbolic Cosine: cosh(x)</li>
* <li>Hyperbolic Tangent: tanh(x)</li>
* <li>Inverse Hyperbolic Sine: asinh(x)</li>
* <li>Inverse Hyperbolic Cosine: acosh(x)</li>
* <li>Inverse Hyperbolic Tangent: atanh(x)</li>
* </ul>
* </p>
*
* <p>
* The following <em>statistical functions</em> are supported:
* <ul>
* <li>Round: round(x)</li>
* <li>Round to p decimals: round(x,p)</li>
* <li>Floor: floor(x)</li>
* <li>Ceiling: ceil(x)</li>
* </ul>
* </p>
*
* <p>
* The following <em>aggregation functions</em> are supported:
* <ul>
* <li>Average: avg(x,y,z...)</li>
* <li>Minimum: min(x,y,z...)</li>
* <li>Maximum: max(x,y,z...)</li>
* </ul>
* </p>
*
* <p>
* The following <em>text functions</em> are supported:
* <ul>
* <li>Number to string: str(x)</li>
* <li>String to number: parse(text)</li>
* <li>Substring: cut(text, start, length)</li>
* <li>Concatenation (also possible by "+"): concat(text1, text2,
* text3...)</li>
* <li>Replace: replace(text, what, by)</li>
* <li>Replace All: replaceAll(text, what, by)</li>
* <li>To lower case: lower(text)</li>
* <li>To upper case: upper(text)</li>
* <li>First position of string in text: index(text, string)</li>
* <li>Length: length(text)</li>
* <li>Character at position pos in text: char(text, pos)</li>
* <li>Compare: compare(text1, text2)</li>
* <li>Contains string in text: contains(text, string)</li>
* <li>Equals: equals(text1, text2)</li>
* <li>Starts with string: starts(text, string)</li>
* <li>Ends with string: ends(text, string)</li>
* <li>Matches with regular expression exp: matches(text, exp)</li>
* <li>Suffix of length: suffix(text, length)</li>
* <li>Prefix of length: prefix(text, length)</li>
* <li>Trim (remove leading and trailing whitespace): trim(text)</li>
* </ul>
* </p>
*
* <p>
* The following <em>date functions</em> are supported:
* <ul>
* <li>Parse date: date_parse(x)</li>
* <li>Parse date using locale: date_parse_loc(x, code)</li>
* <li>Parse date using custom format: date_parse_custom(x, format, code)</li>
* <li>Date before: date_before(x, y)</li>
* <li>Date after: date_after(x, y)</li>
* <li>Date to string: date_str(x)</li>
* <li>Date to string using locale: date_str_loc(x, code)</li>
* <li>Date to string with custom pattern: date_str_custom(x, pattern, code)</li>
* <li>Current date: date_now()</li>
* <li>Date difference: date_diff(x, y)</li>
* <li>Date add: date_add(x, y, unit)</li>
* <li>Date set: date_set(x, y, unit)</li>
* <li>Date get: date_get(x, unit)</li>
* </ul>
* </p>
*
* <p>
* The following <em>process related functions</em> are supported:
* <ul>
* <li>Retrieving a parameter value: param("operator", "parameter")</li>
* <li>Retrieving a macro value: macro("macro", "default Value")</li>
* </ul>
* </p>
*
* <p>
* The following <em>miscellaneous functions</em> are supported:
* <ul>
* <li>If-Then-Else: if(cond,true-evaluation, false-evaluation)</li>
* <li>Absolute: abs(x)</li>
* <li>Constant: const(x)</li>
* <li>Square Root: sqrt(x)</li>
* <li>Signum (delivers the sign of a number): sgn(x)</li>
* <li>Random Number (between 0 and 1): rand()</li>
* <li>Modulus (x % y): mod(x,y)</li>
* <li>Sum of k Numbers: sum(x,y,z...)</li>
* <li>Binomial Coefficients: binom(n, i)</li>
* <li>Check for Missing: missing(x)</li>
* <li>Bitwise OR: bit_or(x, y)</li>
* <li>Bitwise AND: bit_and(x, y)</li>
* <li>Bitwise XOR: bit_xor(x, y)</li>
* <li>Bitwise NOT: bit_not(x)</li>
* </ul>
* </p>
*
*
* <p>
* Besides those operators and functions, this operator also supports the
* constants pi and e if this is indicated by the corresponding parameter
* (default: true). You can also use strings in formulas (for example in a
* conditioned if-formula) but the string values have to be enclosed in double
* quotes.
* </p>
*
* <p>
* Please note that there are some restrictions for the attribute names in order
* to let this operator work properly:
* <ul>
* <li>If the standard constants are usable, attribute names with names like
* "e" or "pi" are not allowed.</li>
* <li>Attribute names with function or operator names are also not allowed.</li>
* <li>Attribute names containing parentheses are not allowed.</li>
* </ul>
* If these conditions are not fulfilled, the names must be changed beforehand,
* for example with the {@link ChangeAttributeName} operator.
* </p>
*
* <p>
* <br/>
* <em>Examples:</em><br/>
* a1+sin(a2*a3)<br/>
* if (att1>5, att2*att3, -abs(att1))<br/>
* </p>
*
* @author Ingo Mierswa
*/
public class ExpressionParser extends AbstractExpressionParser {

    /** The JEP instance every call is delegated to; replaced on each {@code initParser} call. */
    private JEP parser;

    /**
     * Static map to remember already created {@link PostfixMathCommand}s, keyed by function name.
     * This avoids creating new ones every time the JEP is instantiated.
     * <p>
     * NOTE(review): this is a plain {@link HashMap} shared by all instances and accessed without
     * synchronization - confirm that parsers are never created from several threads at once.
     * </p>
     */
    private static final Map<String, PostfixMathCommand> REGISTERED_CUSTOM_FUNCTIONS = new HashMap<String, PostfixMathCommand>();

    /**
     * Creates a parser that is not bound to a process.
     *
     * @param useStandardConstants whether the JEP standard constants should be registered
     */
    private ExpressionParser(boolean useStandardConstants) {
        this(useStandardConstants, null);
    }

    /**
     * This constructor allows additional functions if called within a process.
     *
     * @param useStandardConstants whether the JEP standard constants should be registered
     * @param process the process used for the process related functions, may be {@code null}
     */
    private ExpressionParser(boolean useStandardConstants, Process process) {
        initParser(useStandardConstants, process);
    }

    /**
     * Factory method for a parser without process related functions.
     *
     * @param useStandardConstants whether the JEP standard constants should be registered
     * @return a newly initialized parser
     */
    public static AbstractExpressionParser getExpressionParser(boolean useStandardConstants) {
        return new ExpressionParser(useStandardConstants);
    }

    /**
     * Factory method for a parser that additionally offers the process related functions
     * if {@code process} is not {@code null}.
     *
     * @param useStandardConstants whether the JEP standard constants should be registered
     * @param process the process used for the process related functions, may be {@code null}
     * @return a newly initialized parser
     */
    public static AbstractExpressionParser getExpressionParser(boolean useStandardConstants, Process process) {
        return new ExpressionParser(useStandardConstants, process);
    }

    /** Forwards the flag to {@link JEP#setAllowUndeclared(boolean)}. */
    @Override
    public void setAllowUndeclared(boolean value) {
        getParser().setAllowUndeclared(value);
    }

    /** Registers the constants {@code true} and {@code false} and then the constants of the super class. */
    @Override
    protected void addCustomConstants() {
        addConstant("true", Boolean.valueOf(true));
        addConstant("false", Boolean.valueOf(false));
        super.addCustomConstants();
    }

    /**
     * Registers all built-in functions of this parser and afterwards every function delivered by
     * {@code getCustomFunctions()}. The JEP commands wrapping the latter are cached in
     * {@link #REGISTERED_CUSTOM_FUNCTIONS} and reused by later parser instances.
     */
    @Override
    protected void addCustomFunctions() {
        addFunction("const", new Constant());
        addFunction("str", new Str());
        addFunction("avg", new Average());
        addFunction("min", new Minimum());
        addFunction("max", new Maximum());
        addFunction("ld", new LogarithmDualis());
        addFunction("sgn", new Signum());
        addFunction("missing", new Missing());
        addFunction("bit_or", new BitwiseOr());
        addFunction("bit_and", new BitwiseAnd());
        addFunction("bit_xor", new BitwiseXor());
        addFunction("bit_not", new BitwiseNot());

        // text functions
        addFunction("parse", new ParseNumber());
        addFunction("cut", new Substring());
        addFunction("concat", new Concat());
        addFunction("replace", new Replace());
        addFunction("replaceAll", new ReplaceRegex());
        addFunction("lower", new LowerCase());
        addFunction("upper", new UpperCase());
        addFunction("index", new IndexOf());
        addFunction("length", new Length());
        addFunction("char", new CharAt());
        addFunction("compare", new Compare());
        addFunction("equals", new Equals());
        addFunction("contains", new Contains());
        addFunction("starts", new StartsWith());
        addFunction("ends", new EndsWith());
        addFunction("matches", new Matches());
        addFunction("finds", new Finds());
        addFunction("prefix", new Prefix());
        addFunction("suffix", new Suffix());
        addFunction("trim", new Trim());
        addFunction("escape_html", new EscapeHTML());

        // date functions
        addFunction("date_parse", new DateParse());
        addFunction("date_parse_loc", new DateParseWithLocale());
        addFunction("date_parse_custom", new DateParseCustom());
        addFunction("date_before", new DateBefore());
        addFunction("date_after", new DateAfter());
        addFunction("date_str", new Date2String());
        addFunction("date_str_loc", new Date2StringWithLocale());
        addFunction("date_str_custom", new Date2StringCustom());
        addFunction("date_now", new DateCreate());
        addFunction("date_diff", new DateDiff());
        addFunction("date_add", new DateAdd());
        addFunction("date_set", new DateSet());
        addFunction("date_get", new DateGet());

        // externally registered functions
        for (Function function : getCustomFunctions()) {
            String functionName = function.getFunctionName();
            PostfixMathCommand postfixMathCommand = REGISTERED_CUSTOM_FUNCTIONS.get(functionName);
            // if function has not yet been created, create and save it now
            if (postfixMathCommand == null) {
                postfixMathCommand = createCommand(function);
                REGISTERED_CUSTOM_FUNCTIONS.put(functionName, postfixMathCommand);
            }
            addFunction(functionName, postfixMathCommand);
        }
    }

    /**
     * Wraps a custom {@link Function} into a JEP command.
     * <p>
     * This method is static on purpose: the returned command ends up in the static
     * {@link #REGISTERED_CUSTOM_FUNCTIONS} cache, and an anonymous class created in an instance
     * method may carry a hidden reference to its enclosing {@link ExpressionParser}, which would
     * keep that parser reachable for as long as the cache exists.
     * </p>
     *
     * @param function the function to wrap
     * @return a command that delegates evaluation to {@code function}
     */
    private static PostfixMathCommand createCommand(final Function function) {
        return new PostfixMathCommand() {

            {
                numberOfParameters = function.getFunctionDescription().getNumberOfArguments();
            }

            @Override
            public void run(Stack stack) throws ParseException {
                // -1 marks a variable number of arguments; use the count of the current call instead
                int numParams = numberOfParameters == -1 ? curNumberOfParameters : numberOfParameters;

                // The values are collected in pop order, i.e. the first entry is the value that
                // was on top of the stack.
                // NOTE(review): if JEP pushes arguments left to right, compute() receives them
                // last-argument-first - confirm against the Function implementations.
                ArrayList<Object> arguments = new ArrayList<Object>();
                for (int i = 0; i < numParams; i++) {
                    arguments.add(stack.pop());
                }

                Object result;
                try {
                    result = function.compute(arguments.toArray());
                } catch (JEPFunctionException e) {
                    // same exception type and message as before, but keep the original cause
                    ParseException parseException = new ParseException(e.getMessage());
                    parseException.initCause(e);
                    throw parseException;
                }
                stack.push(result);
            }
        };
    }

    /**
     * @return {@code true} if the underlying JEP reports an error
     */
    public boolean hasError() {
        return getParser().hasError();
    }

    /**
     * Parses the given expression with the underlying JEP.
     *
     * @param expression the expression to parse
     * @throws ExpressionParserException carrying {@link #getErrorInfo()} if JEP reports an error afterwards
     */
    @Override
    public void parseExpression(String expression) throws ExpressionParserException {
        getParser().parseExpression(expression);
        if (hasError()) {
            throw new ExpressionParserException(getErrorInfo());
        }
    }

    /**
     * @return the error information of the underlying JEP, or the empty string if it reports no error
     */
    @Override
    public String getErrorInfo() {
        if (getParser().hasError()) {
            return getParser().getErrorInfo();
        }
        return "";
    }

    /** Forwards to {@link JEP#addVariable(String, Object)}. */
    @Override
    public void addVariable(String name, Object object) {
        getParser().addVariable(name, object);
    }

    /** Forwards to {@link JEP#getValueAsObject()}. */
    @Override
    public Object getValueAsObject() {
        return getParser().getValueAsObject();
    }

    /** Forwards to {@link JEP#addConstant(String, Object)}. */
    public void addConstant(String constantName, Object value) {
        getParser().addConstant(constantName, value);
    }

    /** Initializes the parser without a process, see {@link #initParser(boolean, Process)}. */
    @Override
    public void initParser(boolean useStandardConstants) {
        initParser(useStandardConstants, null);
    }

    /**
     * Replaces the underlying JEP by a fresh instance and configures it: standard functions,
     * optionally the standard constants, the custom functions and constants of this class,
     * undeclared variables disallowed and implicit multiplication switched off.
     * If a process is given, the functions {@code param} and {@code macro} are added and
     * {@code rand} is replaced by a process bound variant.
     *
     * @param useStandardConstants whether the JEP standard constants should be registered
     * @param process the process used for the process related functions, may be {@code null}
     */
    @Override
    public void initParser(boolean useStandardConstants, Process process) {
        parser = new JEP();
        parser.addStandardFunctions();
        if (useStandardConstants) {
            parser.addStandardConstants();
        }

        addCustomFunctions();
        addCustomConstants();

        setAllowUndeclared(false);
        setImplicitMul(false);

        if (process != null) {
            parser.addFunction("param", new ParameterValue(process));
            parser.addFunction("macro", new MacroValue(process));
            // swap the previously registered rand for the process bound one
            parser.removeFunction("rand");
            parser.addFunction("rand", new Random(process));
        }
    }

    /** Forwards to {@link JEP#setVarValue(String, Object)}. */
    @Override
    public void setVarValue(String variableName, Object value) {
        getParser().setVarValue(variableName, value);
    }

    /** Forwards the flag to {@link JEP#setImplicitMul(boolean)}. */
    @Override
    public void setImplicitMul(boolean b) {
        getParser().setImplicitMul(b);
    }

    /**
     * @return the values of the symbol table of the underlying JEP
     */
    @Override
    public Collection getSymbolTableValues() {
        SymbolTable symbolTable = parser.getSymbolTable();
        return symbolTable.values();
    }

    /**
     * @return the JEP instance currently backing this parser
     */
    private JEP getParser() {
        return parser;
    }

    /** Forwards to {@link JEP#addStandardConstants()}. */
    @Override
    public void addStandardConstants() {
        getParser().addStandardConstants();
    }

    /**
     * @return {@code true} if the given result is a JEP {@link Complex}
     */
    @Override
    public boolean isComplex(Object result) {
        return result instanceof Complex;
    }

    /**
     * @param variableObj a JEP {@link Variable}; any other type results in a {@link ClassCastException}
     * @return the name of the given variable
     */
    @Override
    public String getVariableName(Object variableObj) {
        Variable variable = (Variable) variableObj;
        return variable.getName();
    }

    /**
     * @param variableObj a JEP {@link Variable}; any other type results in a {@link ClassCastException}
     * @return whether the given variable is flagged as constant
     */
    @Override
    public boolean isConstant(Object variableObj) {
        Variable variable = (Variable) variableObj;
        return variable.isConstant();
    }

    /**
     * @param result a JEP {@link Complex}; any other type results in a {@link ClassCastException}
     * @return the value delivered by {@link Complex#doubleValue()}
     */
    @Override
    public double getDoubleValueofComplex(Object result) {
        Complex cmplx = (Complex) result;
        return cmplx.doubleValue();
    }

    /**
     * Registers a function at the underlying JEP.
     *
     * @param functionName the name under which the function can be used in expressions
     * @param value the function, has to be a {@link PostfixMathCommandI}
     */
    @Override
    public void addFunction(String functionName, Object value) {
        getParser().addFunction(functionName, (PostfixMathCommandI) value);
    }
}