package org.mozilla.javascript;
/*
* Copyright (c) 2002-2010 Gargoyle Software Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.mozilla.javascript.regexp.NativeRegExp;
import org.mozilla.javascript.regexp.RegExpImpl;
/**
 * Customization of the JavaScript RegExp support based on the JDK regular
 * expression engine.
 * <p>
 * Replacement with a string (not a function) as second argument and
 * <code>match</code> are evaluated with {@link java.util.regex}; every other
 * action is delegated to the wrapped proxy.
 *
 * @version $Revision: 5777 $
 * @author Marc Guillemot
 * @author Ahmed Ashour
 */
public class HtmlUnitRegExpProxy extends RegExpImpl {

    /** "$$" in a replacement string; stands for a single literal "$". */
    private static final Pattern DOUBLE_DOLLAR = Pattern.compile("\\$\\$");

    /** [^\N] (N being a single digit) -> . */
    private static final Pattern NEGATED_DIGIT_ESCAPE_CLASS = Pattern
            .compile("\\[\\^\\\\\\d\\]");

    /** [...\b...] -> [...\cH...] */
    private static final Pattern BACKSPACE_IN_CLASS = Pattern
            .compile("\\[([^\\]]*)\\\\b([^\\]]*)\\]");

    /** [...[...] -> [...\[...] */
    private static final Pattern BRACKET_IN_CLASS = Pattern
            .compile("(?<!\\\\)\\[([^((?<!\\\\)\\[)\\]]*)\\[");

    /**
     * [...ab\5cd...] -> [...abcd...]; back references in character classes
     * are simply ignored by browsers.
     */
    private static final Pattern BACK_REFERENCE_IN_CLASS = Pattern
            .compile("(?<!\\\\)\\[([^\\]]*)(?<!\\\\)\\\\\\d");

    /** Characters escaped without need, which should be "un-escaped". */
    private static final Pattern NEEDLESS_ESCAPE = Pattern
            .compile("(?<!\\\\)\\\\([ACE-RT-VX-Zaeg-mpqyz])");

    /** An unescaped "{" that is not followed by a digit. */
    private static final Pattern OPENING_CURLY = Pattern
            .compile("(?<!\\\\)\\{(?!\\d)");

    /** A "}" that is neither escaped nor preceded by a digit (and optional comma). */
    private static final Pattern CLOSING_CURLY = Pattern
            .compile("(?<!(\\d,?|\\\\))\\}");

    private final RegExpProxy wrapped_;

    /**
     * Wraps a proxy to enhance it.
     *
     * @param wrapped
     *            the original proxy
     */
    public HtmlUnitRegExpProxy(final RegExpProxy wrapped) {
        wrapped_ = wrapped;
    }

    /**
     * Use the wrapped proxy except for replacement with string arg and for
     * match, where it uses Java regular expressions. {@inheritDoc}
     */
    @Override
    public Object action(final Context cx, final Scriptable scope,
            final Scriptable thisObj, final Object[] args, final int actionType) {
        try {
            return doAction(cx, scope, thisObj, args, actionType);
        } catch (final StackOverflowError e) {
            // TODO: We shouldn't have to catch this exception and fall back to
            // Rhino's regex support!
            // See HtmlUnitRegExpProxyTest.stackOverflow()
            // Go through wrappedAction so that the context's RegExp proxy is
            // switched to the wrapped one, exactly like every other delegation.
            return wrappedAction(cx, scope, thisObj, args, actionType);
        }
    }

    /**
     * Dispatches the action: string replacement and match are handled with
     * Java regular expressions, anything else (or anything the Java engine
     * can not compile) goes to the wrapped proxy.
     */
    private Object doAction(final Context cx, final Scriptable scope,
            final Scriptable thisObj, final Object[] args, final int actionType) {
        // in a first time just improve replacement with a String (not a
        // function)
        if (RA_REPLACE == actionType && args.length == 2
                && (args[1] instanceof String)) {
            final String replaced = replaceWithString(thisObj, args[0],
                    (String) args[1]);
            if (replaced != null) {
                return replaced;
            }
        } else if (RA_MATCH == actionType) {
            if (args.length == 0) {
                return null;
            }
            try {
                return match(cx, scope, Context.toString(thisObj), args[0]);
            } catch (final PatternSyntaxException ignored) {
                // the converted expression is not a valid Java pattern:
                // let the wrapped proxy handle it, as done for replacement
            }
        }
        return wrappedAction(cx, scope, thisObj, args, actionType);
    }

    /**
     * Performs <code>thisObj.replace(arg0, replacement)</code> for a string
     * replacement.
     *
     * @return the result, or <code>null</code> if the call has to be handled
     *         by the wrapped proxy (arg0 is neither a String nor a
     *         NativeRegExp, or its pattern can not be compiled by Java)
     */
    private String replaceWithString(final Scriptable thisObj,
            final Object arg0, final String replacement) {
        final String thisString = Context.toString(thisObj);
        if (arg0 instanceof String) {
            final String literalReplacement = DOUBLE_DOLLAR.matcher(
                    replacement).replaceAll("\\$");
            // arg0 should *not* be interpreted as a RegExp
            return replaceOnce(thisString, (String) arg0, literalReplacement);
        }
        if (arg0 instanceof NativeRegExp) {
            try {
                final RegExpData reData = new RegExpData((NativeRegExp) arg0);
                final String regex = reData.getJavaPattern();
                final int flags = reData.getJavaFlags();
                final Matcher matcher = Pattern.compile(regex, flags).matcher(
                        thisString);
                return doReplacement(thisString, replacement, matcher,
                        reData.hasFlag('g'));
            } catch (final PatternSyntaxException ignored) {
                // TODO LOG.warn(e.getMessage(), e);
                // deliberately ignored: fall back to the wrapped proxy
            }
        }
        return null;
    }

    /**
     * Performs <code>thisString.match(arg0)</code>.
     *
     * @return the array of matches with its "index" and "input" properties,
     *         or <code>null</code> if nothing matches
     * @throws PatternSyntaxException
     *             if the converted expression is not a valid Java pattern
     */
    private Object match(final Context cx, final Scriptable scope,
            final String thisString, final Object arg0) {
        final RegExpData reData;
        if (arg0 instanceof NativeRegExp) {
            reData = new RegExpData((NativeRegExp) arg0);
        } else {
            reData = new RegExpData(Context.toString(arg0));
        }
        final Pattern pattern = Pattern.compile(reData.getJavaPattern(),
                reData.getJavaFlags());
        final Matcher matcher = pattern.matcher(thisString);
        if (!matcher.find()) {
            return null;
        }
        final int index = matcher.start(0);
        final List<Object> groups = new ArrayList<Object>();
        if (reData.hasFlag('g')) {
            // global: collect every complete match
            do {
                groups.add(matcher.group(0));
            } while (matcher.find());
        } else {
            // not global: the first match followed by its groups
            for (int i = 0; i <= matcher.groupCount(); ++i) {
                Object group = matcher.group(i);
                if (group == null) {
                    group = Context.getUndefinedValue();
                }
                groups.add(group);
            }
        }
        final Scriptable response = cx.newArray(scope, groups.toArray());
        // the additional properties (cf ECMA script reference 15.10.6.2 13)
        response.put("index", response, Integer.valueOf(index));
        response.put("input", response, thisString);
        return response;
    }

    /**
     * Replaces the first match (or all of them) found by the matcher.
     *
     * @param originalString
     *            the string the matcher operates on
     * @param replacement
     *            the JavaScript replacement string, possibly containing "$"
     *            patterns
     * @param matcher
     *            the matcher to use
     * @param replaceAll
     *            <code>true</code> to replace every match, <code>false</code>
     *            to replace only the first one
     * @return the resulting string
     */
    private String doReplacement(final String originalString,
            final String replacement, final Matcher matcher,
            final boolean replaceAll) {
        final boolean hasDollar = replacement.contains("$");
        final StringBuilder sb = new StringBuilder();
        int previousIndex = 0;
        while (matcher.find()) {
            sb.append(originalString, previousIndex, matcher.start());
            if (hasDollar) {
                sb.append(computeReplacementValue(replacement, originalString,
                        matcher));
            } else {
                sb.append(replacement);
            }
            previousIndex = matcher.end();
            if (!replaceAll) {
                break;
            }
        }
        sb.append(originalString, previousIndex, originalString.length());
        return sb.toString();
    }

    /**
     * Expands the "$" patterns of a JavaScript replacement string for the
     * current match: <code>$n</code> / <code>$nn</code> (group), <code>$&amp;</code>
     * (whole match), <code>$`</code> (text before the match), <code>$'</code>
     * (text after the match) and <code>$$</code> (literal "$"). A "$" that does
     * not start a valid pattern is kept as is.
     *
     * @param replacement
     *            the JavaScript replacement string
     * @param originalString
     *            the string the matcher operates on
     * @param matcher
     *            the matcher, positioned on the match to replace
     * @return the text to insert in place of the match
     */
    static String computeReplacementValue(final String replacement,
            final String originalString, final Matcher matcher) {
        int lastIndex = 0;
        final StringBuilder result = new StringBuilder();
        int i;
        while ((i = replacement.indexOf('$', lastIndex)) > -1) {
            result.append(replacement, lastIndex, i);
            String ss = null;
            // number of characters of the pattern ("$" included)
            int patternLength = 2;
            if (i < replacement.length() - 1) {
                final char next = replacement.charAt(i + 1);
                // only valid back reference are "evaluated"
                if (next >= '1' && next <= '9') {
                    final int num1digit = next - '0';
                    final char next2 = (i + 2 < replacement.length()) ? replacement
                            .charAt(i + 2) : 'x';
                    final int num2digits;
                    // if there are 2 digits, the second one (0 included, as in
                    // $10) is considered as part of the group number only if
                    // there is such a group
                    if (next2 >= '0' && next2 <= '9') {
                        num2digits = num1digit * 10 + (next2 - '0');
                    } else {
                        num2digits = Integer.MAX_VALUE;
                    }
                    if (num2digits <= matcher.groupCount()) {
                        // a group that did not participate yields ""
                        ss = defaultString(matcher.group(num2digits));
                        patternLength = 3;
                    } else if (num1digit <= matcher.groupCount()) {
                        ss = defaultString(matcher.group(num1digit));
                    }
                } else {
                    switch (next) {
                    case '&':
                        ss = matcher.group();
                        break;
                    case '`':
                        ss = originalString.substring(0, matcher.start());
                        break;
                    case '\'':
                        ss = originalString.substring(matcher.end());
                        break;
                    case '$':
                        ss = "$";
                        break;
                    default:
                    }
                }
            }
            if (ss != null) {
                result.append(ss);
                lastIndex = i + patternLength;
            } else {
                // not a valid pattern: the "$" is a plain character
                result.append('$');
                lastIndex = i + 1;
            }
        }
        result.append(replacement, lastIndex, replacement.length());
        return result.toString();
    }

    /**
     * Indicates if the character at the given position is escaped or not,
     * i.e. if it is preceded by an odd number of backslashes.
     *
     * @param characters
     *            the characters to consider
     * @param position
     *            the position
     * @return <code>true</code> if escaped
     */
    static boolean isEscaped(final String characters, final int position) {
        int p = position;
        int nbBackslash = 0;
        while (p > 0 && characters.charAt(--p) == '\\') {
            nbBackslash++;
        }
        return (nbBackslash % 2 == 1);
    }

    /**
     * Calls action on the wrapped RegExp proxy.
     */
    private Object wrappedAction(final Context cx, final Scriptable scope,
            final Scriptable thisObj, final Object[] args, final int actionType) {
        // take care to set the context's RegExp proxy to the original one as
        // this is checked
        // (cf net.sourceforge.htmlunit.corejs.javascript.regexp.RegExpImp:334)
        try {
            ScriptRuntime.setRegExpProxy(cx, wrapped_);
            return wrapped_.action(cx, scope, thisObj, args, actionType);
        } finally {
            ScriptRuntime.setRegExpProxy(cx, this);
        }
    }

    /**
     * Delegates to the wrapped proxy; if it throws, an expression that is
     * not expected to match anything is compiled instead. {@inheritDoc}
     */
    @Override
    public Object compileRegExp(final Context cx, final String source,
            final String flags) {
        try {
            return wrapped_.compileRegExp(cx, source, flags);
        } catch (final Exception e) {
            // LOG.warn("compileRegExp() threw for >" + source + "<, flags: >"
            // + flags + "<. "
            // + "Replacing with a '####shouldNotFindAnything###'");
            return wrapped_.compileRegExp(cx, "####shouldNotFindAnything###",
                    "");
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int find_split(final Context cx, final Scriptable scope,
            final String target, final String separator, final Scriptable re,
            final int[] ip, final int[] matchlen, final boolean[] matched,
            final String[][] parensp) {
        return wrapped_.find_split(cx, scope, target, separator, re, ip,
                matchlen, matched, parensp);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean isRegExp(final Scriptable obj) {
        return wrapped_.isRegExp(obj);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Scriptable wrapRegExp(final Context cx, final Scriptable scope,
            final Object compiled) {
        return wrapped_.wrapRegExp(cx, scope, compiled);
    }

    /**
     * Source and flags of a JavaScript regular expression, with their
     * conversion to the Java world.
     */
    private static class RegExpData {
        private final String jsSource_;
        private final String jsFlags_;

        /**
         * Extracts source and flags from the string form of the RegExp.
         */
        RegExpData(final NativeRegExp re) {
            final String str = re.toString(); // the form is /regex/flags
            jsSource_ = substringBeforeLast(str.substring(1), "/");
            jsFlags_ = substringAfterLast(str, "/");
        }

        /**
         * Uses the string as expression source, without any flag.
         */
        public RegExpData(final String string) {
            jsSource_ = string;
            jsFlags_ = "";
        }

        /**
         * Converts the current JavaScript RegExp flags to Java Pattern flags.
         *
         * @return the Java Pattern flags
         */
        public int getJavaFlags() {
            int flags = 0;
            if (hasFlag('i')) {
                flags |= Pattern.CASE_INSENSITIVE;
            }
            if (hasFlag('m')) {
                flags |= Pattern.MULTILINE;
            }
            return flags;
        }

        /**
         * Converts the JavaScript source to a Java regular expression.
         *
         * @return the Java regular expression
         */
        public String getJavaPattern() {
            return jsRegExpToJavaRegExp(jsSource_);
        }

        /**
         * Indicates if the JavaScript flags contain the given one.
         */
        boolean hasFlag(final char c) {
            return jsFlags_.indexOf(c) != -1;
        }
    }

    /**
     * Transform a JavaScript regular expression to a Java regular expression
     *
     * @param re
     *            the JavaScript regular expression to transform
     * @return the transformed expression
     */
    static String jsRegExpToJavaRegExp(String re) {
        re = NEGATED_DIGIT_ESCAPE_CLASS.matcher(re).replaceAll(".");
        // [...\b...] -> [...\cH...]
        re = BACKSPACE_IN_CLASS.matcher(re).replaceAll("[$1\\\\cH$2]");
        // [...[...] -> [...\[...]
        re = BRACKET_IN_CLASS.matcher(re).replaceAll("[$1\\\\[");
        // back reference in character classes are simply ignored by browsers
        // [...ab\5cd...] -> [...abcd...]
        re = BACK_REFERENCE_IN_CLASS.matcher(re).replaceAll("[$1");
        // characters escaped without need should be "un-escaped"
        re = NEEDLESS_ESCAPE.matcher(re).replaceAll("$1");
        re = escapeJSCurly(re);
        return re;
    }

    /**
     * Escape curly braces that are not used in an expression like "{n}", "{n,}"
     * or "{n,m}" (where n and m are positive integers).
     *
     * @param re
     *            the regular expression to escape
     * @return the escaped expression
     */
    static String escapeJSCurly(String re) {
        re = OPENING_CURLY.matcher(re).replaceAll("\\\\{");
        re = CLOSING_CURLY.matcher(re).replaceAll("\\\\}");
        return re;
    }

    // ////////////////////////

    /**
     * Replaces the first occurrence of a plain string (not a pattern). As in
     * JavaScript, an empty search string matches at the very beginning of the
     * text.
     *
     * @param text
     *            the text to search in
     * @param repl
     *            the string to search for
     * @param with
     *            the string to insert in place of the first occurrence
     * @return the resulting text; the text itself if an argument is
     *         <code>null</code> or if there is no occurrence
     */
    private static String replaceOnce(final String text, final String repl,
            final String with) {
        if (text == null || repl == null || with == null) {
            return text;
        }
        final int pos = text.indexOf(repl);
        if (pos == -1) {
            return text;
        }
        final StringBuilder buf = new StringBuilder(text.length()
                + with.length());
        buf.append(text, 0, pos).append(with);
        buf.append(text, pos + repl.length(), text.length());
        return buf.toString();
    }

    /**
     * Indicates if the string is <code>null</code> or empty.
     */
    private static boolean isEmpty(final String repl) {
        return repl == null || repl.length() == 0;
    }

    /**
     * Returns the string, or the empty string if it is <code>null</code>.
     */
    private static String defaultString(final String s) {
        return s == null ? "" : s;
    }

    /**
     * Returns what follows the last occurrence of the separator: the empty
     * string if there is no occurrence or nothing behind it, the string
     * itself if it is <code>null</code> or empty.
     */
    private static String substringAfterLast(final String str,
            final String separator) {
        if (isEmpty(str)) {
            return str;
        }
        if (isEmpty(separator)) {
            return "";
        }
        final int pos = str.lastIndexOf(separator);
        if (pos == -1 || pos == (str.length() - separator.length())) {
            return "";
        }
        return str.substring(pos + separator.length());
    }

    /**
     * Returns what precedes the last occurrence of the separator, or the
     * string itself if there is no occurrence or if one of the arguments is
     * <code>null</code> or empty.
     */
    private static String substringBeforeLast(final String str,
            final String separator) {
        if (isEmpty(str) || isEmpty(separator)) {
            return str;
        }
        final int pos = str.lastIndexOf(separator);
        if (pos == -1) {
            return str;
        }
        return str.substring(0, pos);
    }
}