/*******************************************************************************
 * Copyright (c) 2009-2011 Luaj.org. All rights reserved.
 * <p/>
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * <p/>
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * <p/>
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 ******************************************************************************/
package org.luaj.vm2.lib;
// TODO: differs from luaj 3.0

import org.luaj.vm2.Buffer;
import org.luaj.vm2.LuaClosure;
import org.luaj.vm2.LuaString;
import org.luaj.vm2.LuaTable;
import org.luaj.vm2.LuaValue;
import org.luaj.vm2.Varargs;
import org.luaj.vm2.compiler.DumpState;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

/**
 * Subclass of {@link LibFunction} which implements the lua standard {@code string}
 * library.
 * <p>
 * Typically, this library is included as part of a call to either
 * {@link JsePlatform#standardGlobals()} or {@link JmePlatform#standardGlobals()}
 * <pre> {@code
 * Globals globals = JsePlatform.standardGlobals();
 * System.out.println( globals.get("string").get("upper").call( LuaValue.valueOf("abcde") ) );
 * } </pre>
 * <p>
 * To instantiate and use it directly,
 * link it into your globals table via {@link LuaValue#load(LuaValue)} using code such as:
 * <pre> {@code
 * Globals globals = new Globals();
 * globals.load(new JseBaseLib());
 * globals.load(new PackageLib());
 * globals.load(new StringLib());
 * System.out.println( globals.get("string").get("upper").call( LuaValue.valueOf("abcde") ) );
 * } </pre>
 * <p>
 * This is a direct port of the corresponding library in C.
 * @see LibFunction
 * @see JsePlatform
 * @see JmePlatform
 * @see <a href="http://www.lua.org/manual/5.2/manual.html#6.4">Lua 5.2 String Lib Reference</a>
 */
public class StringLib extends TwoArgFunction {

    /** The most recently created {@code string} table; assigned by {@link #call(LuaValue, LuaValue)}. */
    public static LuaTable instance;

    public StringLib() {
    }

    /**
     * Builds the {@code string} table, stores it in {@code env} and in
     * {@code package.loaded}, and installs it as the string metatable's
     * {@code __index} when no string metatable has been set yet.
     *
     * @param modname the module name argument (not used by this method)
     * @param env     the environment that receives the {@code string} table
     * @return the newly created {@code string} table
     */
    public LuaValue call(LuaValue modname, LuaValue env) {
        LuaTable t = new LuaTable();
        bind(t, StringLib1.class, new String[]{
                "dump", "len", "lower", "reverse", "upper"});
        bind(t, StringLibV.class, new String[]{
                "byte", "char", "find", "format",
                "gmatch", "gsub", "match", "rep",
                "sub"});
        env.set("string", t);
        instance = t;
        if (LuaString.s_metatable == null)
            LuaString.s_metatable = tableOf(new LuaValue[]{INDEX, t});
        env.get("package").get("loaded").set("string", t);
        return t;
    }

    /** Dispatcher for the single-argument string functions; the opcode follows the bind order in {@code call}. */
    static final class StringLib1 extends OneArgFunction {
        public LuaValue call(LuaValue arg) {
            switch (opcode) {
                case 0:
                    return dump(arg); // dump (function)
                case 1:
                    return StringLib.len(arg); // len (function)
                case 2:
                    return lower(arg); // lower (function)
                case 3:
                    return reverse(arg); // reverse (function)
                case 4:
                    return upper(arg); // upper (function)
            }
            return NIL;
        }
    }

    /** Dispatcher for the variable-argument string functions; the opcode follows the bind order in {@code call}. */
    static final class StringLibV extends VarArgFunction {
        public Varargs invoke(Varargs args) {
            switch (opcode) {
                case 0:
                    return StringLib.byte_(args);
                case 1:
                    return StringLib.char_(args);
                case 2:
                    return StringLib.find(args);
                case 3:
                    return StringLib.format(args);
                case 4:
                    return StringLib.gmatch(args);
                case 5:
                    return StringLib.gsub(args);
                case 6:
                    return StringLib.match(args);
                case 7:
                    return StringLib.rep(args);
                case 8:
                    return StringLib.sub(args);
            }
            return NONE;
        }
    }

    /**
     * string.byte (s [, i [, j]])
     *
     * Returns the internal numerical codes of the
     * characters s[i], s[i+1], ..., s[j]. The default value for i is 1; the
     * default value for j is i.
     *
     * Note that numerical codes are not necessarily portable across platforms.
     *
     * @param args the calling args
     */
    static Varargs byte_(Varargs args) {
        LuaString s = args.checkstring(1);
        int l = s.m_length;
        int posi = posrelat(args.optint(2, 1), l);
        int pose = posrelat(args.optint(3, posi), l);
        int n, i;
        if (posi <= 0) posi = 1;
        if (pose > l) pose = l;
        if (posi > pose) return NONE;  /* empty interval; return no values */
        n = (int) (pose - posi + 1);
        if (posi + n <= pose)  /* overflow? */
            error("string slice too long");
        LuaValue[] v = new LuaValue[n];
        for (i = 0; i < n; i++)
            v[i] = valueOf(s.luaByte(posi + i - 1));
        return varargsOf(v);
    }

    /**
     * string.char (...)
     *
     * Receives zero or more integers. Returns a string with length equal
     * to the number of arguments, in which each character has the internal
     * numerical code equal to its corresponding argument.
     *
     * Note that numerical codes are not necessarily portable across platforms.
     *
     * @param args the calling args
     */
    public static Varargs char_(Varargs args) {
        int n = args.narg();
        byte[] bytes = new byte[n];
        for (int i = 0, a = 1; i < n; i++, a++) {
            int c = args.checkint(a);
            if (c < 0 || c >= 256) argerror(a, "invalid value");
            bytes[i] = (byte) c;
        }
        return LuaString.valueOf(bytes);
    }

    /**
     * string.dump (function)
     *
     * Returns a string containing a binary representation of the given function,
     * so that a later loadstring on this string returns a copy of the function.
     * function must be a Lua function without upvalues.
     *
     * Only {@link LuaClosure} values can be dumped; any other function value
     * raises a Lua error instead of a ClassCastException.
     *
     * TODO: port dumping code as optional add-on
     */
    static LuaValue dump(LuaValue arg) {
        LuaValue f = arg.checkfunction();
        if (!(f instanceof LuaClosure)) {
            // Functions implemented in Java have no prototype to serialize.
            return error("unable to dump given function");
        }
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try {
            DumpState.dump(((LuaClosure) f).p, baos, true);
            return LuaString.valueOf(baos.toByteArray());
        } catch (IOException e) {
            return error(e.getMessage());
        }
    }

    /**
     * string.find (s, pattern [, init [, plain]])
     *
     * Looks for the first match of pattern in the string s.
     * If it finds a match, then find returns the indices of s
     * where this occurrence starts and ends; otherwise, it returns nil.
     * A third, optional numerical argument init specifies where to start the search;
     * its default value is 1 and may be negative. A value of true as a fourth,
     * optional argument plain turns off the pattern matching facilities,
     * so the function does a plain "find substring" operation,
     * with no characters in pattern being considered "magic".
     * Note that if plain is given, then init must be given as well.
     *
     * If the pattern has captures, then in a successful match the captured values
     * are also returned, after the two indices.
     */
    static Varargs find(Varargs args) {
        return str_find_aux(args, true);
    }

    /**
     * string.format (formatstring, ...)
     *
     * Returns a formatted version of its variable number of arguments following
     * the description given in its first argument (which must be a string).
     * The format string follows the same rules as the printf family of standard C functions.
     * The only differences are that the options/modifiers *, l, L, n, p, and h are not supported
     * and that there is an extra option, q. The q option formats a string in a form suitable
     * to be safely read back by the Lua interpreter: the string is written between double quotes,
     * and all double quotes, newlines, embedded zeros, and backslashes in the string are correctly
     * escaped when written. For instance, the call
     *   string.format('%q', 'a string with "quotes" and \n new line')
     *
     * will produce the string:
     *    "a string with \"quotes\" and \
     *    new line"
     *
     * The options c, d, E, e, f, g, G, i, o, u, X, and x all expect a number as argument,
     * whereas q and s expect a string.
     *
     * This function does not accept string values containing embedded zeros,
     * except as arguments to the q option.
     */
    static Varargs format(Varargs args) {
        LuaString fmt = args.checkstring(1);
        final int n = fmt.length();
        Buffer result = new Buffer(n);
        int arg = 1;
        int c;

        for (int i = 0; i < n; ) {
            switch (c = fmt.luaByte(i++)) {
                case '\n':
                    result.append("\n");
                    break;
                default:
                    result.append((byte) c);
                    break;
                case L_ESC:
                    if (i < n) {
                        if ((c = fmt.luaByte(i)) == L_ESC) {
                            // "%%" is a literal percent sign
                            ++i;
                            result.append((byte) L_ESC);
                        } else {
                            arg++;
                            FormatDesc fdsc = new FormatDesc(args, fmt, i);
                            i += fdsc.length;
                            switch (fdsc.conversion) {
                                case 'c':
                                    fdsc.format(result, (byte) args.checkint(arg));
                                    break;
                                case 'i':
                                case 'd':
                                    fdsc.format(result, args.checkint(arg));
                                    break;
                                case 'o':
                                case 'u':
                                case 'x':
                                case 'X':
                                    fdsc.format(result, args.checklong(arg));
                                    break;
                                case 'e':
                                case 'E':
                                case 'f':
                                case 'g':
                                case 'G':
                                    fdsc.format(result, args.checkdouble(arg), fdsc.precision);
                                    break;
                                case 'q':
                                    addquoted(result, args.checkstring(arg));
                                    break;
                                case 's': {
                                    LuaString s = args.checkstring(arg);
                                    if (fdsc.precision == -1 && s.length() >= 100) {
                                        // long strings with no precision are appended verbatim
                                        result.append(s);
                                    } else {
                                        fdsc.format(result, s);
                                    }
                                }
                                break;
                                default:
                                    error("invalid option '%" + (char) fdsc.conversion + "' to 'format'");
                                    break;
                            }
                        }
                    }
            }
        }

        return result.tostring();
    }

    /**
     * Appends {@code s} to {@code buf} in the quoted form produced by the
     * {@code %q} option: wrapped in double quotes, with quotes, backslashes and
     * newlines backslash-escaped and control bytes written as decimal escapes.
     */
    private static void addquoted(Buffer buf, LuaString s) {
        int c;
        buf.append((byte) '"');
        for (int i = 0, n = s.length(); i < n; i++) {
            switch (c = s.luaByte(i)) {
                case '"':
                case '\\':
                case '\n':
                    buf.append((byte) '\\');
                    buf.append((byte) c);
                    break;
                default:
                    if (c <= 0x1F || c == 0x7F) {
                        buf.append((byte) '\\');
                        if (i + 1 == n || s.luaByte(i + 1) < '0' || s.luaByte(i + 1) > '9') {
                            buf.append(Integer.toString(c));
                        } else {
                            // A digit follows, so emit exactly three digits to keep the
                            // escape unambiguous. 0x7F needs a real hundreds digit.
                            buf.append((byte) ('0' + c / 100));
                            buf.append((byte) ('0' + (c / 10) % 10));
                            buf.append((byte) ('0' + c % 10));
                        }
                    } else {
                        buf.append((byte) c);
                    }
                    break;
            }
        }
        buf.append((byte) '"');
    }

    private static final String FLAGS = "-+ #0";

    /**
     * Parsed form of a single conversion specification (the text following a
     * {@code %} in a format string): flags, width, precision and conversion
     * character, plus the number of bytes consumed.
     */
    static class FormatDesc {

        private boolean leftAdjust;
        private boolean zeroPad;
        private boolean explicitPlus;
        private boolean space;
        private boolean alternateForm;
        private static final int MAX_FLAGS = 5;

        private int width;
        private int precision;

        public final int conversion;
        public final int length;

        /**
         * Parses the specification that begins at {@code start}.
         *
         * @param args    the calling args (not used by the parser)
         * @param strfrmt the complete format string
         * @param start   offset of the first byte after the {@code %}
         */
        public FormatDesc(Varargs args, LuaString strfrmt, final int start) {
            int p = start, n = strfrmt.length();
            int c = 0;
            int flagCount = 0;

            boolean moreFlags = true;
            while (moreFlags) {
                switch (c = ((p < n) ? strfrmt.luaByte(p++) : 0)) {
                    case '-':
                        leftAdjust = true;
                        break;
                    case '+':
                        explicitPlus = true;
                        break;
                    case ' ':
                        space = true;
                        break;
                    case '#':
                        alternateForm = true;
                        break;
                    case '0':
                        zeroPad = true;
                        break;
                    default:
                        moreFlags = false;
                        break;
                }
                if (moreFlags)
                    flagCount++;
            }
            // Count only the flag characters themselves; the terminating
            // character must not be included, otherwise five distinct flags
            // would be rejected.
            if (flagCount > MAX_FLAGS)
                error("invalid format (repeated flags)");

            width = -1;
            if (Character.isDigit((char) c)) {
                width = c - '0';
                c = ((p < n) ? strfrmt.luaByte(p++) : 0);
                if (Character.isDigit((char) c)) {
                    width = width * 10 + (c - '0');
                    c = ((p < n) ? strfrmt.luaByte(p++) : 0);
                }
            }

            precision = -1;
            if (c == '.') {
                c = ((p < n) ? strfrmt.luaByte(p++) : 0);
                if (Character.isDigit((char) c)) {
                    precision = c - '0';
                    c = ((p < n) ? strfrmt.luaByte(p++) : 0);
                    if (Character.isDigit((char) c)) {
                        precision = precision * 10 + (c - '0');
                        c = ((p < n) ? strfrmt.luaByte(p++) : 0);
                    }
                }
            }

            if (Character.isDigit((char) c))
                error("invalid format (width or precision too long)");

            zeroPad &= !leftAdjust; // '-' overrides '0'
            conversion = c;
            length = p - start;
        }

        /** Appends a single byte ({@code %c}); width, precision and flags are ignored. */
        public void format(Buffer buf, byte c) {
            // TODO: not clear that any of width, precision, or flags apply here.
            buf.append(c);
        }

        /** Appends an integer using this descriptor's radix, sign, width and precision settings. */
        public void format(Buffer buf, long number) {
            String digits;

            if (number == 0 && precision == 0) {
                digits = "";
            } else {
                int radix;
                switch (conversion) {
                    case 'x':
                    case 'X':
                        radix = 16;
                        break;
                    case 'o':
                        radix = 8;
                        break;
                    default:
                        radix = 10;
                        break;
                }
                digits = Long.toString(number, radix);
                if (conversion == 'X')
                    digits = digits.toUpperCase();
            }

            int minwidth = digits.length();
            int ndigits = minwidth;
            int nzeros;

            if (number < 0) {
                ndigits--; // the leading '-' is not a digit
            } else if (explicitPlus || space) {
                minwidth++;
            }

            if (precision > ndigits)
                nzeros = precision - ndigits;
            else if (precision == -1 && zeroPad && width > minwidth)
                nzeros = width - minwidth;
            else
                nzeros = 0;

            minwidth += nzeros;
            int nspaces = width > minwidth ? width - minwidth : 0;

            if (!leftAdjust)
                pad(buf, ' ', nspaces);

            if (number < 0) {
                if (nzeros > 0) {
                    // emit the sign before the zero padding
                    buf.append((byte) '-');
                    digits = digits.substring(1);
                }
            } else if (explicitPlus) {
                buf.append((byte) '+');
            } else if (space) {
                buf.append((byte) ' ');
            }

            if (nzeros > 0)
                pad(buf, '0', nzeros);

            buf.append(digits);

            if (leftAdjust)
                pad(buf, ' ', nspaces);
        }

        /** Appends {@code String.valueOf(x)}; width, precision and flags are ignored. */
        public void format(Buffer buf, double x) {
            // TODO
            buf.append(String.valueOf(x));
        }

        /**
         * Appends {@code String.valueOf(x)} and, when the value has fewer
         * fraction digits than {@code precision}, pads the end with zeros.
         * <p>
         * No padding is added for NaN or infinities, or for values that
         * {@code String.valueOf} renders in exponent notation, because trailing
         * zeros there would corrupt the text.
         * <p>
         * NOTE(review): the value is never truncated or rounded to
         * {@code precision}, and width and flags are ignored.
         *
         * @param buf       destination buffer
         * @param x         value to format
         * @param precision requested number of fraction digits, or -1 if unspecified
         */
        public void format(Buffer buf, double x, int precision) {
            String value = String.valueOf(x);
            buf.append(value);
            int dotIndex = value.indexOf('.');
            if (dotIndex == -1 || value.indexOf('E') != -1) {
                return;
            }
            int fractionDigits = value.length() - dotIndex - 1;
            pad(buf, '0', precision - fractionDigits);
        }

        /** Appends {@code s}, cut at its first embedded zero byte if it has one. */
        public void format(Buffer buf, LuaString s) {
            int nullindex = s.indexOf((byte) '\0', 0);
            if (nullindex != -1)
                s = s.substring(0, nullindex);
            buf.append(s);
        }

        /** Appends {@code c} to {@code buf} {@code n} times; does nothing when {@code n <= 0}. */
        public static final void pad(Buffer buf, char c, int n) {
            byte b = (byte) c;
            while (n-- > 0)
                buf.append(b);
        }
    }

    /**
     * string.gmatch (s, pattern)
     *
     * Returns an iterator function that, each time it is called, returns the next captures
     * from pattern over string s. If pattern specifies no captures, then the
     * whole match is produced in each call.
     *
     * As an example, the following loop
     *   s = "hello world from Lua"
     *   for w in string.gmatch(s, "%a+") do
     *      print(w)
     *   end
     *
     * will iterate over all the words from string s, printing one per line.
     * The next example collects all pairs key=value from the given string into a table:
     *   t = {}
     *   s = "from=world, to=Lua"
     *   for k, v in string.gmatch(s, "(%w+)=(%w+)") do
     *     t[k] = v
     *   end
     *
     * For this function, a '^' at the start of a pattern does not work as an anchor,
     * as this would prevent the iteration.
     */
    static Varargs gmatch(Varargs args) {
        LuaString src = args.checkstring(1);
        LuaString pat = args.checkstring(2);
        return new GMatchAux(args, src, pat);
    }

    /** Iterator function returned by {@code string.gmatch}; it keeps the scan position between calls. */
    static class GMatchAux extends VarArgFunction {
        private final int srclen;
        private final MatchState ms;
        private int soffset;

        public GMatchAux(Varargs args, LuaString src, LuaString pat) {
            this.srclen = src.length();
            this.ms = new MatchState(args, src, pat);
            this.soffset = 0;
        }

        public Varargs invoke(Varargs args) {
            for (; soffset < srclen; soffset++) {
                ms.reset();
                int res = ms.match(soffset, 0);
                if (res >= 0) {
                    int soff = soffset;
                    // An empty match must still advance by one position;
                    // otherwise the next call would return the same empty
                    // match again and the iteration would never terminate.
                    soffset = (res == soff) ? res + 1 : res;
                    return ms.push_captures(true, soff, res);
                }
            }
            return NIL;
        }
    }

    /**
     * string.gsub (s, pattern, repl [, n])
     * Returns a copy of s in which all (or the first n, if given) occurrences of the
     * pattern have been replaced by a replacement string specified by repl, which
     * may be a string, a table, or a function. gsub also returns, as its second value,
     * the total number of matches that occurred.
     *
     * If repl is a string, then its value is used for replacement.
     * The character % works as an escape character: any sequence in repl of the form %n,
     * with n between 1 and 9, stands for the value of the n-th captured substring (see below).
     * The sequence %0 stands for the whole match. The sequence %% stands for a single %.
     *
     * If repl is a table, then the table is queried for every match, using the first capture
     * as the key; if the pattern specifies no captures, then the whole match is used as the key.
     *
     * If repl is a function, then this function is called every time a match occurs,
     * with all captured substrings passed as arguments, in order; if the pattern specifies
     * no captures, then the whole match is passed as a sole argument.
     *
     * If the value returned by the table query or by the function call is a string or a number,
     * then it is used as the replacement string; otherwise, if it is false or nil,
     * then there is no replacement (that is, the original match is kept in the string).
     *
     * Here are some examples:
     *     x = string.gsub("hello world", "(%w+)", "%1 %1")
     *     --> x="hello hello world world"
     *
     *     x = string.gsub("hello world", "%w+", "%0 %0", 1)
     *     --> x="hello hello world"
     *
     *     x = string.gsub("hello world from Lua", "(%w+)%s*(%w+)", "%2 %1")
     *     --> x="world hello Lua from"
     *
     *     x = string.gsub("home = $HOME, user = $USER", "%$(%w+)", os.getenv)
     *     --> x="home = /home/roberto, user = roberto"
     *
     *     x = string.gsub("4+5 = $return 4+5$", "%$(.-)%$", function (s)
     *           return loadstring(s)()
     *         end)
     *     --> x="4+5 = 9"
     *
     *     local t = {name="lua", version="5.1"}
     *     x = string.gsub("$name-$version.tar.gz", "%$(%w+)", t)
     *     --> x="lua-5.1.tar.gz"
     */
    static Varargs gsub(Varargs args) {
        LuaString src = args.checkstring(1);
        final int srclen = src.length();
        LuaString p = args.checkstring(2);
        LuaValue repl = args.arg(3);
        int max_s = args.optint(4, srclen + 1);
        final boolean anchor = p.length() > 0 && p.charAt(0) == '^';

        Buffer lbuf = new Buffer(srclen);
        MatchState ms = new MatchState(args, src, p);

        int soffset = 0;
        int n = 0;
        while (n < max_s) {
            ms.reset();
            int res = ms.match(soffset, anchor ? 1 : 0);
            if (res != -1) {
                n++;
                ms.add_value(lbuf, soffset, res, repl);
            }
            if (res != -1 && res > soffset)
                soffset = res;
            else if (soffset < srclen)
                // no match or empty match: copy one byte and move on
                lbuf.append((byte) src.luaByte(soffset++));
            else
                break;
            if (anchor)
                break;
        }
        lbuf.append(src.substring(soffset, srclen));
        return varargsOf(lbuf.tostring(), valueOf(n));
    }

    /**
     * string.len (s)
     *
     * Receives a string and returns its length. The empty string "" has length 0.
     * Embedded zeros are counted, so "a\000bc\000" has length 5.
     */
    static LuaValue len(LuaValue arg) {
        return arg.checkstring().len();
    }

    /**
     * string.lower (s)
     *
     * Receives a string and returns a copy of this string with all uppercase letters
     * changed to lowercase. All other characters are left unchanged.
     * The definition of what an uppercase letter is depends on the current locale.
     */
    static LuaValue lower(LuaValue arg) {
        return valueOf(arg.checkjstring().toLowerCase());
    }

    /**
     * string.match (s, pattern [, init])
     *
     * Looks for the first match of pattern in the string s. If it finds one,
     * then match returns the captures from the pattern; otherwise it returns
     * nil. If pattern specifies no captures, then the whole match is returned.
     * A third, optional numerical argument init specifies where to start the
     * search; its default value is 1 and may be negative.
     */
    static Varargs match(Varargs args) {
        return str_find_aux(args, false);
    }

    /**
     * string.rep (s, n)
     *
     * Returns a string that is the concatenation of n copies of the string s.
     * A count of zero or less yields the empty string. A result that would not
     * fit in a Java array raises a Lua error.
     */
    static Varargs rep(Varargs args) {
        LuaString s = args.checkstring(1);
        int n = args.checkint(2);
        int len = s.length();
        if (n <= 0 || len == 0) {
            return EMPTYSTRING;
        }
        // Compute the size in 64 bits so an int overflow cannot produce a
        // negative or truncated array length.
        long total = (long) len * n;
        if (total > Integer.MAX_VALUE) {
            error("resulting string too large");
        }
        final byte[] bytes = new byte[(int) total];
        for (int offset = 0; offset < bytes.length; offset += len) {
            s.copyInto(0, bytes, offset, len);
        }
        return LuaString.valueOf(bytes);
    }

    /**
     * string.reverse (s)
     *
     * Returns a string that is the string s reversed.
     */
    static LuaValue reverse(LuaValue arg) {
        LuaString s = arg.checkstring();
        int n = s.length();
        byte[] b = new byte[n];
        for (int i = 0, j = n - 1; i < n; i++, j--)
            b[j] = (byte) s.luaByte(i);
        return LuaString.valueOf(b);
    }

    /**
     * string.sub (s, i [, j])
     *
     * Returns the substring of s that starts at i and continues until j;
     * i and j may be negative. If j is absent, then it is assumed to be equal to -1
     * (which is the same as the string length). In particular, the call
     *    string.sub(s,1,j)
     * returns a prefix of s with length j, and
     *   string.sub(s, -i)
     * returns a suffix of s with length i.
     */
    static Varargs sub(Varargs args) {
        final LuaString s = args.checkstring(1);
        final int l = s.length();

        int start = posrelat(args.checkint(2), l);
        int end = posrelat(args.optint(3, -1), l);

        if (start < 1)
            start = 1;
        if (end > l)
            end = l;

        if (start <= end) {
            return s.substring(start - 1, end);
        } else {
            return EMPTYSTRING;
        }
    }

    /**
     * string.upper (s)
     *
     * Receives a string and returns a copy of this string with all lowercase letters
     * changed to uppercase. All other characters are left unchanged.
     * The definition of what a lowercase letter is depends on the current locale.
     */
    static LuaValue upper(LuaValue arg) {
        return valueOf(arg.checkjstring().toUpperCase());
    }

    /**
     * This utility method implements both string.find and string.match.
     *
     * @param args the calling args: subject, pattern, optional init, optional plain
     * @param find true for {@code string.find} (returns indices and then any captures),
     *             false for {@code string.match} (returns captures only)
     * @return the match results, or NIL when nothing matches
     */
    static Varargs str_find_aux(Varargs args, boolean find) {
        LuaString s = args.checkstring(1);
        LuaString pat = args.checkstring(2);
        int init = args.optint(3, 1);

        // convert the 1-based (possibly negative) init into a 0-based offset
        if (init > 0) {
            init = Math.min(init - 1, s.length());
        } else if (init < 0) {
            init = Math.max(0, s.length() + init);
        }

        // a plain search is enough when requested or when the pattern has no magic characters
        boolean fastMatch = find && (args.arg(4).toboolean() || pat.indexOfAny(SPECIALS) == -1);

        if (fastMatch) {
            int result = s.indexOf(pat, init);
            if (result != -1) {
                return varargsOf(valueOf(result + 1), valueOf(result + pat.length()));
            }
        } else {
            MatchState ms = new MatchState(args, s, pat);

            boolean anchor = false;
            int poff = 0;
            // check the length first: the pattern may be empty
            if (pat.length() > 0 && pat.luaByte(0) == '^') {
                anchor = true;
                poff = 1;
            }

            int soff = init;
            do {
                int res;
                ms.reset();
                if ((res = ms.match(soff, poff)) != -1) {
                    if (find) {
                        return varargsOf(valueOf(soff + 1), valueOf(res), ms.push_captures(false, soff, res));
                    } else {
                        return ms.push_captures(true, soff, res);
                    }
                }
            } while (soff++ < s.length() && !anchor);
        }
        return NIL;
    }

    /** Converts a possibly negative (relative to the end) position into an absolute 1-based one. */
    private static int posrelat(int pos, int len) {
        return (pos >= 0) ? pos : len + pos + 1;
    }

    // Pattern matching implementation

    private static final int L_ESC = '%';
    private static final LuaString SPECIALS = valueOf("^$*+?.([%-");
    private static final int MAX_CAPTURES = 32;

    private static final int CAP_UNFINISHED = -1;
    private static final int CAP_POSITION = -2;

    private static final byte MASK_ALPHA = 0x01;
    private static final byte MASK_LOWERCASE = 0x02;
    private static final byte MASK_UPPERCASE = 0x04;
    private static final byte MASK_DIGIT = 0x08;
    private static final byte MASK_PUNCT = 0x10;
    private static final byte MASK_SPACE = 0x20;
    private static final byte MASK_CONTROL = 0x40;
    private static final byte MASK_HEXDIGIT = (byte) 0x80;

    /** Character-class bit masks for every byte value 0..255. */
    private static final byte[] CHAR_TABLE;

    static {
        CHAR_TABLE = new byte[256];

        for (int i = 0; i < 256; ++i) {
            final char c = (char) i;
            CHAR_TABLE[i] = (byte) ((Character.isDigit(c) ? MASK_DIGIT : 0) |
                    (Character.isLowerCase(c) ? MASK_LOWERCASE : 0) |
                    (Character.isUpperCase(c) ? MASK_UPPERCASE : 0) |
                    ((c < ' ' || c == 0x7F) ? MASK_CONTROL : 0));
            if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9')) {
                CHAR_TABLE[i] |= MASK_HEXDIGIT;
            }
            // all four ASCII punctuation ranges
            if ((c >= '!' && c <= '/') || (c >= ':' && c <= '@')
                    || (c >= '[' && c <= '`') || (c >= '{' && c <= '~')) {
                CHAR_TABLE[i] |= MASK_PUNCT;
            }
            if ((CHAR_TABLE[i] & (MASK_LOWERCASE | MASK_UPPERCASE)) != 0) {
                CHAR_TABLE[i] |= MASK_ALPHA;
            }
        }

        CHAR_TABLE[' '] = MASK_SPACE;
        CHAR_TABLE['\r'] |= MASK_SPACE;
        CHAR_TABLE['\n'] |= MASK_SPACE;
        CHAR_TABLE['\t'] |= MASK_SPACE;
        CHAR_TABLE[0x0B /* '\v' */] |= MASK_SPACE; // vertical tab is 0x0B (0x0C is form feed)
        CHAR_TABLE['\f'] |= MASK_SPACE;
    }

    /**
     * State for one pattern-matching run over a subject string: the subject,
     * the pattern, and the captures recorded so far.
     */
    static class MatchState {
        final LuaString s;
        final LuaString p;
        final Varargs args;
        int level;
        int[] cinit;
        int[] clen;

        MatchState(Varargs args, LuaString s, LuaString pattern) {
            this.s = s;
            this.p = pattern;
            this.args = args;
            this.level = 0;
            this.cinit = new int[MAX_CAPTURES];
            this.clen = new int[MAX_CAPTURES];
        }

        /** Discards all recorded captures before a new match attempt. */
        void reset() {
            level = 0;
        }

        /**
         * Appends the replacement string {@code news} to {@code lbuf},
         * expanding {@code %0}..{@code %9} with the whole match or a capture
         * and {@code %x} (x not a digit) with {@code x} itself.
         */
        private void add_s(Buffer lbuf, LuaString news, int soff, int e) {
            int l = news.length();
            for (int i = 0; i < l; ++i) {
                byte b = (byte) news.luaByte(i);
                if (b != L_ESC) {
                    lbuf.append((byte) b);
                } else {
                    ++i; // skip ESC
                    if (i >= l) {
                        // a lone '%' at the end has nothing to escape
                        error("invalid use of '%' in replacement string");
                    }
                    b = (byte) news.luaByte(i);
                    if (!Character.isDigit((char) b)) {
                        lbuf.append(b);
                    } else if (b == '0') {
                        lbuf.append(s.substring(soff, e));
                    } else {
                        lbuf.append(push_onecapture(b - '1', soff, e).strvalue());
                    }
                }
            }
        }

        /**
         * Appends the replacement for the match {@code [soffset, end)} to
         * {@code lbuf}, resolving {@code repl} as a string/number template, a
         * function to call, or a table to index.
         */
        public void add_value(Buffer lbuf, int soffset, int end, LuaValue repl) {
            switch (repl.type()) {
                case LuaValue.TSTRING:
                case LuaValue.TNUMBER:
                    add_s(lbuf, repl.strvalue(), soffset, end);
                    return;

                case LuaValue.TFUNCTION:
                    repl = repl.invoke(push_captures(true, soffset, end)).arg1();
                    break;

                case LuaValue.TTABLE:
                    // Need to call push_onecapture here for the error checking
                    repl = repl.get(push_onecapture(0, soffset, end));
                    break;

                default:
                    error("bad argument: string/function/table expected");
                    return;
            }

            if (!repl.toboolean()) {
                // false or nil: keep the original match
                repl = s.substring(soffset, end);
            } else if (!repl.isstring()) {
                error("invalid replacement value (a " + repl.typename() + ")");
            }
            lbuf.append(repl.strvalue());
        }

        /**
         * Returns all captures of the current match; when there are none and
         * {@code wholeMatch} is true, returns the whole match instead.
         */
        Varargs push_captures(boolean wholeMatch, int soff, int end) {
            int nlevels = (this.level == 0 && wholeMatch) ? 1 : this.level;
            switch (nlevels) {
                case 0:
                    return NONE;
                case 1:
                    return push_onecapture(0, soff, end);
            }
            LuaValue[] v = new LuaValue[nlevels];
            for (int i = 0; i < nlevels; ++i)
                v[i] = push_onecapture(i, soff, end);
            return varargsOf(v);
        }

        /** Returns capture {@code i}: a substring, a position, or the whole match when there are no captures. */
        private LuaValue push_onecapture(int i, int soff, int end) {
            if (i >= this.level) {
                if (i == 0) {
                    return s.substring(soff, end);
                } else {
                    return error("invalid capture index");
                }
            } else {
                int l = clen[i];
                if (l == CAP_UNFINISHED) {
                    return error("unfinished capture");
                }
                if (l == CAP_POSITION) {
                    return valueOf(cinit[i] + 1);
                } else {
                    int begin = cinit[i];
                    return s.substring(begin, begin + l);
                }
            }
        }

        /** Converts the digit character {@code l} of a back-reference into a validated capture index. */
        private int check_capture(int l) {
            l -= '1';
            if (l < 0 || l >= level || this.clen[l] == CAP_UNFINISHED) {
                error("invalid capture index");
            }
            return l;
        }

        /** Returns the index of the innermost capture that is still open. */
        private int capture_to_close() {
            int level = this.level;
            for (level--; level >= 0; level--)
                if (clen[level] == CAP_UNFINISHED)
                    return level;
            error("invalid pattern capture");
            return 0;
        }

        /**
         * Returns the pattern offset just past the single-character class that
         * starts at {@code poffset}. Malformed classes raise a Lua error.
         */
        int classend(int poffset) {
            switch (p.luaByte(poffset++)) {
                case L_ESC:
                    if (poffset == p.length()) {
                        error("malformed pattern (ends with %)");
                    }
                    return poffset + 1;

                case '[':
                    if (poffset != p.length() && p.luaByte(poffset) == '^')
                        poffset++;
                    do {
                        if (poffset == p.length()) {
                            error("malformed pattern (missing ])");
                        }
                        if (p.luaByte(poffset++) == L_ESC && poffset != p.length())
                            poffset++;
                        // loop again at end of pattern so the error above fires
                        // instead of reading past the last byte
                    } while (poffset == p.length() || p.luaByte(poffset) != ']');
                    return poffset + 1;
                default:
                    return poffset;
            }
        }

        /** Tests byte {@code c} against class letter {@code cl}; an uppercase letter negates the class. */
        static boolean match_class(int c, int cl) {
            final char lcl = Character.toLowerCase((char) cl);
            int cdata = CHAR_TABLE[c];

            boolean res;
            switch (lcl) {
                case 'a':
                    res = (cdata & MASK_ALPHA) != 0;
                    break;
                case 'd':
                    res = (cdata & MASK_DIGIT) != 0;
                    break;
                case 'l':
                    res = (cdata & MASK_LOWERCASE) != 0;
                    break;
                case 'u':
                    res = (cdata & MASK_UPPERCASE) != 0;
                    break;
                case 'c':
                    res = (cdata & MASK_CONTROL) != 0;
                    break;
                case 'p':
                    res = (cdata & MASK_PUNCT) != 0;
                    break;
                case 's':
                    res = (cdata & MASK_SPACE) != 0;
                    break;
                case 'w':
                    res = (cdata & (MASK_ALPHA | MASK_DIGIT)) != 0;
                    break;
                case 'x':
                    res = (cdata & MASK_HEXDIGIT) != 0;
                    break;
                case 'z':
                    res = (c == 0);
                    break;
                default:
                    return cl == c;
            }
            return (lcl == cl) ? res : !res;
        }

        /**
         * Tests byte {@code c} against the bracket set whose '[' is at
         * {@code poff} and whose closing ']' is at {@code ec}.
         */
        boolean matchbracketclass(int c, int poff, int ec) {
            boolean sig = true;
            if (p.luaByte(poff + 1) == '^') {
                sig = false;
                poff++;
            }
            while (++poff < ec) {
                if (p.luaByte(poff) == L_ESC) {
                    poff++;
                    if (match_class(c, p.luaByte(poff)))
                        return sig;
                } else if ((p.luaByte(poff + 1) == '-') && (poff + 2 < ec)) {
                    poff += 2;
                    if (p.luaByte(poff - 2) <= c && c <= p.luaByte(poff))
                        return sig;
                } else if (p.luaByte(poff) == c) return sig;
            }
            return !sig;
        }

        /** Tests byte {@code c} against the single pattern item spanning {@code [poff, ep)}. */
        boolean singlematch(int c, int poff, int ep) {
            switch (p.luaByte(poff)) {
                case '.':
                    return true;
                case L_ESC:
                    return match_class(c, p.luaByte(poff + 1));
                case '[':
                    return matchbracketclass(c, poff, ep - 1);
                default:
                    return p.luaByte(poff) == c;
            }
        }

        /**
         * Perform pattern matching. If there is a match, returns offset into s
         * where match ends, otherwise returns -1.
         */
        int match(int soffset, int poffset) {
            while (true) {
                // Check if we are at the end of the pattern -
                // equivalent to the '\0' case in the C version, but our pattern
                // string is not NUL-terminated.
                if (poffset == p.length())
                    return soffset;
                switch (p.luaByte(poffset)) {
                    case '(':
                        if (++poffset < p.length() && p.luaByte(poffset) == ')')
                            return start_capture(soffset, poffset + 1, CAP_POSITION);
                        else
                            return start_capture(soffset, poffset, CAP_UNFINISHED);
                    case ')':
                        return end_capture(soffset, poffset + 1);
                    case L_ESC:
                        if (poffset + 1 == p.length())
                            error("malformed pattern (ends with '%')");
                        switch (p.luaByte(poffset + 1)) {
                            case 'b':
                                soffset = matchbalance(soffset, poffset + 2);
                                if (soffset == -1) return -1;
                                poffset += 4;
                                continue;
                            case 'f': {
                                poffset += 2;
                                if (poffset == p.length() || p.luaByte(poffset) != '[') {
                                    error("Missing [ after %f in pattern");
                                }
                                int ep = classend(poffset);
                                // The beginning and the end of the subject are
                                // handled as if they were the character '\0'.
                                int previous = (soffset == 0) ? '\0' : s.luaByte(soffset - 1);
                                int next = (soffset < s.length()) ? s.luaByte(soffset) : '\0';
                                // A frontier matches where the previous character is
                                // NOT in the set and the next character IS in the set.
                                if (matchbracketclass(previous, poffset, ep - 1) ||
                                        !matchbracketclass(next, poffset, ep - 1))
                                    return -1;
                                poffset = ep;
                                continue;
                            }
                            default: {
                                int c = p.luaByte(poffset + 1);
                                if (Character.isDigit((char) c)) {
                                    soffset = match_capture(soffset, c);
                                    if (soffset == -1)
                                        return -1;
                                    return match(soffset, poffset + 2);
                                }
                            }
                        }
                        // Deliberate fall-through: any other escape is handled as an
                        // ordinary single-character class below. The '$' test cannot
                        // succeed here because the pattern does not end after '%'.
                    case '$':
                        if (poffset + 1 == p.length())
                            return (soffset == s.length()) ? soffset : -1;
                }
                int ep = classend(poffset);
                boolean m = soffset < s.length() && singlematch(s.luaByte(soffset), poffset, ep);
                int pc = (ep < p.length()) ? p.luaByte(ep) : '\0';

                switch (pc) {
                    case '?':
                        int res;
                        if (m && ((res = match(soffset + 1, ep + 1)) != -1))
                            return res;
                        poffset = ep + 1;
                        continue;
                    case '*':
                        return max_expand(soffset, poffset, ep);
                    case '+':
                        return (m ? max_expand(soffset + 1, poffset, ep) : -1);
                    case '-':
                        return min_expand(soffset, poffset, ep);
                    default:
                        if (!m)
                            return -1;
                        soffset++;
                        poffset = ep;
                        continue;
                }
            }
        }

        /** Greedy repetition: consumes as many items as possible, then backs off until the rest matches. */
        int max_expand(int soff, int poff, int ep) {
            int i = 0;
            while (soff + i < s.length() &&
                    singlematch(s.luaByte(soff + i), poff, ep))
                i++;
            while (i >= 0) {
                int res = match(soff + i, ep + 1);
                if (res != -1)
                    return res;
                i--;
            }
            return -1;
        }

        /** Lazy repetition: tries the rest of the pattern first and consumes one more item only on failure. */
        int min_expand(int soff, int poff, int ep) {
            for (; ; ) {
                int res = match(soff, ep + 1);
                if (res != -1)
                    return res;
                else if (soff < s.length() && singlematch(s.luaByte(soff), poff, ep))
                    soff++;
                else return -1;
            }
        }

        /** Opens a capture at {@code soff} and continues matching; the capture is dropped if the rest fails. */
        int start_capture(int soff, int poff, int what) {
            int res;
            int level = this.level;
            if (level >= MAX_CAPTURES) {
                error("too many captures");
            }
            cinit[level] = soff;
            clen[level] = what;
            this.level = level + 1;
            if ((res = match(soff, poff)) == -1)
                this.level--;
            return res;
        }

        /** Closes the innermost open capture at {@code soff} and continues; it is reopened if the rest fails. */
        int end_capture(int soff, int poff) {
            int l = capture_to_close();
            int res;
            clen[l] = soff - cinit[l];
            if ((res = match(soff, poff)) == -1)
                clen[l] = CAP_UNFINISHED;
            return res;
        }

        /** Matches a back-reference ({@code %1}..{@code %9}) at {@code soff}; returns the end offset or -1. */
        int match_capture(int soff, int l) {
            l = check_capture(l);
            int len = clen[l];
            if ((s.length() - soff) >= len &&
                    LuaString.equals(s, cinit[l], s, soff, len))
                return soff + len;
            else
                return -1;
        }

        /** Matches a balanced {@code %bxy} item at {@code soff}; returns the offset after the closing byte or -1. */
        int matchbalance(int soff, int poff) {
            final int plen = p.length();
            if (poff == plen || poff + 1 == plen) {
                error("unbalanced pattern");
            }
            final int slen = s.length();
            if (soff >= slen)
                return -1;
            final int b = p.luaByte(poff);
            if (s.luaByte(soff) != b)
                return -1;
            final int e = p.luaByte(poff + 1);
            int cont = 1;
            while (++soff < slen) {
                if (s.luaByte(soff) == e) {
                    if (--cont == 0) return soff + 1;
                } else if (s.luaByte(soff) == b) cont++;
            }
            return -1;
        }
    }
}