/*
* PerlTokenMarker.java - Perl token marker
* Copyright (C) 1998, 1999 Slava Pestov
*
* You may use and modify this package for any purpose. Redistribution is
* permitted, in both source and binary form, provided that this notice
* remains intact in all source distributions of this package.
*/
import javax.swing.text.Segment;
/**
* Perl token marker.
*
* @author Slava Pestov
* @version $Id: PerlTokenMarker.java,v 1.11 1999/12/13 03:40:30 sp Exp $
*/
public class PerlTokenMarker extends TokenMarker
{
// public members
/**
 * Internal scanner state: inside a quote-like construct with one delimited
 * section still to be closed. Entered for the keywords registered with this
 * id (m, q, qq, qw, qx), for a bare '/' or '?' delimiter, and from
 * {@link #S_TWO} once its first section has been closed.
 */
public static final byte S_ONE = Token.INTERNAL_FIRST;
/**
 * Internal scanner state: inside a quote-like construct with two delimited
 * sections still to be closed. Entered for the keywords registered with
 * this id (s, tr, y).
 */
public static final byte S_TWO = (byte)(Token.INTERNAL_FIRST + 1);
/**
 * Internal scanner state: the final delimiter of a quote-like construct has
 * been seen. Letters, digits and '_' that follow are skipped; the run is
 * emitted as LITERAL2 when another character or the end of the line is
 * reached.
 */
public static final byte S_END = (byte)(Token.INTERNAL_FIRST + 2);
/**
 * Creates a Perl token marker that uses the shared keyword table returned
 * by getKeywords().
 */
public PerlTokenMarker()
{
this(getKeywords());
}
/**
 * Creates a Perl token marker that classifies words with the given
 * keyword table.
 *
 * @param keywords the table looked up by doKeyword() for every word
 * found outside strings, comments and variables
 */
public PerlTokenMarker(KeywordMap keywords)
{
this.keywords = keywords;
}
/**
 * Tokenizes one line of Perl source.
 * <p>
 * The line is scanned one character at a time by a state machine whose
 * state is held in the <code>token</code> field:
 * <ul>
 * <li><code>Token.NULL</code> - plain code; words are looked up through
 *     doKeyword() and punctuation may start a new construct</li>
 * <li><code>Token.KEYWORD2</code> - inside a $, @, % or &amp; variable name</li>
 * <li><code>Token.LITERAL1</code> - inside a double-quoted string or a
 *     here-document</li>
 * <li><code>Token.LITERAL2</code> - inside a single-quoted string</li>
 * <li><code>Token.OPERATOR</code> - inside a backtick string</li>
 * <li><code>Token.COMMENT2</code> - inside a block opened by a line starting
 *     with '=' and closed by a line starting with "=cut"</li>
 * <li><code>S_ONE</code>, <code>S_TWO</code>, <code>S_END</code> - inside or
 *     just after a quote-like construct (m//, s///, q(), ...)</li>
 * </ul>
 * A backslash toggles an escape flag that suppresses the special meaning of
 * the following quote, comment or delimiter character.
 * <p>
 * <code>lineInfo</code> and <code>addToken</code> are not defined in this
 * class; they come from the superclass. The <code>obj</code> slot of a
 * line's <code>lineInfo</code> entry is used here to carry the terminator
 * word of a pending here-document from one line to the next.
 *
 * @param _token the state the previous line ended in
 * @param line the text of the line to tokenize
 * @param lineIndex index of the line, used to address <code>lineInfo</code>
 * @return the state to be passed in as <code>_token</code> for the next line
 */
public byte markTokensImpl(byte _token, Segment line, int lineIndex)
{
    char[] array = line.array;
    int offset = line.offset;
    token = _token;
    lastOffset = offset;
    lastKeyword = offset;
    matchChar = '\0';
    matchCharBracket = false;
    matchSpacesAllowed = false;
    int length = line.count + offset;

    // Continuation of a here-document: the previous line recorded the
    // terminator word. The whole line is string text; the here-document
    // ends when the line consists of exactly that word.
    if(token == Token.LITERAL1 && lineIndex != 0
        && lineInfo[lineIndex - 1].obj != null)
    {
        String str = (String)lineInfo[lineIndex - 1].obj;
        if(str.length() == line.count
            && SyntaxUtilities.regionMatches(false,line,
            offset,str))
        {
            addToken(line.count,token);
            return Token.NULL;
        }
        else
        {
            addToken(line.count,token);
            // hand the terminator on to the next line
            lineInfo[lineIndex].obj = str;
            return token;
        }
    }

    boolean backslash = false;
    loop: for(int i = offset; i < length; i++)
    {
        int i1 = (i+1);
        char c = array[i];
        if(c == '\\')
        {
            backslash = !backslash;
            continue;
        }

        switch(token)
        {
        case Token.NULL:
            switch(c)
            {
            case '#':
                if(doKeyword(line,i,c))
                    break;
                if(backslash)
                    backslash = false;
                else
                {
                    // comment runs to the end of the line
                    addToken(i - lastOffset,token);
                    addToken(length - i,Token.COMMENT1);
                    lastOffset = lastKeyword = length;
                    break loop;
                }
                break;
            case '=':
                backslash = false;
                if(i == offset)
                {
                    // '=' in the first column opens a COMMENT2 block
                    token = Token.COMMENT2;
                    addToken(length - i,token);
                    lastOffset = lastKeyword = length;
                    break loop;
                }
                else
                    doKeyword(line,i,c);
                break;
            case '$': case '&': case '%': case '@':
                backslash = false;
                if(doKeyword(line,i,c))
                    break;
                if(length - i > 1)
                {
                    // "&&" and "& " are operators, not a sigil:
                    // skip the following character as well
                    if(c == '&' && (array[i1] == '&'
                        || Character.isWhitespace(
                        array[i1])))
                        i++;
                    else
                    {
                        addToken(i - lastOffset,token);
                        lastOffset = lastKeyword = i;
                        token = Token.KEYWORD2;
                    }
                }
                break;
            case '"':
                if(doKeyword(line,i,c))
                    break;
                if(backslash)
                    backslash = false;
                else
                {
                    addToken(i - lastOffset,token);
                    token = Token.LITERAL1;
                    // an ordinary string, not a here-document
                    lineInfo[lineIndex].obj = null;
                    lastOffset = lastKeyword = i;
                }
                break;
            case '\'':
                if(backslash)
                    backslash = false;
                else
                {
                    // doKeyword() moves lastKeyword, so remember
                    // where the current word started
                    int oldLastKeyword = lastKeyword;
                    if(doKeyword(line,i,c))
                        break;
                    // a quote directly after word characters is
                    // not treated as the start of a string
                    if(i != oldLastKeyword)
                        break;
                    addToken(i - lastOffset,token);
                    token = Token.LITERAL2;
                    lastOffset = lastKeyword = i;
                }
                break;
            case '`':
                if(doKeyword(line,i,c))
                    break;
                if(backslash)
                    backslash = false;
                else
                {
                    addToken(i - lastOffset,token);
                    token = Token.OPERATOR;
                    lastOffset = lastKeyword = i;
                }
                break;
            case '<':
                if(doKeyword(line,i,c))
                    break;
                if(backslash)
                    backslash = false;
                else
                {
                    // "<<" directly followed by a non-blank
                    // starts a here-document
                    if(length - i > 2 && array[i1] == '<'
                        && !Character.isWhitespace(array[i+2]))
                    {
                        addToken(i - lastOffset,token);
                        lastOffset = lastKeyword = i;
                        token = Token.LITERAL1;
                        int len = length - (i+2);
                        // drop the statement terminator
                        if(array[length - 1] == ';')
                            len--;
                        lineInfo[lineIndex].obj =
                            createReadinString(array,i + 2,len);
                    }
                }
                break;
            case ':':
            {
                backslash = false;
                // Must be sampled before doKeyword(), which always
                // moves lastKeyword past the ':' when it returns
                // false. Testing lastKeyword afterwards (as the
                // previous code did) can never succeed.
                boolean firstWord = (lastKeyword == offset);
                if(doKeyword(line,i,c))
                    break;
                // Doesn't pick up all labels (only a word that
                // starts in the first column), but at least
                // doesn't mess up XXX::YYY
                if(!firstWord
                    || (i1 < length && array[i1] == ':'))
                    break;
                addToken(i1 - lastOffset,Token.LABEL);
                lastOffset = lastKeyword = i1;
                break;
            }
            case '-':
                backslash = false;
                if(doKeyword(line,i,c))
                    break;
                if(i != lastKeyword || length - i <= 1)
                    break;
                // file test operators: -r, -w, -x, ...
                switch(array[i1])
                {
                case 'r': case 'w': case 'x':
                case 'o': case 'R': case 'W':
                case 'X': case 'O': case 'e':
                case 'z': case 's': case 'f':
                case 'd': case 'l': case 'p':
                case 'S': case 'b': case 'c':
                case 't': case 'u': case 'g':
                case 'k': case 'T': case 'B':
                case 'M': case 'A': case 'C':
                    addToken(i - lastOffset,token);
                    addToken(2,Token.KEYWORD3);
                    lastOffset = lastKeyword = i+2;
                    i++;
                }
                break;
            case '/': case '?':
                if(doKeyword(line,i,c))
                    break;
                if(length - i > 1)
                {
                    backslash = false;
                    char ch = array[i1];
                    // followed by a blank: treat as an operator
                    if(Character.isWhitespace(ch))
                        break;
                    // bare pattern, delimited by c itself
                    matchChar = c;
                    matchSpacesAllowed = false;
                    addToken(i - lastOffset,token);
                    token = S_ONE;
                    lastOffset = lastKeyword = i;
                }
                break;
            default:
                backslash = false;
                // any other non-word character ends the current word
                if(!Character.isLetterOrDigit(c)
                    && c != '_')
                    doKeyword(line,i,c);
                break;
            }
            break;
        case Token.KEYWORD2:
            backslash = false;
            // This test checks for an end-of-variable
            // condition
            if(!Character.isLetterOrDigit(c) && c != '_'
                && c != '#' && c != '\'' && c != ':'
                && c != '&')
            {
                // If this is the first character
                // of the variable name ($'aaa)
                // ignore it
                if(i != offset && array[i-1] == '$')
                {
                    addToken(i1 - lastOffset,token);
                    lastOffset = lastKeyword = i1;
                }
                // Otherwise, end of variable...
                else
                {
                    addToken(i - lastOffset,token);
                    lastOffset = lastKeyword = i;
                    // Wind back so that stuff
                    // like $hello$fred is picked
                    // up
                    i--;
                    token = Token.NULL;
                }
            }
            break;
        case S_ONE: case S_TWO:
            if(backslash)
                backslash = false;
            else
            {
                if(matchChar == '\0')
                {
                    // No delimiter chosen yet. Blanks between the
                    // operator (or the first bracketed section)
                    // and the delimiter are skipped when allowed;
                    // the first other character becomes the
                    // delimiter.
                    if(Character.isWhitespace(c)
                        && matchSpacesAllowed)
                        break;
                    matchChar = c;
                    matchCharBracket = false;
                }
                else
                {
                    // An opening bracket is closed by its
                    // counterpart; any other delimiter closes
                    // itself.
                    switch(matchChar)
                    {
                    case '(':
                        matchChar = ')';
                        matchCharBracket = true;
                        break;
                    case '[':
                        matchChar = ']';
                        matchCharBracket = true;
                        break;
                    case '{':
                        matchChar = '}';
                        matchCharBracket = true;
                        break;
                    case '<':
                        matchChar = '>';
                        matchCharBracket = true;
                        break;
                    case ')': case ']': case '}': case '>':
                        // Usually an opener that was already
                        // translated on an earlier character:
                        // keep the bracket flag so the second
                        // section can pick its own delimiter.
                        break;
                    default:
                        matchCharBracket = false;
                        break;
                    }
                    if(c != matchChar)
                        break;
                    if(token == S_TWO)
                    {
                        // first section closed, one to go
                        token = S_ONE;
                        // bracketed sections each have their
                        // own opener, e.g. s{a}{b} or s{a}(b)
                        if(matchCharBracket)
                            matchChar = '\0';
                    }
                    else
                    {
                        token = S_END;
                        matchCharBracket = false;
                        addToken(i1 - lastOffset,
                            Token.LITERAL2);
                        lastOffset = lastKeyword = i1;
                    }
                }
            }
            break;
        case S_END:
            backslash = false;
            // trailing word characters belong to the construct;
            // the first other character returns to plain code
            if(!Character.isLetterOrDigit(c)
                && c != '_')
                doKeyword(line,i,c);
            break;
        case Token.COMMENT2:
            backslash = false;
            if(i == offset)
            {
                addToken(line.count,token);
                // a line starting with "=cut" is the last line
                // of the block
                if(length - i > 3 && SyntaxUtilities
                    .regionMatches(false,line,offset,"=cut"))
                    token = Token.NULL;
                lastOffset = lastKeyword = length;
                break loop;
            }
            break;
        case Token.LITERAL1:
            if(backslash)
                backslash = false;
            else if(c == '"')
            {
                addToken(i1 - lastOffset,token);
                token = Token.NULL;
                lastOffset = lastKeyword = i1;
            }
            break;
        case Token.LITERAL2:
            if(backslash)
                backslash = false;
            else if(c == '\'')
            {
                // single-quoted strings are emitted as LITERAL1
                addToken(i1 - lastOffset,Token.LITERAL1);
                token = Token.NULL;
                lastOffset = lastKeyword = i1;
            }
            break;
        case Token.OPERATOR:
            if(backslash)
                backslash = false;
            else if(c == '`')
            {
                addToken(i1 - lastOffset,token);
                token = Token.NULL;
                lastOffset = lastKeyword = i1;
            }
            break;
        default:
            throw new InternalError("Invalid state: "
                + token);
        }
    }

    // classify a word that runs up to the end of the line
    if(token == Token.NULL)
        doKeyword(line,length,'\0');

    // Emit whatever is left and decide which state survives the line.
    switch(token)
    {
    case Token.KEYWORD2:
        // variables never span lines
        addToken(length - lastOffset,token);
        token = Token.NULL;
        break;
    case Token.LITERAL2:
        // shown as LITERAL1, but the state stays LITERAL2
        addToken(length - lastOffset,Token.LITERAL1);
        break;
    case S_END:
        addToken(length - lastOffset,Token.LITERAL2);
        token = Token.NULL;
        break;
    case S_ONE: case S_TWO:
        // unterminated quote-like construct
        addToken(length - lastOffset,Token.INVALID); // XXX
        token = Token.NULL;
        break;
    default:
        addToken(length - lastOffset,token);
        break;
    }
    return token;
}
// private members
// Keyword table consulted by doKeyword().
private KeywordMap keywords;
// Current scanner state: a Token id or one of S_ONE / S_TWO / S_END.
private byte token;
// Array index just past the last character already emitted as a token.
private int lastOffset;
// Array index where the word currently being collected starts.
private int lastKeyword;
// Delimiter that closes the current quote-like section; '\0' while the
// delimiter has not been chosen yet.
private char matchChar;
// Set when the current delimiter is the closing half of a bracket pair.
private boolean matchCharBracket;
// Set by doKeyword() when a quote-like operator keyword was recognised,
// cleared when a bare '/' or '?' starts the construct.
private boolean matchSpacesAllowed;
/**
 * Ends the word that started at <code>lastKeyword</code> and classifies it.
 * <p>
 * In state <code>S_END</code> the pending text is flushed as LITERAL2 and
 * the scanner returns to plain code. Otherwise the word is looked up in the
 * keyword table: a quote-like operator (id S_ONE or S_TWO) is emitted as
 * LITERAL2 and switches the scanner into that state, any other keyword is
 * emitted with its own id, and an unknown word is left for a later token.
 *
 * @param line the line being scanned
 * @param i array index of the character that ended the word
 * @param c the character at that index ('\0' at the end of the line)
 * @return true only when a quote-like operator was recognised, in which
 * case <code>c</code> has been consumed as its delimiter (or as the blank
 * preceding it)
 */
private boolean doKeyword(Segment line, int i, char c)
{
    final int next = i + 1;

    // Just past a quote-like construct: flush it, back to plain code.
    if(token == S_END)
    {
        addToken(i - lastOffset, Token.LITERAL2);
        token = Token.NULL;
        lastOffset = i;
        lastKeyword = next;
        return false;
    }

    final int wordStart = lastKeyword;
    final int wordLength = i - wordStart;
    final byte id = keywords.lookup(line, wordStart, wordLength);
    final boolean quoteOperator = (id == S_ONE || id == S_TWO);

    // Not a keyword: only the start of the next word moves.
    if(!quoteOperator && id == Token.NULL)
    {
        lastKeyword = next;
        return false;
    }

    // Emit the plain text in front of the word, then the word itself.
    if(wordStart != lastOffset)
        addToken(wordStart - lastOffset, Token.NULL);
    if(quoteOperator)
        addToken(wordLength, Token.LITERAL2);
    else
        addToken(wordLength, id);
    lastOffset = i;
    lastKeyword = next;

    if(!quoteOperator)
        return false;

    // A blank after the operator means the delimiter is still to come.
    if(Character.isWhitespace(c))
        matchChar = '\0';
    else
        matchChar = c;
    matchSpacesAllowed = true;
    token = id;
    return true;
}
/**
 * Extracts the bare terminator word of a here-document: characters that
 * are neither letters nor digits (blanks, quotes, ...) are stripped from
 * both ends of the given region, so that EOF, 'EOF' and "EOF" all yield
 * EOF.
 *
 * @param array the character buffer of the line
 * @param start index of the first character of the region
 * @param len number of characters in the region
 * @return the trimmed word; empty if the region holds no letter or digit
 */
private String createReadinString(char[] array, int start, int len)
{
    int first = start;
    int last = start + len - 1;

    // advance past leading non-word characters
    for(; first <= last; first++)
    {
        if(Character.isLetterOrDigit(array[first]))
            break;
    }
    // retreat past trailing non-word characters
    for(; first <= last; last--)
    {
        if(Character.isLetterOrDigit(array[last]))
            break;
    }

    int count = last - first + 1;
    return new String(array, first, count);
}
// Shared keyword table, built on first use by getKeywords().
private static KeywordMap perlKeywords;

/**
 * Returns the shared Perl keyword table, building it on the first call.
 * <p>
 * Words are registered group by group in the order listed below. The table
 * is assigned to the static field only after it has been completely
 * filled. No synchronization is performed.
 *
 * @return the shared keyword table
 */
private static KeywordMap getKeywords()
{
    if(perlKeywords == null)
    {
        // Control flow, declarations and special blocks.
        String[] keyword1 = {
            "my", "local", "new", "if", "until", "while", "elsif",
            "else", "eval", "unless", "for", "foreach", "continue",
            "exit", "die", "last", "goto", "next", "redo", "return",
            "do", "sub", "use", "require", "package", "BEGIN", "END"
        };
        // Word operators.
        String[] operators = {
            "eq", "ne", "not", "and", "or"
        };
        // Built-in functions.
        // NOTE(review): "die" is listed here as well as under keyword1,
        // exactly as before; which registration wins depends on
        // KeywordMap.add() - confirm and drop the loser.
        String[] keyword3 = {
            "abs", "accept", "alarm", "atan2", "bind", "binmode",
            "bless", "caller", "chdir", "chmod", "chomp", "chr",
            "chroot", "chown", "closedir", "close", "connect", "cos",
            "crypt", "dbmclose", "dbmopen", "defined", "delete", "die",
            "dump", "each", "endgrent", "endhostent", "endnetent",
            "endprotoent", "endpwent", "endservent", "eof", "exec",
            "exists", "exp", "fcntl", "fileno", "flock", "fork",
            "format", "formline", "getc", "getgrent", "getgrgid",
            "getgrnam", "gethostbyaddr", "gethostbyname", "gethostent",
            "getlogin", "getnetbyaddr", "getnetbyname", "getnetent",
            "getpeername", "getpgrp", "getppid", "getpriority",
            "getprotobyname", "getprotobynumber", "getprotoent",
            "getpwent", "getpwnam", "getpwuid", "getservbyname",
            "getservbyport", "getservent", "getsockname", "getsockopt",
            "glob", "gmtime", "grep", "hex", "import", "index", "int",
            "ioctl", "join", "keys", "kill", "lcfirst", "lc", "length",
            "link", "listen", "log", "localtime", "lstat", "map",
            "mkdir", "msgctl", "msgget", "msgrcv", "no", "oct",
            "opendir", "open", "ord", "pack", "pipe", "pop", "pos",
            "printf", "print", "push", "quotemeta", "rand", "readdir",
            "read", "readlink", "recv", "ref", "rename", "reset",
            "reverse", "rewinddir", "rindex", "rmdir", "scalar",
            "seekdir", "seek", "select", "semctl", "semget", "semop",
            "send", "setgrent", "sethostent", "setnetent", "setpgrp",
            "setpriority", "setprotoent", "setpwent", "setsockopt",
            "shift", "shmctl", "shmget", "shmread", "shmwrite",
            "shutdown", "sin", "sleep", "socket", "socketpair", "sort",
            "splice", "split", "sprintf", "sqrt", "srand", "stat",
            "study", "substr", "symlink", "syscall", "sysopen",
            "sysread", "syswrite", "telldir", "tell", "tie", "tied",
            "time", "times", "truncate", "uc", "ucfirst", "umask",
            "undef", "unlink", "unpack", "unshift", "untie", "utime",
            "values", "vec", "wait", "waitpid", "wantarray", "warn",
            "write"
        };
        // Quote-like operators followed by one delimited section.
        String[] oneSection = {
            "m", "q", "qq", "qw", "qx"
        };
        // Quote-like operators followed by two delimited sections.
        String[] twoSections = {
            "s", "tr", "y"
        };

        // The false argument is presumably the ignore-case flag -
        // confirm against KeywordMap.
        KeywordMap map = new KeywordMap(false);
        addWords(map,keyword1,Token.KEYWORD1);
        addWords(map,operators,Token.OPERATOR);
        addWords(map,keyword3,Token.KEYWORD3);
        addWords(map,oneSection,S_ONE);
        addWords(map,twoSections,S_TWO);
        perlKeywords = map;
    }
    return perlKeywords;
}

// Registers every word of the array, in array order, under the given id.
private static void addWords(KeywordMap map, String[] words, byte id)
{
    for(int i = 0; i < words.length; i++)
        map.add(words[i],id);
}
}