/* --- Copyright Jonathan Meyer 1996. All rights reserved. -----------------
> File: jasmin/src/jasmin/Scanner.java
> Purpose: Tokenizer for Jasmin
> Author: Jonathan Meyer, 10 July 1996
*/
/* Scanner.java - class for tokenizing Jasmin files. This is rather
* cheap and cheerful.
*/
package jasmin;
import jas.*;
import java_cup.runtime.*;
import java.util.*;
import java.io.Reader;
class Scanner {
Reader inp;
// single lookahead character
int next_char;
// temporary buffer
char chars[];
static private int chars_size = 512;
// Whitespace characters
static final String WHITESPACE = " \n\t\r";
// Separator characters
static final String SEPARATORS = WHITESPACE + ":=";
/*
// Character can be present in signature
static final String SIGCHARS = ";:()[/.^*+-<>@";
*/
// used for error reporting to print out where an error is on the line
public int line_num, token_line_num, char_num, int_char_num, int_line_num;
public StringBuffer line;
public String int_line;
// used by the .set directive to define new variables.
public Hashtable dict = new Hashtable();
//
// returns true if a character code is a whitespace character
//
protected static boolean whitespace(int c) {
return (WHITESPACE.indexOf(c) != -1);
}
//
// returns true if a character code is a separator character
//
protected static boolean separator(int c) {
return (c == -1 || SEPARATORS.indexOf(c) != -1);
}
//
// Advanced the input by one character
//
protected void advance() throws java.io.IOException
{
next_char = inp.read();
switch (next_char) {
case -1: // EOF
if (char_num == 0) {
char_num = -1;
break;
}
next_char = '\n';
// pass thru
case '\n': // a new line
line_num++;
char_num = 0;
break;
default:
line.append((char)next_char);
char_num++;
return;
}
line.setLength(0);
}
//
// initialize the scanner
//
public Scanner(Reader i) throws java.io.IOException, jasError
{
inp = i;
line_num = 1;
char_num = 0;
line = new StringBuffer();
chars = new char[chars_size];
next_char = 0; // no start comment
skip_empty_lines();
if ( next_char == -1 )
throw new jasError("empty source file");
}
private void chars_expand()
{
char temp[] = new char[chars_size * 2];
System.arraycopy(chars, 0, temp, 0, chars_size);
chars_size *= 2;
chars = temp;
}
private void skip_empty_lines() throws java.io.IOException
{
for (;;) {
if (next_char != ';') {
do { advance(); } while (whitespace(next_char));
if (next_char != ';')
return;
}
do {
advance();
if (next_char == -1)
return;
} while (next_char != '\n');
}
}
private char uniEscape()
throws java.io.IOException, jasError
{
int res = 0;
for(int i = 0; i < 4; i++) {
advance();
if(next_char == -1)
return 0;
int tmp = Character.digit((char)next_char, 16);
if (tmp == -1)
throw new jasError("Bad '\\u' escape sequence");
res = (res << 4) | tmp;
}
return (char)res;
}
private char nameEscape()
throws java.io.IOException, jasError
{
advance();
if (next_char != 'u')
throw new jasError("Only '\\u' escape sequence allowed in names");
char chval = uniEscape();
if (next_char == -1)
throw new jasError("Left over '\\u' escape sequence");
/*
if ( SIGCHARS.indexOf(chval) == -1
&& ( !Character.isJavaIdentifierPart(chval)
|| Character.isIdentifierIgnorable(chval)))
{
throw new jasError("Invalid unicode char from name/signature");
}
*/
return chval;
}
//
// recognize and return the next complete token
//
public token next_token()
throws java.io.IOException, jasError
{
token_line_num = line_num;
for (;;) switch (next_char) {
case ';': // a comment
case '\n':
// return single SEP token (skip multiple newlines
// interspersed with whitespace or comments)
skip_empty_lines();
token_line_num = line_num;
return new token(sym.SEP);
case -1: // EOF token
char_num = -1;
return new token(sym.EOF);
case '-': case '+':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '.': // a number
{
int pos = 0;
do {
chars[pos] = (char)next_char;
pos++;
if(pos == chars_size) chars_expand();
advance();
}while(!separator(next_char));
String str = new String(chars, 0, pos);
token tok;
// This catches directives like ".method"
if ((tok = ReservedWords.get(str)) != null)
return tok;
Number num;
try {
num = ScannerUtils.convertNumber(str);
} catch (NumberFormatException e) {
if (chars[0] != '.')
throw new jasError("Badly formatted number");
throw new jasError("Unknown directive or badly formed number.");
}
if (num instanceof Integer) {
int_line = line.toString();
int_line_num = token_line_num;
int_char_num = char_num;
return new int_token(sym.Int, num.intValue());
}
return new num_token(sym.Num, num);
}
case '"': // quoted string
{
boolean already = false;
for (int pos = 0; ; ) {
if (already) already = false;
else advance();
if (next_char == '"') {
advance(); // skip close quote
return new str_token(sym.Str, new String(chars, 0, pos));
}
if(next_char == -1)
throw new jasError("Unterminated string");
char chval = (char)next_char;
if (chval == '\\') {
advance();
switch (next_char) {
case -1: already = true; continue;
case 'n': chval = '\n'; break;
case 'r': chval = '\r'; break;
case 't': chval = '\t'; break;
case 'f': chval = '\f'; break;
case 'b': chval = '\b'; break;
case '"' : chval = '"'; break;
case '\'' : chval = '\''; break;
case '\\' : chval = '\\'; break;
case 'u':
chval = uniEscape();
if(next_char == -1) {
already = true;
continue;
}
break;
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
int res = next_char&7;
advance();
if (next_char < '0' || next_char > '7')
already = true;
else {
res = res*8 + (next_char&7);
advance();
if (next_char < '0' || next_char > '7')
already = true;
else {
int val = res*8 + (next_char&7);
if (val >= 0x100)
already = true;
else
res = val;
}
}
chval = (char)res;
}
break;
default:
throw new jasError("Bad backslash escape sequence");
}
}
chars[pos] = chval;
pos++;
if(pos == chars_size) chars_expand();
}
}
case '\'': // quotation for overloading reserved words
for (int pos = 0; ; ) {
advance();
if (separator(next_char))
throw new jasError("Unterminated ''-enclosed name");
if (next_char == '\'') {
if (pos == 0)
throw new jasError("Empty ''-enclosed name");
advance(); // skip close quote
if (!separator(next_char))
throw new jasError("Not separator after ''-enclosed name");
return new str_token(sym.Word, new String(chars, 0, pos));
}
char chval = (char)next_char;
if (next_char == '\\')
chval = nameEscape();
chars[pos] = chval;
pos++;
if(pos == chars_size) chars_expand();
}
case ' ':
case '\t':
case '\r': // whitespace
advance();
break;
case '=': // EQUALS token
advance();
return new token(sym.EQ);
case ':': // COLON token
advance();
return new token(sym.COLON);
default:
{
// read up until a separatorcharacter
int pos = 0;
boolean only_name = false;
do {
char chval = (char)next_char;
if (next_char == '\\') {
chval = nameEscape();
only_name = true;
}
chars[pos] = chval;
pos++;
if(pos == chars_size) chars_expand();
advance();
}while(!separator(next_char));
// convert the byte array into a String
String str = new String(chars, 0, pos);
if (!only_name) {
token tok;
// Jasmin keyword or directive ?
if ((tok = ReservedWords.get(str)) != null)
return tok;
// its a JVM instruction ?
if (InsnInfo.contains(str))
return new str_token(sym.Insn, str);
if (str.charAt(0) == '$') {
String s = str.substring(1);
Object v;
int n = 10;
boolean neg = false;
boolean sign = false;
switch(s.charAt(0)) {
default:
break;
case '-':
neg = true;;
case '+':
s = s.substring(1);
if (s.startsWith("0x")) {
n = 16;
s = s.substring(2);
}
try {
n = Integer.parseInt(s, n);
} catch (NumberFormatException e) {
throw new jasError("Badly relative offset number");
}
if(neg) n = -n;
return new relative_num_token(sym.Relative, n);
}
// Perform variable substitution
if ((v = dict.get(s)) != null)
return (token)v;
} // not begin from '$'
} // !only_name
// Unrecognized string token (e.g. a classname)
return new str_token(sym.Word, str);
} /* default */
} /* switch and for */
}
};
/* --- Revision History ---------------------------------------------------
--- Iouri Kharon, Mar 13 2006
Added support for '\\u' escape sequnce in name/signature
Added '' enclosed names (overload of reserved words)
--- Iouri Kharon, Feb 17 2006
Remove infinite loop when last line in source file do not have EOL
--- Iouri Kharon, Dec 19 2005
Added '\\u' escape sequence
Change '\octal' escape sequence
Added very long string support
--- Daniel Reynaud, Oct 19 2005
Added '\\' escape sequence
--- Jonathan Meyer, Feb 8 1997
Converted to be non-static
--- Jonathan Meyer, Oct 30 1996
Added support for more \ escapes in quoted strings (including octals).
--- Jonathan Meyer, Oct 1 1996
Added .interface and .implements
--- Jonathan Meyer, July 25 1996
changed IN to IS. Added token_line_num, which is the line number of the
last token returned by next_token().
--- Jonathan Meyer, July 24 1996 added mods to recognize '\r' as whitespace.
*/