/*
* FindBugs - Find bugs in Java programs
* Copyright (C) 2003,2004 University of Maryland
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package edu.umd.cs.findbugs;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.BitSet;
/**
 * A simple tokenizer for Java source text. This is not intended to be a
 * compliant lexer; instead, it is for quick and dirty scanning.
 *
 * <p>
 * Each call to {@link #next()} yields one {@link Token}: an end-of-file or
 * end-of-line marker, a string/character literal, a comment, a single
 * punctuation character, or a "word" (any other run of characters).
 * Blanks, tabs, carriage returns and form feeds between tokens are
 * discarded; line feeds are reported as end-of-line tokens.
 * </p>
 *
 * @author David Hovemeyer
 * @see Token
 */
public class Tokenizer {
    /** Characters silently skipped between tokens. Line feed is deliberately absent. */
    private static final BitSet whiteSpace = new BitSet();
    static {
        whiteSpace.set(' ');
        whiteSpace.set('\t');
        whiteSpace.set('\r');
        whiteSpace.set('\f');
    }

    /**
     * Punctuation characters that each form a token on their own. The slash
     * is not listed because it may introduce a comment and is therefore
     * handled separately.
     */
    private static final BitSet single = new BitSet();
    static {
        for (char ch : "!%^&*()-+=[]{}|:;,.<>?~".toCharArray()) {
            single.set(ch);
        }
    }

    /** Source of characters; the pushback allows one character of lookahead. */
    private final PushbackReader reader;

    /**
     * Constructor.
     *
     * @param reader
     *            the Reader for the Java source text
     */
    public Tokenizer(Reader reader) {
        this.reader = new PushbackReader(reader);
    }

    /**
     * Get the next Token in the stream.
     *
     * @return the Token; a token of kind {@code Token.EOF} once the
     *         underlying reader is exhausted
     * @throws IOException
     *             if reading from the underlying reader fails
     */
    public Token next() throws IOException {
        skipWhitespace();
        int c = reader.read();
        if (c < 0) {
            return new Token(Token.EOF);
        }
        if (c == '\n') {
            return new Token(Token.EOL);
        }
        if (c == '\'' || c == '"') {
            return munchString(c);
        }
        if (c == '/') {
            return maybeComment();
        }
        if (single.get(c)) {
            return new Token(Token.SINGLE, String.valueOf((char) c));
        }
        // Anything else starts a word; give the character back so that
        // parseWord() sees the word from its first character.
        reader.unread(c);
        return parseWord();
    }

    /**
     * Consume characters up to, but not including, the first one that is
     * not in the whitespace set (or up to end of input).
     */
    private void skipWhitespace() throws IOException {
        int c = reader.read();
        while (c >= 0 && whiteSpace.get(c)) {
            c = reader.read();
        }
        if (c >= 0) {
            reader.unread(c);
        }
    }

    /**
     * Read the remainder of a string or character literal whose opening
     * delimiter has already been consumed.
     *
     * <p>
     * A backslash causes the following character to be taken literally, so
     * an escaped delimiter does not end the literal. If the input ends
     * before the closing delimiter is seen, the text read so far is
     * returned.
     * </p>
     *
     * @param delimiter
     *            the opening quote character
     * @return a STRING token whose text includes the delimiters
     */
    private Token munchString(int delimiter) throws IOException {
        StringBuilder text = new StringBuilder();
        text.append((char) delimiter);
        boolean escaped = false;
        for (;;) {
            int c = reader.read();
            if (c < 0) {
                break;
            }
            text.append((char) c);
            if (escaped) {
                // Character following a backslash: never special.
                escaped = false;
            } else if (c == delimiter) {
                break;
            } else if (c == '\\') {
                escaped = true;
            }
        }
        return new Token(Token.STRING, text.toString());
    }

    /**
     * Decide what a just-consumed slash introduces: a single-line comment,
     * a block comment, or merely the slash itself.
     *
     * @return a COMMENT token, or a SINGLE token with text "/"
     */
    private Token maybeComment() throws IOException {
        int c = reader.read();
        if (c == '/') {
            return readLineComment();
        }
        if (c == '*') {
            return readBlockComment();
        }
        if (c >= 0) {
            reader.unread(c);
        }
        return new Token(Token.SINGLE, "/");
    }

    /**
     * Read a single-line comment whose leading "//" has been consumed. The
     * terminating line feed is left in the stream so that it is reported
     * as its own token.
     */
    private Token readLineComment() throws IOException {
        StringBuilder text = new StringBuilder("//");
        for (;;) {
            int c = reader.read();
            if (c < 0) {
                break;
            }
            if (c == '\n') {
                reader.unread(c);
                break;
            }
            text.append((char) c);
        }
        return new Token(Token.COMMENT, text.toString());
    }

    /**
     * Read a C-style comment whose leading slash-star has been consumed.
     * The comment ends at the first star-slash pair, or at end of input if
     * it is never closed.
     */
    private Token readBlockComment() throws IOException {
        StringBuilder text = new StringBuilder("/*");
        // True while the previously read character was a '*', i.e. a '/'
        // right now would close the comment.
        boolean afterStar = false;
        for (;;) {
            int c = reader.read();
            if (c < 0) {
                break;
            }
            text.append((char) c);
            if (afterStar && c == '/') {
                break;
            }
            afterStar = (c == '*');
        }
        return new Token(Token.COMMENT, text.toString());
    }

    /**
     * Tell whether a character ends the word currently being read. This is
     * every character that {@link #next()} treats as the start of some
     * other kind of token (or skips as whitespace), so that a slash or a
     * quote directly following a word is not absorbed into the word.
     */
    private static boolean endsWord(int c) {
        return c == '\n' || c == '/' || c == '\'' || c == '"' || whiteSpace.get(c) || single.get(c);
    }

    /**
     * Read a word: a maximal run of characters up to end of input or the
     * next character for which {@link #endsWord(int)} holds. The
     * terminating character is pushed back for the next call.
     *
     * @return a WORD token
     */
    private Token parseWord() throws IOException {
        StringBuilder text = new StringBuilder();
        for (;;) {
            int c = reader.read();
            if (c < 0) {
                break;
            }
            if (endsWord(c)) {
                reader.unread(c);
                break;
            }
            text.append((char) c);
        }
        return new Token(Token.WORD, text.toString());
    }
}
// vim:ts=4