/*******************************************************************************
* Copyright (c) 2008 Scott Stanchfield.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Based on the ANTLR parser generator by Terence Parr, http://antlr.org
* Ric Klaren <klaren@cs.utwente.nl>
* Scott Stanchfield - Modifications for XML Parsing
*******************************************************************************/
package com.javadude.antxr;
import java.util.Map;
import com.javadude.antxr.collections.impl.BitSet;
public abstract class CharScanner implements TokenStream {
static final char NO_CHAR = 0;
public static final char EOF_CHAR = (char)-1;
protected ANTXRStringBuffer text; // text of current token
protected boolean saveConsumedInput = true; // does consume() save characters?
protected Class<?> tokenObjectClass; // what kind of tokens to create?
protected boolean caseSensitive = true;
protected boolean caseSensitiveLiterals = true;
protected Map<ANTXRHashString, Integer> literals; // set by subclass
/** Tab chars are handled by tab() according to this value; override
* method to do anything weird with tabs.
*/
protected int tabsize = 8;
protected Token _returnToken = null; // used to return tokens w/o using return val.
// Hash string used so we don't new one every time to check literals table
protected ANTXRHashString hashString;
protected LexerSharedInputState inputState;
/** Used during filter mode to indicate that path is desired.
* A subsequent scan error will report an error as usual if
* acceptPath=true;
*/
protected boolean commitToPath = false;
/** Used to keep track of indentdepth for traceIn/Out */
protected int traceDepth = 0;
public CharScanner() {
text = new ANTXRStringBuffer();
hashString = new ANTXRHashString(this);
setTokenObjectClass("com.javadude.antxr.CommonToken");
}
public CharScanner(InputBuffer cb) { // SAS: use generic buffer
this();
inputState = new LexerSharedInputState(cb);
}
public CharScanner(LexerSharedInputState sharedState) {
this();
inputState = sharedState;
}
public void append(char c) {
if (saveConsumedInput) {
text.append(c);
}
}
public void append(String s) {
if (saveConsumedInput) {
text.append(s);
}
}
public void commit() {
inputState.input.commit();
}
public void consume() throws CharStreamException {
if (inputState.guessing == 0) {
char c = LA(1);
if (caseSensitive) {
append(c);
}
else {
// use input.LA(), not LA(), to get original case
// CharScanner.LA() would toLower it.
append(inputState.input.LA(1));
}
if (c == '\t') {
tab();
}
else {
inputState.column++;
}
}
inputState.input.consume();
}
/** Consume chars until one matches the given char */
public void consumeUntil(int c) throws CharStreamException {
while (LA(1) != CharScanner.EOF_CHAR && LA(1) != c) {
consume();
}
}
/** Consume chars until one matches the given set */
public void consumeUntil(BitSet set) throws CharStreamException {
while (LA(1) != CharScanner.EOF_CHAR && !set.member(LA(1))) {
consume();
}
}
public boolean getCaseSensitive() {
return caseSensitive;
}
public final boolean getCaseSensitiveLiterals() {
return caseSensitiveLiterals;
}
public int getColumn() {
return inputState.column;
}
public void setColumn(int c) {
inputState.column = c;
}
public boolean getCommitToPath() {
return commitToPath;
}
public String getFilename() {
return inputState.filename;
}
public InputBuffer getInputBuffer() {
return inputState.input;
}
public LexerSharedInputState getInputState() {
return inputState;
}
public void setInputState(LexerSharedInputState state) {
inputState = state;
}
public int getLine() {
return inputState.line;
}
/** return a copy of the current text buffer */
public String getText() {
return text.toString();
}
public Token getTokenObject() {
return _returnToken;
}
public char LA(int i) throws CharStreamException {
if (caseSensitive) {
return inputState.input.LA(i);
}
return toLower(inputState.input.LA(i));
}
protected Token makeToken(int t) {
try {
Token tok = (Token)tokenObjectClass.newInstance();
tok.setType(t);
tok.setColumn(inputState.tokenStartColumn);
tok.setLine(inputState.tokenStartLine);
// tracking real start line now: tok.setLine(inputState.line);
return tok;
}
catch (InstantiationException ie) {
panic("can't instantiate token: " + tokenObjectClass);
}
catch (IllegalAccessException iae) {
panic("Token class is not accessible" + tokenObjectClass);
}
return Token.badToken;
}
public int mark() {
return inputState.input.mark();
}
public void match(char c) throws MismatchedCharException, CharStreamException {
if (LA(1) != c) {
throw new MismatchedCharException(LA(1), c, false, this);
}
consume();
}
public void match(BitSet b) throws MismatchedCharException, CharStreamException {
if (!b.member(LA(1))) {
throw new MismatchedCharException(LA(1), b, false, this);
}
consume();
}
public void match(String s) throws MismatchedCharException, CharStreamException {
int len = s.length();
for (int i = 0; i < len; i++) {
if (LA(1) != s.charAt(i)) {
throw new MismatchedCharException(LA(1), s.charAt(i), false, this);
}
consume();
}
}
public void matchNot(char c) throws MismatchedCharException, CharStreamException {
if (LA(1) == c) {
throw new MismatchedCharException(LA(1), c, true, this);
}
consume();
}
public void matchRange(char c1, char c2) throws MismatchedCharException, CharStreamException {
if (LA(1) < c1 || LA(1) > c2) {
throw new MismatchedCharException(LA(1), c1, c2, false, this);
}
consume();
}
public void newline() {
inputState.line++;
inputState.column = 1;
}
/** advance the current column number by an appropriate amount
* according to tab size. This method is called from consume().
*/
public void tab() {
int c = getColumn();
int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
setColumn( nc );
}
public void setTabSize( int size ) {
tabsize = size;
}
public int getTabSize() {
return tabsize;
}
/** @see #panic(String)
*/
public void panic() {
System.err.println("CharScanner: panic");
Utils.error("");
}
/** This method is executed by ANTXR internally when it detected an illegal
* state that cannot be recovered from.
* The default implementation of this method calls
* {@link java.lang.System.exit(int)} and writes directly to
* {@link java.lang.System.err)} , which is usually not appropriate when
* a translator is embedded into a larger application. <em>It is highly
* recommended that this method be overridden to handle the error in a
* way appropriate for your application (e.g. throw an unchecked
* exception)</em>.
*/
public void panic(String s) {
System.err.println("CharScanner; panic: " + s);
Utils.error(s);
}
/** Parser error-reporting function can be overridden in subclass */
public void reportError(RecognitionException ex) {
System.err.println(ex);
}
/** Parser error-reporting function can be overridden in subclass */
public void reportError(String s) {
if (getFilename() == null) {
System.err.println("error: " + s);
}
else {
System.err.println(getFilename() + ": error: " + s);
}
}
/** Parser warning-reporting function can be overridden in subclass */
public void reportWarning(String s) {
if (getFilename() == null) {
System.err.println("warning: " + s);
}
else {
System.err.println(getFilename() + ": warning: " + s);
}
}
public void resetText() {
text.setLength(0);
inputState.tokenStartColumn = inputState.column;
inputState.tokenStartLine = inputState.line;
}
public void rewind(int pos) {
inputState.input.rewind(pos);
// RK: should not be here, it is messing up column calculation
// setColumn(inputState.tokenStartColumn);
}
public void setCaseSensitive(boolean t) {
caseSensitive = t;
}
public void setCommitToPath(boolean commit) {
commitToPath = commit;
}
public void setFilename(String f) {
inputState.filename = f;
}
public void setLine(int line) {
inputState.line = line;
}
public void setText(String s) {
resetText();
text.append(s);
}
public void setTokenObjectClass(String cl) {
try {
tokenObjectClass = Utils.loadClass(cl);
}
catch (ClassNotFoundException ce) {
panic("ClassNotFoundException: " + cl);
}
}
// Test the token text against the literals table
// Override this method to perform a different literals test
public int testLiteralsTable(int ttype) {
hashString.setBuffer(text.getBuffer(), text.length());
Integer literalsIndex = literals.get(hashString);
if (literalsIndex != null) {
ttype = literalsIndex.intValue();
}
return ttype;
}
/** Test the text passed in against the literals table
* Override this method to perform a different literals test
* This is used primarily when you want to test a portion of
* a token.
*/
public int testLiteralsTable(String testToTest, int ttype) {
ANTXRHashString s = new ANTXRHashString(testToTest, this);
Integer literalsIndex = literals.get(s);
if (literalsIndex != null) {
ttype = literalsIndex.intValue();
}
return ttype;
}
// Override this method to get more specific case handling
public char toLower(char c) {
return Character.toLowerCase(c);
}
public void traceIndent() {
for (int i = 0; i < traceDepth; i++) {
System.out.print(" ");
}
}
public void traceIn(String rname) throws CharStreamException {
traceDepth += 1;
traceIndent();
System.out.println("> lexer " + rname + "; c==" + LA(1));
}
public void traceOut(String rname) throws CharStreamException {
traceIndent();
System.out.println("< lexer " + rname + "; c==" + LA(1));
traceDepth -= 1;
}
/** This method is called by YourLexer.nextToken() when the lexer has
* hit EOF condition. EOF is NOT a character.
* This method is not called if EOF is reached during
* syntactic predicate evaluation or during evaluation
* of normal lexical rules, which presumably would be
* an IOException. This traps the "normal" EOF condition.
*
* uponEOF() is called after the complete evaluation of
* the previous token and only if your parser asks
* for another token beyond that last non-EOF token.
*
* You might want to throw token or char stream exceptions
* like: "Heh, premature eof" or a retry stream exception
* ("I found the end of this file, go back to referencing file").
*/
public void uponEOF() {
// nothing
}
}