/*
GNU General Public License
CacheWolf is a software for PocketPC, Win and Linux that
enables paperless caching.
It supports the sites geocaching.com and opencaching.de
Copyright (C) 2006 CacheWolf development team
See http://www.cachewolf.de/ for more information.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package CacheWolf;
import CacheWolf.utils.MyLocale;
import ewe.util.Vector;
/**
* Class to tokenise (break up) the code into single tokens, so the
* parser may do its job.
*
*/
public class Tokenizer {
    /**
     * Normally only a semicolon (;) can be used to separate commands. If this variable is set to true,
     * newlines also terminate a command. If a newline is preceded by a backslash or an underscore
     * (= line continuation characters), the newline does not terminate the command even if this
     * variable is true.
     */
    public boolean newLineIsSeparator = true;
    /** Instructions to tokenise; tokenizeSource always appends a newline to the text it is given. */
    String mySource;
    /** Character most recently read from the source. */
    char look;
    /** Index into mySource of the next character to read. */
    int sourcePointer = 0;
    /** Text of the (partial) token that is currently being assembled. */
    String currentStream;
    /** Tokens (TokenObj instances) produced so far, in source order. */
    Vector TokenStack = new Vector();
    /** Line (starting at 1) and position within that line of the character last read. */
    int currentLine, currentPos;
    /** Token under construction: created by startToken(), completed by emitToken(). */
    TokenObj thisToken;
    /** Receives the error messages; supplied by the caller of tokenizeSource. */
    Vector messageStack;
    /** True once err() has recorded a message, so that tokenizeSource does not report the same failure twice. */
    private boolean errorReported;

    public Tokenizer() { // Public constructor
    }

    /**
     * Report a tokenising error and abort tokenising.
     * Two entries are added to the message stack (the location, then the message text) and the
     * error location is handed to the solver panel's setSelectionRange (presumably to highlight
     * the offending text in the editor).
     *
     * @param str
     *            Error message
     * @throws Exception
     *             always; caught in tokenizeSource
     */
    private void err(String str) throws Exception {
        messageStack.add(MyLocale.getMsg(1700, "Error on line: ") + currentLine + " " + MyLocale.getMsg(1701, " position: ") + currentPos);
        messageStack.add(str);
        errorReported = true;
        MainTab.itself.solverPanel.setSelectionRange(0, currentLine - 1, currentPos, currentLine - 1);
        throw new Exception("Error " + str);
    }

    /** @return true if c is an ASCII letter (A-Z or a-z) */
    private boolean isAlpha(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /** @return true if c is an ASCII digit (0-9) */
    private boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** @return true if c is one of the operator/punctuation characters of the language */
    private boolean isSymbol(char c) {
        return "?!<>(){}*/,;^+-=".indexOf(c) != -1;
    }

    /**
     * Convert Unicode version of special chars to normal:
     * no-break space and U+2002..U+200B become a blank, U+2010..U+2015 become '-',
     * U+201C..U+201F become '"', square brackets become round brackets,
     * U+00F7 and U+2044 become '/', U+2024 becomes '.'.
     *
     * @param c
     *            Char to convert
     * @return Converted char
     */
    private char standardiseSourceChar(char c) {
        if (c == '\u00A0' || (c >= '\u2002' && c <= '\u200b'))
            c = ' ';
        if (c >= '\u2010' && c <= '\u2015')
            c = '-';
        if (c >= '\u201c' && c <= '\u201f')
            c = '"';
        if (c == '[')
            c = '(';
        if (c == ']')
            c = ')';
        if (c == '\u00f7' || c == '\u2044')
            c = '/';
        if (c == '\u2024')
            c = '.';
        return c;
    }

    /**
     * Read the next (raw, not standardised) character into look and advance.
     *
     * @return false at end of text; look is then set to a newline and nothing is consumed
     */
    private boolean getChar() {
        if (sourcePointer >= mySource.length()) {
            look = '\n';
            return false;
        }
        look = mySource.charAt(sourcePointer++);
        currentPos++;
        return true;
    }

    /**
     * Peek at the next character without consuming it.
     *
     * @return the standardised next character, or a newline at end of text
     */
    private char lookAhead() {
        if (sourcePointer >= mySource.length())
            return '\n';
        return standardiseSourceChar(mySource.charAt(sourcePointer));
    }

    /** Un-read the character consumed by the last successful getChar(). */
    private void backUp() {
        sourcePointer--;
        currentPos--;
    }

    /**
     * Create a new token object and remember the place where it started.
     * String tokens could span several lines, so we need to remember the starting line and position.
     */
    private void startToken() {
        thisToken = new TokenObj();
        thisToken.line = currentLine;
        thisToken.position = currentPos;
    }

    /**
     * Add the previously started token to the token stack, using currentStream as its text,
     * and reset currentStream.
     *
     * @param tt
     *            Token type (one of the TokenObj.TT_ constants)
     */
    private void emitToken(int tt) {
        thisToken.token = currentStream;
        thisToken.tt = tt;
        TokenStack.add(thisToken);
        currentStream = "";
    }

    /**
     * Collect letters and digits into one token. The keywords STOP/ST, IF, THEN and ENDIF/FI are
     * recognised regardless of case (ENDIF and FI are both stored as "ENDIF"); everything else is
     * emitted as a variable.
     */
    private void streamAlphas() {
        boolean readTooFar = false;
        startToken();
        while (getChar()) {
            if (!isAlpha(look) && !isDigit(look)) {
                readTooFar = true;
                break;
            }
            currentStream += look;
        }
        String s = currentStream.toUpperCase();
        if (s.equals("STOP") || s.equals("ST"))
            emitToken(TokenObj.TT_STOP);
        else if (s.equals("IF"))
            emitToken(TokenObj.TT_IF);
        else if (s.equals("THEN"))
            emitToken(TokenObj.TT_THEN);
        else if (s.equals("ENDIF") || s.equals("FI")) {
            currentStream = "ENDIF";
            emitToken(TokenObj.TT_ENDIF);
        } else
            emitToken(TokenObj.TT_VARIABLE);
        // We have read one character too far, so back off - but only if a character was
        // really consumed; at end of text getChar() consumes nothing and backing up would
        // make the last character be scanned again.
        if (readTooFar)
            backUp();
    }

    /** Collect digits and at most one decimal point into a number token. */
    private void streamDigits() {
        boolean foundDecSep = false; // To check that only one decimal point is allowed in a number
        boolean readTooFar = false;
        startToken();
        while (getChar()) {
            look = standardiseSourceChar(look);
            boolean firstDecSep = look == '.' && !foundDecSep;
            if (!isDigit(look) && !firstDecSep) {
                readTooFar = true;
                break;
            }
            currentStream += look;
            if (look == '.')
                foundDecSep = true;
        }
        emitToken(TokenObj.TT_NUMBER);
        // We have read one character too far, so back off (see streamAlphas for the guard)
        if (readTooFar)
            backUp();
    }

    /**
     * Collect a string token; the opening quote has already been consumed.
     * Two quotes following each other stand for one quote inside the string, a backslash followed
     * by n stands for a newline and a backslash followed by any other character for that character.
     * The characters of the string are stored unconverted.
     * If the string was opened with a typographic quote (U+201C..U+201F) it may also be closed
     * with one; a string opened with a plain quote is only closed by a plain quote, so it can
     * contain typographic quotes.
     *
     * @throws Exception
     *             if the end of the text is reached before the closing quote
     */
    private void streamString() throws Exception {
        startToken();
        currentStream = "";
        // The caller standardised the opening quote, so inspect the raw source to find out
        // which kind of quote was actually typed.
        boolean typographicOpener = sourcePointer > 0 && mySource.charAt(sourcePointer - 1) != '"';
        boolean terminated = false;
        while (getChar()) { // collect chars until next "
            boolean isQuote = look == '"' || (typographicOpener && standardiseSourceChar(look) == '"');
            if (isQuote) {
                if (lookAhead() != '"') {
                    terminated = true;
                    break; // " not followed by " => End of string
                }
                // Two " following each other are replaced by "
                currentStream += "\"";
                getChar();
            } else if (look == '\\') {
                if (!getChar())
                    break;
                if (look == 'n')
                    currentStream += "\n";
                else
                    currentStream += look;
            } else
                currentStream += look;
            // Need to count newlines inside a string spanning multiple lines so that we don't lose track
            if (look == '\n') {
                currentLine++;
                currentPos = 0;
            }
        } // EOT or closing quote
        if (!terminated) {
            // Restore start position of string for correct indication of error
            currentLine = thisToken.line;
            currentPos = thisToken.position;
            err(MyLocale.getMsg(1730, "Unterminated string"));
        }
        emitToken(TokenObj.TT_STRING);
    }

    /**
     * Emit a symbol token. The two-character comparisons == != <= >= <> >< are combined into one
     * token; a single = is emitted as TT_EQ, < as TT_LT, > as TT_GT and everything else as TT_SYMBOL.
     */
    private void streamSymbol() {
        startToken();
        boolean mayBeComparison = look == '=' || look == '!' || look == '<' || look == '>';
        if (!mayBeComparison) {
            emitToken(TokenObj.TT_SYMBOL);
            return;
        }
        // Check for == != <= >= <> ><
        if (getChar()) {
            currentStream += look;
            if (currentStream.equals("==")) {
                emitToken(TokenObj.TT_EQ);
                return;
            }
            if (currentStream.equals("!=") || currentStream.equals("><") || currentStream.equals("<>")) {
                emitToken(TokenObj.TT_NE);
                return;
            }
            if (currentStream.equals("<=")) {
                emitToken(TokenObj.TT_LE);
                return;
            }
            if (currentStream.equals(">=")) {
                emitToken(TokenObj.TT_GE);
                return;
            }
            backUp(); // Not a valid comparison symbol, forget the last character
        }
        currentStream = currentStream.substring(0, 1);
        if (currentStream.equals("="))
            emitToken(TokenObj.TT_EQ);
        else if (currentStream.equals("<"))
            emitToken(TokenObj.TT_LT);
        else if (currentStream.equals(">"))
            emitToken(TokenObj.TT_GT);
        else
            emitToken(TokenObj.TT_SYMBOL);
    }

    /**
     * Eat up all characters until next newline as we are in a comment.
     * A ; token is emitted in place of the comment and the line counter is advanced.
     */
    private void eatUpComment() {
        while (getChar() && look != '\n')
            ;
        currentStream = ";"; // Insert a dummy ;
        startToken();
        emitToken(TokenObj.TT_SYMBOL);
        currentStream = "";
        currentLine++;
        currentPos = 0;
    }

    /**
     * Collect a format string: the characters between two colons; the opening colon has already
     * been consumed. Only the characters . 0 and # are allowed.
     *
     * @throws Exception
     *             if any other character is found before the closing colon
     */
    private void formatString() throws Exception {
        currentStream = "";
        startToken();
        while (getChar() && look != ':') {
            look = standardiseSourceChar(look);
            currentStream += look;
            if (look != '.' && look != '0' && look != '#')
                err(MyLocale.getMsg(1731, "Invalid format character"));
        }
        emitToken(TokenObj.TT_FORMATSTR);
    }

    /**
     * Break the source text up into tokens. Afterwards TokenStack holds the tokens found.
     * Tokenising stops at the first error; the error is reported through msg and the tokens
     * found up to that point remain in TokenStack.
     *
     * @param src
     *            Text to tokenise; null is treated like an empty text
     * @param msg
     *            Vector to which error messages are added
     */
    public void tokenizeSource(String src, Vector msg) {
        // Concatenating a null reference would yield the text "null", i.e. a bogus variable token
        mySource = (src == null ? "" : src) + "\n";
        sourcePointer = 0;
        TokenStack.clear();
        messageStack = msg;
        errorReported = false;
        currentLine = 1;
        currentPos = 0;
        currentStream = "";
        try {
            while (getChar()) {
                look = standardiseSourceChar(look);
                // Blanks, tabs and carriage returns (e.g. from CR/LF line ends) only separate tokens
                if (look == ' ' || look == '\t' || look == '\r')
                    continue;
                currentStream += look;
                if (isAlpha(look) || look == '$') {
                    streamAlphas();
                } else if (isDigit(look)) {
                    streamDigits();
                } else if (isSymbol(look)) {
                    streamSymbol();
                } else if (look == '"') {
                    streamString();
                } else if (look == '\n') {
                    // A pending continuation character (kept in currentStream) suppresses the separator
                    if (newLineIsSeparator && !currentStream.equals("\\\n") && !currentStream.equals("_\n")) {
                        currentStream = ";";
                        startToken();
                        emitToken(TokenObj.TT_SYMBOL);
                    }
                    currentStream = "";
                    currentLine++;
                    currentPos = 0;
                } else if (look == '#') {
                    eatUpComment(); // Ignore characters until EOL
                } else if (look == ':') {
                    formatString();
                } else if (newLineIsSeparator && (look == '\\' || look == '_')) {
                    // Possible line continuation: the character stays in currentStream until the
                    // newline is seen. NOTE(review): if something other than a newline follows,
                    // the character becomes the first character of the next token.
                } else {
                    err(MyLocale.getMsg(1732, "Invalid character"));
                }
            }
        } catch (Exception e) {
            // Errors raised through err() are already on the message stack. Anything else is
            // unexpected; record it instead of dropping it silently.
            if (!errorReported && messageStack != null) {
                messageStack.add(MyLocale.getMsg(1700, "Error on line: ") + currentLine + " " + MyLocale.getMsg(1701, " position: ") + currentPos);
                messageStack.add("Error " + e);
            }
        }
    }
}