/* * FindBugs - Find Bugs in Java programs * Copyright (C) 2003-2007 University of Maryland * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package edu.umd.cs.findbugs.jaif; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Lexical scanner for external annotation files. * * @author David Hovemeyer * @see <a * href="http://groups.csail.mit.edu/pag/jsr308/annotation-file-utilities/">Annotation * File Utilities/</a> */ public class JAIFScanner { static class TokenPattern { private Pattern pattern; private JAIFTokenKind kind; public TokenPattern(String regex, JAIFTokenKind kind) { this.pattern = Pattern.compile("^" + regex); this.kind = kind; } public JAIFTokenKind getKind(String lexeme) { return kind; } public Pattern getPattern() { return pattern; } } // See http://java.sun.com/docs/books/jls/third_edition/html/lexical.html // Hexidecimal floating-point literals are not implemented. // Unicode escapes are not implemented (but could be implemented in the // fillLineBuf() method). private static final String ID_START = "[@A-Za-z_\\$]"; private static final String ID_REST = "[A-Za-z0-9_\\$]"; private static final String DIGIT = "[0-9]"; private static final String DIGITS = DIGIT + "+"; private static final String DIGITS_OPT = DIGIT + "*"; private static final String SIGN_OPT = "[+-]?"; private static final String DOT = "\\."; private static final String EXP_PART = "([Ee]" + SIGN_OPT + DIGITS + ")"; private static final String EXP_PART_OPT = EXP_PART + "?"; private static final String FLOAT_TYPE_SUFFIX = "[FfDd]"; private static final String FLOAT_TYPE_SUFFIX_OPT = FLOAT_TYPE_SUFFIX + "?"; private static final String OCTAL_DIGITS = "[0-7]+"; private static final String HEX_SIGNIFIER = "0[Xx]"; private static final String HEX_DIGITS = "[0-9A-Fa-f]+"; private static final String INT_TYPE_SUFFIX_OPT = "[Ll]?"; private static final String INPUT_CHAR = "[^\\\\\\\"]";// anything other // than backslash or // double-quote // character private static final String OCT_ESCAPE = "([0-7]|[0-3]?[0-7][0-7])"; private static final String ESCAPE_SEQ = "(\\\\[btnfr\"'\\\\]|\\\\" + OCT_ESCAPE + ")"; private static final String STRING_CHARS_OPT = "(" + INPUT_CHAR + "|" + ESCAPE_SEQ + ")*"; private static final TokenPattern[] TOKEN_PATTERNS = { // Misc. syntax new TokenPattern(":", JAIFTokenKind.COLON), new TokenPattern("\\(", JAIFTokenKind.LPAREN), new TokenPattern("\\)", JAIFTokenKind.RPAREN), new TokenPattern(",", JAIFTokenKind.COMMA), new TokenPattern("=", JAIFTokenKind.EQUALS), // Identifiers and keywords new TokenPattern(ID_START + "(" + ID_REST + ")*", JAIFTokenKind.IDENTIFIER_OR_KEYWORD), // FP literals new TokenPattern(DIGITS + DOT + DIGITS_OPT + EXP_PART_OPT + FLOAT_TYPE_SUFFIX_OPT, JAIFTokenKind.FLOATING_POINT_LITERAL), new TokenPattern(DOT + DIGITS + EXP_PART_OPT + FLOAT_TYPE_SUFFIX_OPT, JAIFTokenKind.FLOATING_POINT_LITERAL), new TokenPattern(DIGITS + EXP_PART + FLOAT_TYPE_SUFFIX_OPT, JAIFTokenKind.FLOATING_POINT_LITERAL), new TokenPattern(DIGITS + EXP_PART_OPT + FLOAT_TYPE_SUFFIX, JAIFTokenKind.FLOATING_POINT_LITERAL), // This must come after the FP literal patterns new TokenPattern(DOT, JAIFTokenKind.DOT), // Integer literals new TokenPattern("0" + OCTAL_DIGITS + INT_TYPE_SUFFIX_OPT, JAIFTokenKind.OCTAL_LITERAL), new TokenPattern(HEX_SIGNIFIER + HEX_DIGITS + INT_TYPE_SUFFIX_OPT, JAIFTokenKind.HEX_LITERAL), new TokenPattern(DIGITS + INT_TYPE_SUFFIX_OPT, JAIFTokenKind.DECIMAL_LITERAL), // String literals new TokenPattern("\"" + STRING_CHARS_OPT + "\"", JAIFTokenKind.STRING_LITERAL), }; private BufferedReader reader; private JAIFToken next; private String lineBuf; private int lineNum; /** * @param reader */ public JAIFScanner(Reader reader) { this.reader = new BufferedReader(reader); this.lineNum = 0; } public int getLineNumber() { return lineNum; } public JAIFToken nextToken() throws IOException, JAIFSyntaxException { if (next == null) { fetchToken(); } JAIFToken result = next; next = null; return result; } public JAIFToken peekToken() throws IOException, JAIFSyntaxException { if (next == null) { fetchToken(); } return next; } public boolean atEOF() throws IOException { fillLineBuf(); return lineBuf == null; } private void fillLineBuf() throws IOException { if (lineBuf == null) { lineBuf = reader.readLine(); if (lineBuf != null) { ++lineNum; } } } private boolean isHorizWhitespace(char c) { return c == ' ' || c == '\t'; } private void fetchToken() throws IOException, JAIFSyntaxException { assert next == null; fillLineBuf(); if (lineBuf == null) { throw new JAIFSyntaxException(this, "Unexpected end of file"); } // Strip leading whitespace, if any int wsCount = 0; while (wsCount < lineBuf.length() && isHorizWhitespace(lineBuf.charAt(wsCount))) { wsCount++; } if (wsCount > 0) { lineBuf = lineBuf.substring(wsCount); } // System.out.println("Consumed " + wsCount + // " characters of horizontal whitespace"); if (lineBuf.equals("")) { // Reached end of line. next = new JAIFToken(JAIFTokenKind.NEWLINE, "\n", lineNum); lineBuf = null; return; } // Try matching line buffer against all known patterns // until we fine one that matches. for (TokenPattern tokenPattern : TOKEN_PATTERNS) { Matcher m = tokenPattern.getPattern().matcher(lineBuf); if (m.find()) { String lexeme = m.group(); lineBuf = lineBuf.substring(lexeme.length()); next = new JAIFToken(tokenPattern.getKind(lexeme), lexeme, lineNum); return; } } throw new JAIFSyntaxException(this, "Unrecognized token (trying to match text `" + lineBuf + "')"); } }