package japa.parser.ast.comments; import java.io.*; import java.nio.charset.Charset; import java.util.*; /** * This parser cares exclusively about comments. */ public class CommentsParser { private enum State { CODE, WAITING_FOR_LINE_COMMENT, IN_LINE_COMMENT, WAITING_FOR_BLOCK_COMMENT, IN_BLOCK_COMMENT, WAITING_TO_LEAVE_BLOCK_COMMENT, IN_STRING; } private static final int COLUMNS_PER_TAB = 4; public CommentsCollection parse(final String source) throws IOException, UnsupportedEncodingException { InputStream in = new ByteArrayInputStream(source.getBytes(Charset.defaultCharset())); return parse(in, Charset.defaultCharset().name()); } public CommentsCollection parse(final InputStream in, final String charsetName) throws IOException, UnsupportedEncodingException { boolean lastWasASlashR = false; BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName)); CommentsCollection comments = new CommentsCollection(); int r; Deque prevTwoChars = new LinkedList<Character>(Arrays.asList('z','z')); State state = State.CODE; LineComment currentLineComment = null; BlockComment currentBlockComment = null; StringBuffer currentContent = null; int currLine = 1; int currCol = 1; while ((r=br.read()) != -1){ char c = (char)r; if (c=='\r'){ lastWasASlashR = true; } else if (c=='\n'&&lastWasASlashR){ lastWasASlashR=false; continue; } else { lastWasASlashR=false; } switch (state) { case CODE: if (prevTwoChars.peekLast().equals('/') && c == '/') { currentLineComment = new LineComment(); currentLineComment.setBeginLine(currLine); currentLineComment.setBeginColumn(currCol - 1); state = State.IN_LINE_COMMENT; currentContent = new StringBuffer(); } else if (prevTwoChars.peekLast().equals('/') && c == '*') { currentBlockComment = new BlockComment(); currentBlockComment.setBeginLine(currLine); currentBlockComment.setBeginColumn(currCol - 1); state = State.IN_BLOCK_COMMENT; currentContent = new StringBuffer(); } else if (c == '"') { state = State.IN_STRING; } else { // nothing to do } break; case IN_LINE_COMMENT: if (c=='\n' || c=='\r'){ currentLineComment.setContent(currentContent.toString()); currentLineComment.setEndLine(currLine); currentLineComment.setEndColumn(currCol); comments.addComment(currentLineComment); state = State.CODE; } else { currentContent.append(c); } break; case IN_BLOCK_COMMENT: if (prevTwoChars.peekLast().equals('*') && c=='/' && !prevTwoChars.peekFirst().equals('/')){ // delete last character String content = currentContent.deleteCharAt(currentContent.toString().length()-1).toString(); if (content.startsWith("*")){ JavadocComment javadocComment = new JavadocComment(); javadocComment.setContent(content.substring(1)); javadocComment.setBeginLine(currentBlockComment.getBeginLine()); javadocComment.setBeginColumn(currentBlockComment.getBeginColumn()); javadocComment.setEndLine(currLine); javadocComment.setEndColumn(currCol+1); comments.addComment(javadocComment); } else { currentBlockComment.setContent(content); currentBlockComment.setEndLine(currLine); currentBlockComment.setEndColumn(currCol+1); comments.addComment(currentBlockComment); } state = State.CODE; } else { currentContent.append(c=='\r'?'\n':c); } break; case IN_STRING: if (!prevTwoChars.peekLast().equals('\\') && c == '"') { state = State.CODE; } break; default: throw new RuntimeException("Unexpected"); } switch (c){ case '\n': case '\r': currLine+=1; currCol = 1; break; case '\t': currCol+=COLUMNS_PER_TAB; break; default: currCol+=1; } prevTwoChars.remove(); prevTwoChars.add(c); } if (state==State.IN_LINE_COMMENT){ currentLineComment.setContent(currentContent.toString()); currentLineComment.setEndLine(currLine); currentLineComment.setEndColumn(currCol); comments.addComment(currentLineComment); } return comments; } }