CommentsParser.java example

Explorer
effectivejava-master
- test-resources
  - sample-codebases
package japa.parser.ast.comments;

import java.io.*;
import java.nio.charset.Charset;
import java.util.*;

/**
 * This parser cares exclusively about comments.
 */
public class CommentsParser {

    private enum State {
        CODE,
        WAITING_FOR_LINE_COMMENT,
        IN_LINE_COMMENT,
        WAITING_FOR_BLOCK_COMMENT,
        IN_BLOCK_COMMENT,
        WAITING_TO_LEAVE_BLOCK_COMMENT,
        IN_STRING;
    }

    private static final int COLUMNS_PER_TAB = 4;

    public CommentsCollection parse(final String source) throws IOException, UnsupportedEncodingException {
        InputStream in = new ByteArrayInputStream(source.getBytes(Charset.defaultCharset()));
        return parse(in, Charset.defaultCharset().name());
    }

    public CommentsCollection parse(final InputStream in, final String charsetName) throws IOException, UnsupportedEncodingException {
        boolean lastWasASlashR = false;
        BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName));
        CommentsCollection comments = new CommentsCollection();
        int r;

        Deque prevTwoChars = new LinkedList<Character>(Arrays.asList('z','z'));

        State state = State.CODE;
        LineComment currentLineComment = null;
        BlockComment currentBlockComment = null;
        StringBuffer currentContent = null;

        int currLine = 1;
        int currCol  = 1;

        while ((r=br.read()) != -1){
            char c = (char)r;
            if (c=='\r'){
                lastWasASlashR = true;
            } else if (c=='\n'&&lastWasASlashR){
                lastWasASlashR=false;
                continue;
            } else {
                lastWasASlashR=false;
            }
            switch (state) {
                case CODE:
                    if (prevTwoChars.peekLast().equals('/') && c == '/') {
                        currentLineComment = new LineComment();
                        currentLineComment.setBeginLine(currLine);
                        currentLineComment.setBeginColumn(currCol - 1);
                        state = State.IN_LINE_COMMENT;
                        currentContent = new StringBuffer();
                    } else if (prevTwoChars.peekLast().equals('/') && c == '*') {
                        currentBlockComment = new BlockComment();
                        currentBlockComment.setBeginLine(currLine);
                        currentBlockComment.setBeginColumn(currCol - 1);
                        state = State.IN_BLOCK_COMMENT;
                        currentContent = new StringBuffer();
                    } else if (c == '"') {
                        state = State.IN_STRING;
                    } else {
                        // nothing to do
                    }
                    break;
                case IN_LINE_COMMENT:
                    if (c=='\n' || c=='\r'){
                        currentLineComment.setContent(currentContent.toString());
                        currentLineComment.setEndLine(currLine);
                        currentLineComment.setEndColumn(currCol);
                        comments.addComment(currentLineComment);
                        state = State.CODE;
                    } else {
                        currentContent.append(c);
                    }
                    break;
                case IN_BLOCK_COMMENT:
                    if (prevTwoChars.peekLast().equals('*') && c=='/' && !prevTwoChars.peekFirst().equals('/')){

                        // delete last character
                        String content = currentContent.deleteCharAt(currentContent.toString().length()-1).toString();

                        if (content.startsWith("*")){
                            JavadocComment javadocComment = new JavadocComment();
                            javadocComment.setContent(content.substring(1));
                            javadocComment.setBeginLine(currentBlockComment.getBeginLine());
                            javadocComment.setBeginColumn(currentBlockComment.getBeginColumn());
                            javadocComment.setEndLine(currLine);
                            javadocComment.setEndColumn(currCol+1);
                            comments.addComment(javadocComment);
                        } else {
                            currentBlockComment.setContent(content);
                            currentBlockComment.setEndLine(currLine);
                            currentBlockComment.setEndColumn(currCol+1);
                            comments.addComment(currentBlockComment);
                        }
                        state = State.CODE;
                    } else {
                        currentContent.append(c=='\r'?'\n':c);
                    }
                    break;
                case IN_STRING:
                    if (!prevTwoChars.peekLast().equals('\\') && c == '"') {
                        state = State.CODE;
                    }
                    break;
                default:
                    throw new RuntimeException("Unexpected");
            }
            switch (c){
                case '\n':
                case '\r':
                    currLine+=1;
                    currCol = 1;
                    break;
                case '\t':
                    currCol+=COLUMNS_PER_TAB;
                    break;
                default:
                    currCol+=1;
            }
            prevTwoChars.remove();
            prevTwoChars.add(c);
        }

        if (state==State.IN_LINE_COMMENT){
            currentLineComment.setContent(currentContent.toString());
            currentLineComment.setEndLine(currLine);
            currentLineComment.setEndColumn(currCol);
            comments.addComment(currentLineComment);
        }

        return comments;
    }

}