/** Regular expression that finds a tag: optional doubled brackets, optional slash, name, optional {@code :hex} part and optional {@code =parameter}. */
  private static final Pattern BBTAG_REGEXP = Pattern.compile("\\[\\[?/?([A-Za-z\\*]+)(:[a-f0-9]+)?(=[^\\]]+)?\\]?\\]");

  /**
   * Regular expression that finds a run of two or more line breaks.
   */
  private static final Pattern P_REGEXP = Pattern.compile("(\r?\n){2,}");

  private final ParserParameters parserParameters;

  /**
   * Creates a parser that uses the given parameters.
   *
   * @param parserParameters parser parameters
   */
  public Parser(ParserParameters parserParameters) {
    this.parserParameters = parserParameters;
  }

  /**
   * Escapes the given string by delegating to {@code StringUtil.escapeHtml}.
   *
   * @param html text to escape
   * @return the value returned by {@code StringUtil.escapeHtml}
   */
  public static String escape(String html) {
    return StringUtil.escapeHtml(html);
  }

  /**
   * Creates a new root node bound to this parser's parameters.
   *
   * @return a fresh root node
   */
  public RootNode createRootNode() {
    return new RootNode(parserParameters);
  }

  /**
   * Entry point for parsing LORCODE.
   *
   * <p>Scans {@code bbcode} tag by tag; the text between tags is pushed as text nodes,
   * known tags open or close tag nodes, and unknown tags are kept as plain text.
   *
   * @param rootNode root node of the new tree
   * @param bbcode   LORCODE text to process
   * @return the root node of the tree that was built (the same node that was passed in)
   */
  public RootNode parseRoot(RootNode rootNode, String bbcode) {
    Node currentNode = rootNode;
    ParserAutomatonState automatonState = new ParserAutomatonState(rootNode, parserParameters);

    // A single matcher is reused for the whole input; region() resets it before each search.
    Matcher match = BBTAG_REGEXP.matcher(bbcode);
    int length = bbcode.length();

    while (automatonState.getPos() < length) {
      match.region(automatonState.getPos(), length);

      if (match.find()) {
        if (!automatonState.isFirstCode()) {
          currentNode = pushTextNode(automatonState, currentNode, bbcode.substring(automatonState.getPos(), match.start()));
        } else {
          // First text chunk after an opening code tag: drop its leading line break.
          currentNode = trimNewLine(automatonState, currentNode, bbcode, match);
        }

        automatonState.processTagMatcher(match);

        if (automatonState.isTagEscaped()) {
          currentNode = processEscapedTag(currentNode, automatonState);
        } else if (automatonState.getAllTagsNames().contains(automatonState.getTagname())) {
          currentNode = processKnownTag(currentNode, automatonState);
        } else {
          // Unknown tag: keep it in the output as plain text.
          currentNode = pushTextNode(automatonState, currentNode, automatonState.getWholematch());
        }

        automatonState.setPos(match.end());
      } else {
        // No more tags: the rest of the input is plain text.
        currentNode = pushTextNode(automatonState, currentNode, bbcode.substring(automatonState.getPos()));
        automatonState.setPos(length);
      }
    }

    return automatonState.getRootNode();
  }

  /**
   * Adds a text node.
   *
   * <p>If the current node does not allow text, a {@code p} or {@code div} node is opened
   * (whichever the current node allows, {@code p} first), otherwise the search moves to the
   * parent. Whitespace-only text is dropped when the current node does not allow text.
   * A run of two or more line breaks starts a new {@code p} node unless the current tag is
   * listed in the disallowed-paragraph tags.
   *
   * @param automatonState current automaton state
   * @param currentNode    current node
   * @param text           text to add
   * @return the new current node
   */
  private Node pushTextNode(ParserAutomatonState automatonState, Node currentNode, String text) {
    if (text.trim().isEmpty() && !currentNode.allows("text")) {
      return currentNode;
    }

    while (!currentNode.allows("text")) {
      if (currentNode.allows("p")) {
        TagNode node = new TagNode(currentNode, parserParameters, "p", "", automatonState.getRootNode());
        currentNode.addChildren(node);
        currentNode = node;
      } else if (currentNode.allows("div")) {
        TagNode node = new TagNode(currentNode, parserParameters, "div", "", automatonState.getRootNode());
        currentNode.addChildren(node);
        currentNode = node;
      } else {
        currentNode = currentNode.getParent();
      }
    }

    boolean isParagraph = false;
    boolean isAllow = true;
    boolean isParagraphed = false;

    if (currentNode instanceof TagNode) {
      TagNode tempNode = (TagNode) currentNode;
      Set<String> disallowedParagraphTags = parserParameters.getDisallowedParagraphTags();
      Set<String> paragraphedTags = parserParameters.getParagraphedTags();
      String currentTagName = tempNode.getBbtag().getName();

      if (disallowedParagraphTags.contains(currentTagName)) {
        isAllow = false;
      }
      if (paragraphedTags.contains(currentTagName)) {
        isParagraphed = true;
      }
      if ("p".equals(currentTagName)) {
        isParagraph = true;
      }
    }

    // If the text contains a double line break and the current tag allows inserting a new
    // p tag, insert p. Exception: when the current tag is itself p, go one level up in the
    // tree first and insert the p with the remaining text there.
    Matcher matcher = P_REGEXP.matcher(text);

    if (isAllow && matcher.find()) {
      String head = text.substring(0, matcher.start());
      String tail = text.substring(matcher.end());

      if (!head.isEmpty()) {
        currentNode.addChildren(rawPushTextNode(automatonState, currentNode, head));
      }
      if (isParagraph) {
        currentNode = currentNode.getParent();
      }
      if (!tail.isEmpty()) {
        TagNode node = new TagNode(currentNode, parserParameters, "p", " ", automatonState.getRootNode());
        currentNode.addChildren(node);
        currentNode = node;
        // The tail may contain further double line breaks; handle them recursively.
        currentNode = pushTextNode(automatonState, currentNode, tail);
      }
    } else {
      if (isParagraphed) {
        currentNode.addChildren(rawPushTextNode(automatonState, currentNode, text));
      } else {
        // replaceAll() resets the matcher, so it is safe whether or not find() ran above.
        currentNode.addChildren(rawPushTextNode(automatonState, currentNode, matcher.replaceAll("")));
      }
    }

    return currentNode;
  }

  /**
   * Creates (without attaching) a text node for the given text: a {@code TextCodeNode} while
   * the automaton is in code mode, a plain {@code TextNode} otherwise.
   *
   * @param automatonState current automaton state
   * @param currentNode    node passed to the new text node as its parent
   * @param text           node text
   * @return the newly created text node
   */
  private TextNode rawPushTextNode(ParserAutomatonState automatonState, Node currentNode, String text) {
    if (!automatonState.isCode()) {
      return new TextNode(currentNode, parserParameters, text, automatonState);
    } else {
      return new TextCodeNode(currentNode, parserParameters, text, automatonState);
    }
  }

  /**
   * Adds a new tag node to the tree.
   *
   * <p>If the current node does not allow the tag, the tag is either discarded (when it is
   * discardable), wrapped into its implicit tag, or retried against the parent node.
   *
   * @param automatonState current automaton state
   * @param currentNode    current node
   * @param name           tag name
   * @param parameter      tag parameter
   * @return the new current node of the tree
   */
  private Node pushTagNode(ParserAutomatonState automatonState, Node currentNode, String name, String parameter) {
    if (!currentNode.allows(name)) {
      Map<String, Tag> allTagsDict = parserParameters.getAllTagsDict();
      Set<String> blockLevelTags = parserParameters.getBlockLevelTags();
      Tag newTag = allTagsDict.get(name);

      if (newTag.isDiscardable()) {
        return currentNode;
      } else if (currentNode == automatonState.getRootNode()
          // '&&' binds tighter than '||': the implicit-tag check is not applied to the root.
          || (blockLevelTags.contains(((TagNode) currentNode).getBbtag().getName()) && newTag.getImplicitTag() != null)) {
        if (currentNode != automatonState.getRootNode() && currentNode instanceof TagNode) {
          TagNode currentTagNode = (TagNode) currentNode;
          if ("p".equals(currentTagNode.getBbtag().getName())) {
            // Leave the paragraph and retry one level up.
            currentNode = currentNode.getParent();
            return pushTagNode(automatonState, currentNode, name, parameter);
          }
        }
        // Open the implicit tag first, then the requested tag inside it.
        currentNode = pushTagNode(automatonState, currentNode, newTag.getImplicitTag(), "");
        currentNode = pushTagNode(automatonState, currentNode, name, parameter);
      } else {
        currentNode = currentNode.getParent();
        currentNode = pushTagNode(automatonState, currentNode, name, parameter);
      }
    } else {
      TagNode node = new TagNode(currentNode, parserParameters, name, parameter, automatonState.getRootNode());
      currentNode.addChildren(node);
      // A self-closing tag never becomes the current node.
      if (!node.getBbtag().isSelfClosing()) {
        currentNode = node;
      }
    }

    return currentNode;
  }

  /**
   * Handles a closing tag.
   *
   * <p>Walks from the current node up to the root looking for an open tag with the given
   * name (a closing {@code url} also matches an open {@code url2}). When found, the parent of
   * that tag becomes the current node; otherwise the current node is left unchanged.
   *
   * @param rootNode    root node
   * @param currentNode current node
   * @param name        name of the tag being closed
   * @return the new current node after closing the tag
   */
  private Node closeTagNode(RootNode rootNode, Node currentNode, String name) {
    Node tempNode = currentNode;

    while (tempNode != rootNode) {
      if (tempNode instanceof TagNode) {
        String tagName = ((TagNode) tempNode).getBbtag().getName();
        if (tagName.equals(name) || ("url".equals(name) && "url2".equals(tagName))) {
          return tempNode.getParent();
        }
      }
      tempNode = tempNode.getParent();
    }

    return currentNode;
  }

  /**
   * Processes a tag whose name is in the set of known tag names.
   *
   * <p>A doubled bracket on one side only ({@code [[} or {@code ]]}) is emitted as a literal
   * single bracket around the processed tag.
   *
   * @param currentNode    current node
   * @param automatonState current automaton state (holds the matched tag)
   * @return the new current node
   */
  private Node processKnownTag(Node currentNode, ParserAutomatonState automatonState) {
    if (automatonState.getWholematch().startsWith("[[")) {
      currentNode = pushTextNode(automatonState, currentNode, "[");
    }

    boolean tagNameIsCode = "code".equals(automatonState.getTagname()) || "inline".equals(automatonState.getTagname());

    if (automatonState.isCloseTag()) {
      currentNode = processCloseTag(automatonState, currentNode, tagNameIsCode);
    } else {
      currentNode = processTag(automatonState, currentNode, tagNameIsCode);
    }

    if (automatonState.getWholematch().endsWith("]]")) {
      currentNode = pushTextNode(automatonState, currentNode, "]");
    }

    return currentNode;
  }

  /**
   * Processes an opening tag.
   *
   * <p>Inside code mode any tag other than {@code code}/{@code inline} is kept as text. A
   * {@code code}/{@code inline} tag switches the automaton into code mode. A {@code url} tag
   * with a non-empty parameter is pushed as {@code url2}.
   *
   * @param automatonState current automaton state (holds the matched tag)
   * @param currentNode    current node
   * @param tagNameIsCode  whether the tag name is {@code code} or {@code inline}
   * @return the new current node
   */
  private Node processTag(ParserAutomatonState automatonState, Node currentNode, boolean tagNameIsCode) {
    if (automatonState.isCode() && !tagNameIsCode) {
      String text = automatonState.getWholematch();
      // The caller emits the extra bracket of "[[" / "]]" itself, so drop it here.
      if (text.startsWith("[[")) {
        text = text.substring(1);
      }
      if (text.endsWith("]]")) {
        text = text.substring(0, text.length() - 1);
      }
      currentNode = pushTextNode(automatonState, currentNode, text);
    } else if (tagNameIsCode) {
      automatonState.setCode(true);
      automatonState.setFirstCode(true);
      currentNode = pushTagNode(automatonState, currentNode, automatonState.getTagname(), automatonState.getParameter());
    } else {
      if ("url".equals(automatonState.getTagname()) && !StringUtils.isEmpty(automatonState.getParameter())) {
        // special check for [url] with a parameter
        currentNode = pushTagNode(automatonState, currentNode, "url2", automatonState.getParameter());
      } else {
        currentNode = pushTagNode(automatonState, currentNode, automatonState.getTagname(), automatonState.getParameter());
      }
    }

    return currentNode;
  }

  /**
   * Processes an escaped tag (one wrapped in doubled brackets on both sides).
   *
   * <p>For a known tag outside code mode the outer pair of brackets is removed; otherwise the
   * match is kept verbatim. The result is pushed as text.
   *
   * @param currentNode    current node
   * @param automatonState current automaton state (holds the matched tag)
   * @return the new current node
   */
  private Node processEscapedTag(Node currentNode, ParserAutomatonState automatonState) {
    String textNode;

    if (automatonState.getAllTagsNames().contains(automatonState.getTagname()) && !automatonState.isCode()) {
      textNode = automatonState.getWholematch().substring(1, automatonState.getWholematch().length() - 1);
    } else {
      textNode = automatonState.getWholematch();
    }

    currentNode = pushTextNode(automatonState, currentNode, textNode);
    return currentNode;
  }

  /**
   * Processes a closing tag.
   *
   * <p>Outside code mode, or for a closing {@code code}/{@code inline} tag, the matching open
   * tag is closed; inside code mode any other closing tag is kept as text. A closing
   * {@code code}/{@code inline} tag always switches code mode off.
   *
   * @param automatonState current automaton state (holds the matched tag)
   * @param currentNode    current node
   * @param tagNameIsCode  whether the tag name is {@code code} or {@code inline}
   * @return the new current node
   */
  private Node processCloseTag(ParserAutomatonState automatonState, Node currentNode, boolean tagNameIsCode) {
    if (!automatonState.isCode() || tagNameIsCode) {
      currentNode = closeTagNode(automatonState.getRootNode(), currentNode, automatonState.getTagname());
    } else {
      currentNode = pushTextNode(automatonState, currentNode, automatonState.getWholematch());
    }

    if (tagNameIsCode) {
      automatonState.setCode(false);
    }

    return currentNode;
  }

  /**
   * Pushes the text between the current position and the matched tag, dropping one leading
   * line break ({@code \n} or {@code \r\n}), and clears the "first code" flag.
   *
   * @param automatonState current automaton state
   * @param currentNode    current node
   * @param bbcode         whole text being parsed
   * @param match          matcher positioned on the next tag
   * @return the new current node
   */
  private Node trimNewLine(ParserAutomatonState automatonState, Node currentNode, String bbcode, Matcher match) {
    String fixWhole = bbcode.substring(automatonState.getPos(), match.start());

    if (fixWhole.startsWith("\n")) {
      fixWhole = fixWhole.substring(1); // bite off the leading line break
    } else if (fixWhole.startsWith("\r\n")) {
      fixWhole = fixWhole.substring(2); // bite off the leading line break
    }

    automatonState.setFirstCode(false);
    return pushTextNode(automatonState, currentNode, fixWhole);
  }

  /**
   * Mutable state of one parsing run: the current position in the input, the code-mode
   * flags, and the pieces of the most recently matched tag.
   */
  public class ParserAutomatonState {
    private final RootNode rootNode;
    private final Set<String> allTagsNames;
    private int pos = 0;
    private boolean isCode = false;
    private boolean firstCode = false;
    private final RuTypoChanger changer = new RuTypoChanger();
    private String tagname;
    private String parameter;
    private String wholematch;

    private ParserAutomatonState(RootNode rootNode, ParserParameters parserParameters) {
      this.rootNode = rootNode;
      allTagsNames = parserParameters.getAllTagsNames();
    }

    /**
     * Stores the tag name, parameter and whole match of a successful tag match.
     *
     * @param match matcher of the tag pattern after a successful {@code find()}
     */
    private void processTagMatcher(Matcher match) {
      // Locale.ROOT keeps the conversion locale-independent: with the default-locale
      // overload a Turkish/Azeri locale maps 'I' to dotless 'ı', and upper-case tags
      // containing 'I' would no longer match the known tag names.
      tagname = match.group(1).toLowerCase(java.util.Locale.ROOT);
      parameter = match.group(3);
      wholematch = match.group(0);
      if (!StringUtils.isEmpty(parameter)) {
        // Group 3 includes the leading '='; drop it.
        parameter = parameter.substring(1);
      }
    }

    /** Returns whether the last matched tag has doubled brackets on both sides. */
    private boolean isTagEscaped() {
      return wholematch.startsWith("[[") && wholematch.endsWith("]]");
    }

    /** Returns whether the last matched tag starts with {@code [/} or {@code [[/}. */
    private boolean isCloseTag() {
      return wholematch.startsWith("[/") || wholematch.startsWith("[[/");
    }

    private int getPos() {
      return pos;
    }

    private void setPos(int pos) {
      this.pos = pos;
    }

    private boolean isCode() {
      return isCode;
    }

    private void setCode(boolean code) {
      isCode = code;
    }

    private boolean isFirstCode() {
      return firstCode;
    }

    private void setFirstCode(boolean firstCode) {
      this.firstCode = firstCode;
    }

    private String getTagname() {
      return tagname;
    }

    private String getParameter() {
      return parameter;
    }

    private String getWholematch() {
      return wholematch;
    }

    /** Returns the root node this state was created for. */
    public RootNode getRootNode() {
      return rootNode;
    }

    private Set<String> getAllTagsNames() {
      return allTagsNames;
    }

    /** Returns the {@code RuTypoChanger} instance owned by this state. */
    public RuTypoChanger getTypoChanger() {
      return changer;
    }
  }
}