/* * Copyright (C) 2011 René Jeschke <rene_jeschke@yahoo.de> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package divconq.web.md.process; import java.util.LinkedList; /** * Utilities. * * @author René Jeschke <rene_jeschke@yahoo.de> */ public class Utils { /** Random number generator value. */ private static int RND = (int)System.nanoTime(); /** * LCG random number generator. * * @return A pseudo random number between 0 and 1023 */ public static int rnd() { return (RND = RND * 1664525 + 1013904223) >>> 22; } /** * Skips spaces in the given String. * * @param in * Input String. * @param start * Starting position. * @return The new position or -1 if EOL has been reached. */ public static int skipSpaces(String in, int start) { int pos = start; while(pos < in.length() && (in.charAt(pos) == ' ' || in.charAt(pos) == '\n')) pos++; return pos < in.length() ? pos : -1; } /** * Processed the given escape sequence. * * @param out * The StringBuilder to write to. * @param ch * The character. * @param pos * Current parsing position. * @return The new position. */ public static int escape(StringBuilder out, char ch, int pos) { switch(ch) { case '\\': case '[': case ']': case '(': case ')': case '{': case '}': case '#': case '"': case '\'': case '.': case '>': case '<': case '*': case '+': case '-': case '_': case '!': case '`': case '^': out.append(ch); return pos + 1; default: out.append('\\'); return pos; } } /** * Reads characters until any 'end' character is encountered. * * @param out * The StringBuilder to write to. * @param in * The Input String. * @param start * Starting position. * @param end * End characters. * @return The new position or -1 if no 'end' char was found. */ public static int readUntil(StringBuilder out, String in, int start, char... end) { int pos = start; while(pos < in.length()) { char ch = in.charAt(pos); if(ch == '\\' && pos + 1 < in.length()) { pos = escape(out, in.charAt(pos + 1), pos); } else { boolean endReached = false; for(int n = 0; n < end.length; n++) { if(ch == end[n]) { endReached = true; break; } } if(endReached) break; out.append(ch); } pos++; } return (pos == in.length()) ? -1 : pos; } /** * Reads a markdown link. * * @param out * The StringBuilder to write to. * @param in * Input String. * @param start * Starting position. * @return The new position or -1 if this is no valid markdown link. */ public static int readMdLink(StringBuilder out, String in, int start) { int pos = start; int counter = 1; while(pos < in.length()) { char ch = in.charAt(pos); if(ch == '\\' && pos + 1 < in.length()) { pos = escape(out, in.charAt(pos + 1), pos); } else { boolean endReached = false; switch(ch) { case '(': counter++; break; case ' ': if(counter == 1) endReached = true; break; case ')': counter--; if(counter == 0) endReached = true; break; } if(endReached) break; out.append(ch); } pos++; } return (pos == in.length()) ? -1 : pos; } /** * Reads a markdown link ID. * * @param out * The StringBuilder to write to. * @param in * Input String. * @param start * Starting position. * @return The new position or -1 if this is no valid markdown link ID. */ public static int readMdLinkId(StringBuilder out, String in, int start) { int pos = start; int counter = 1; while(pos < in.length()) { char ch = in.charAt(pos); boolean endReached = false; switch(ch) { case '\n': out.append(' '); break; case '[': counter++; out.append(ch); break; case ']': counter--; if(counter == 0) endReached = true; else out.append(ch); break; default: out.append(ch); break; } if(endReached) break; pos++; } return (pos == in.length()) ? -1 : pos; } /** * Reads characters until the end character is encountered, ignoring escape * sequences. * * @param out * The StringBuilder to write to. * @param in * The Input String. * @param start * Starting position. * @param end * End characters. * @return The new position or -1 if no 'end' char was found. */ public static int readRawUntil(StringBuilder out, String in, int start, char end) { int pos = start; while(pos < in.length()) { char ch = in.charAt(pos); if(ch == end) break; out.append(ch); pos++; } return (pos == in.length()) ? -1 : pos; } /** * Extracts the tag from an XML element. * * @param out * The StringBuilder to write to. * @param in * Input String. */ public static void getXMLTag(StringBuilder out, String in) { int pos = 1; if(in.charAt(1) == '/') pos++; while(Character.isLetterOrDigit(in.charAt(pos))) { out.append(in.charAt(pos++)); } } public static int scanHTML(String in, int start) { LinkedList<String> tags = new LinkedList<String>(); StringBuilder temp = new StringBuilder(); int pos = start; if (in.length() <= 4) return -1; /* TODO add comment support if (in.charAt(pos + 1) == '!') { if (this.readXMLComment(this, this.leading) > 0) return true; } */ pos = Utils.readXML(temp, in, pos, false); String element, tag; if (pos > -1) { element = temp.toString(); temp.setLength(0); Utils.getXMLTag(temp, element); tag = temp.toString().toLowerCase(); char sl = element.charAt(element.length() - 2); if (sl == '/') return pos; tags.add(tag); while (pos < in.length()) { // TODO check/add support for xml comments while (pos < in.length() && in.charAt(pos) != '<') pos++; if (pos >= in.length()) return -1; temp.setLength(0); int newPos = Utils.readXML(temp, in, pos, false); if (newPos > 0) { element = temp.toString(); temp.setLength(0); Utils.getXMLTag(temp, element); tag = temp.toString().toLowerCase(); sl = element.charAt(element.length() - 2); if(element.charAt(1) == '/') { if(!tags.getLast().equals(tag)) return -1; tags.removeLast(); } else if (sl != '/') { tags.addLast(tag); } pos = newPos; } else { pos++; } if (tags.size() == 0) return pos; } } return -1; } /** * Reads an XML element. * * @param out * The StringBuilder to write to. * @param in * Input String. * @param start * Starting position. * @param safeMode * Whether to escape unsafe HTML tags or not * @return The new position or -1 if this is no valid XML element. */ public static int readXML(StringBuilder out, String in, int start, boolean safeMode) { int pos; boolean isCloseTag; try { if(in.charAt(start + 1) == '/') { isCloseTag = true; pos = start + 2; } else if(in.charAt(start + 1) == '!') { out.append("<!"); return start + 1; } else { isCloseTag = false; pos = start + 1; } if(safeMode) { StringBuilder temp = new StringBuilder(); pos = readRawUntil(temp, in, pos, ' ', '/', '>'); if(pos == -1) return -1; //String tag = temp.toString().trim().toLowerCase(); // TODO? out.append("<"); if(isCloseTag) out.append('/'); out.append(temp); } else { out.append('<'); if(isCloseTag) out.append('/'); pos = readRawUntil(out, in, pos, ' ', '/', '>'); } if(pos == -1) return -1; pos = readRawUntil(out, in, pos, '/', '>'); if(in.charAt(pos) == '/') { out.append(" /"); pos = readRawUntil(out, in, pos + 1, '>'); if(pos == -1) return -1; } if(in.charAt(pos) == '>') { out.append('>'); return pos; } } catch (StringIndexOutOfBoundsException e) { return -1; } return -1; } /** * Reads characters until any 'end' character is encountered, ignoring * escape sequences. * * @param out * The StringBuilder to write to. * @param in * The Input String. * @param start * Starting position. * @param end * End characters. * @return The new position or -1 if no 'end' char was found. */ public static int readRawUntil(StringBuilder out, String in, int start, char... end) { int pos = start; while(pos < in.length()) { char ch = in.charAt(pos); boolean endReached = false; for(int n = 0; n < end.length; n++) { if(ch == end[n]) { endReached = true; break; } } if(endReached) break; out.append(ch); pos++; } return (pos == in.length()) ? -1 : pos; } /** * Appends the given string to the given StringBuilder, replacing '&', * '<' and '>' by their respective HTML entities. * * @param out * The StringBuilder to append to. * @param value * The string to append. * @param offset * The character offset into value from where to start */ public static void codeEncode(StringBuilder out, String value, int offset) { for(int i = offset; i < value.length(); i++) { char c = value.charAt(i); switch(c) { case '&': out.append("&"); break; case '<': out.append("<"); break; case '>': out.append(">"); break; default: out.append(c); } } } /** * Removes trailing <code>`</code> and trims spaces. * * @param fenceLine * Fenced code block starting line * @return Rest of the line after trimming and backtick removal * @since 0.7 */ public static String getMetaFromFence(String fenceLine) { for(int i = 0; i < fenceLine.length(); i++) { char c = fenceLine.charAt(i); if(!Character.isWhitespace(c) && c != '`' && c != '~' && c != '%') { return fenceLine.substring(i).trim(); } } return ""; } }