/** * Copyright winterwell Mathematics Ltd. * @author Daniel Winterstein * 11 Jan 2007 */ package winterwell.markdown.pagemodel; import java.io.File; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.eclipse.jface.preference.IPreferenceStore; import winterwell.markdown.Activator; import winterwell.markdown.StringMethods; import winterwell.markdown.preferences.MarkdownPreferencePage; import winterwell.utils.FailureException; import winterwell.utils.Process; import winterwell.utils.StrUtils; import winterwell.utils.Utils; import winterwell.utils.io.FileUtils; import com.petebevin.markdown.MarkdownProcessor; /** * Understands Markdown syntax. * * @author Daniel Winterstein */ public class MarkdownPage { /** * Strip leading and trailing #s and whitespace * * @param line * @return cleaned up line */ private String cleanHeader(String line) { for (int j = 0; j < line.length(); j++) { char c = line.charAt(j); if (c != '#' && !Character.isWhitespace(c)) { line = line.substring(j); break; } } for (int j = line.length() - 1; j > 0; j--) { char c = line.charAt(j); if (c != '#' && !Character.isWhitespace(c)) { line = line.substring(0, j + 1); break; } } return line; } /** * Represents information about a section header. E.g. ## Misc Warblings * * @author daniel */ public class Header { /** * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc. */ final int level; /** * The text of the Header */ final String heading; /** * Sub-sections, if any */ final List<Header> subHeaders = new ArrayList<Header>(); /** * The line on which this header occurs. */ final int lineNumber; public int getLineNumber() { return lineNumber; } /** * * @return the next section (at this depth if possible), null if none */ public Header getNext() { if (parent == null) { int ti = level1Headers.indexOf(this); if (ti == -1 || ti == level1Headers.size() - 1) return null; return level1Headers.get(ti + 1); } int i = parent.subHeaders.indexOf(this); assert i != -1 : this; if (i == parent.subHeaders.size() - 1) return parent.getNext(); return parent.subHeaders.get(i + 1); } /** * * @return the next section (at this depth if possible), null if none */ public Header getPrevious() { if (parent == null) { int ti = level1Headers.indexOf(this); if (ti == -1 || ti == 0) return null; return level1Headers.get(ti - 1); } int i = parent.subHeaders.indexOf(this); assert i != -1 : this; if (i == 0) return parent.getPrevious(); return parent.subHeaders.get(i - 1); } /** * The parent section. Can be null. */ private Header parent; /** * Create a marker for a section Header * * @param level * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc. * @param lineNumber * The line on which this header occurs * @param heading * The text of the Header, trimmed of #s * @param currentHeader * The previous Header. This is used to find the parent * section if there is one. Can be null. */ Header(int level, int lineNumber, String heading, Header currentHeader) { this.lineNumber = lineNumber; this.level = level; this.heading = cleanHeader(heading); // Heading Tree setParent(currentHeader); } private void setParent(Header currentHeader) { if (currentHeader == null) { parent = null; return; } if (currentHeader.level < level) { parent = currentHeader; parent.subHeaders.add(this); return; } setParent(currentHeader.parent); } public Header getParent() { return parent; } /** * Sub-sections. May be zero-length, never null. */ public List<Header> getSubHeaders() { return subHeaders; } @Override public String toString() { return heading; } public int getLevel() { return level; } } /** * The raw text, broken up into individual lines. */ private List<String> lines; /** * The raw text, broken up into individual lines. */ public List<String> getText() { return Collections.unmodifiableList(lines); } public enum KLineType { NORMAL, H1, H2, H3, H4, H5, H6, BLANK, // TODO LIST, BLOCKQUOTE, /** A line marking Markdown info about the preceding line, e.g. ====== */ MARKER, /** A line containing meta-data, e.g. title: My Page */ META } /** * Information about each line. */ private List<KLineType> lineTypes; private Map<Integer,Object> pageObjects = new HashMap<Integer, Object>(); // TODO meta-data, footnotes, tables, link & image attributes private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)"); private Map<String, String> multiMarkdownTags = new HashMap<String, String>(); // Regular expression for Github support private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])"); /** * The top-level headers. FIXME handle documents which have a 2nd level * header before any 1st level ones */ private final List<Header> level1Headers = new ArrayList<Header>(); private final IPreferenceStore pStore; /** * Create a page. * * @param text */ public MarkdownPage(String text) { pStore = Activator.getDefault().getPreferenceStore(); setText(text); } /** * Reset the text for this page. * * @param text */ private void setText(String text) { // Get lines lines = StringMethods.splitLines(text); // Clean out old level1Headers.clear(); lineTypes = new ArrayList<KLineType>(lines.size()); pageObjects.clear(); // Dummy level-1 header in case there are none Header dummyTopHeader = new Header(1, 0, "", null); level1Headers.add(dummyTopHeader); Header currentHeader = dummyTopHeader; // Identify line types int lineNum = 0; // Check if we should support the Multi-Markdown Metadata boolean multiMarkdownMetadataSupport = pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA); // Multi-markdown header if (multiMarkdownMetadataSupport) { // The key is the text before the colon, and the data is the text // after the // colon. In the above example, notice that there are two lines of // information // for the Author key. If you end a line with “space-space-newline”, // the newline // will be included when converted to other formats. // // There must not be any whitespace above the metadata, and the // metadata block // ends with the first whitespace only line. The metadata is // stripped from the // document before it is passed on to the syntax parser. // // Check if the Metdatas are valid // boolean validMetadata = true; for (lineNum = 0; lineNum < lines.size(); lineNum++) { String line = lines.get(lineNum); if (Utils.isBlank(line)) { break; } Matcher m = multiMarkdownTag.matcher(line); if (!m.find()) { if (lineNum == 0) { // No MultiMarkdown metadata validMetadata = false; break; } else if (!line.matches("^\\s.*\n")) { // The next line was not intended (ie. it does not start // with a whitespace) validMetadata = false; break; } } } // Valid Metadatas have been found. We need to retrieve these keys/values. if (validMetadata) { String data = ""; String tag = ""; for (lineNum = 0; lineNum < lines.size(); lineNum++) { String line = lines.get(lineNum); if (Utils.isBlank(line)) { break; } Matcher m = multiMarkdownTag.matcher(line); if (!m.find()) { if (lineNum == 0) { break; } // Multi-line tag lineTypes.add(KLineType.META); data += StrUtils.LINEEND + line.trim(); multiMarkdownTags.put(tag, data); } else { lineTypes.add(KLineType.META); tag = m.group(0); data = m.group(1).trim(); if (m.group(1).endsWith(line)) multiMarkdownTags.put(tag, data); } } } else { lineNum = 0; } } boolean githubSyntaxSupport = pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX); boolean inCodeBlock = false; for (; lineNum < lines.size(); lineNum++) { String line = lines.get(lineNum); // Code blocks if (githubSyntaxSupport && line.startsWith("```")) { inCodeBlock = !inCodeBlock; } if (!inCodeBlock) { // Headings int h = numHash(line); String hLine = line; int hLineNum = lineNum; int underline = -1; if (lineNum != 0) { underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1; } if (underline != -1) { h = underline; hLineNum = lineNum - 1; hLine = lines.get(lineNum - 1); lineTypes.set(hLineNum, KLineType.values()[h]); lineTypes.add(KLineType.MARKER); } // Create a Header object if (h > 0) { if (underline == -1) lineTypes.add(KLineType.values()[h]); Header header = new Header(h, hLineNum, hLine, currentHeader); if (h == 1) { level1Headers.add(header); } pageObjects.put(hLineNum, header); currentHeader = header; continue; } } // TODO List // TODO Block quote // Blank line if (Utils.isBlank(line)) { lineTypes.add(KLineType.BLANK); continue; } // Normal lineTypes.add(KLineType.NORMAL); } // end line-loop // Remove dummy header? if (dummyTopHeader.getSubHeaders().size() == 0) { level1Headers.remove(dummyTopHeader); } if (githubSyntaxSupport) { /* * Support Code block */ inCodeBlock = false; for (lineNum = 0; lineNum < lines.size(); lineNum++) { String line = lines.get(lineNum); // Found the start or end of a code block if (line.matches("^```.*\n")) { // We reverse the boolean value inCodeBlock = !inCodeBlock; // We force the line to be blank. But we mark it as normal // to prevent to be stripped lines.set(lineNum, "\n"); lineTypes.set(lineNum, KLineType.NORMAL); continue; } if (inCodeBlock) { lines.set(lineNum, " " + line); } } /* * Support for URL Detection * We search for links that are not captured by Markdown syntax */ for (lineNum = 0; lineNum < lines.size(); lineNum++) { String line = lines.get(lineNum); // When a link has been replaced we need to scan again the string // as the offsets have changed (we add '<' and '>' to the link to // be interpreted by the markdown library) boolean urlReplaced; do { urlReplaced = false; Matcher m = githubURLDetection.matcher(line); while (m.find()) { // Ignore the URL following the format <link> if ((m.start() - 1 >= 0) && (m.end() < line.length()) && (line.charAt(m.start() - 1) == '<') && (line.charAt(m.end()) == '>')) { continue; } // Ignore the URL following the format [description](link) if ((m.start() - 2 >= 0) && (m.end() < line.length()) && (line.charAt(m.start() - 2) == ']') && (line.charAt(m.start() - 1) == '(') && (line.charAt(m.end()) == ')')) { continue; } // Ignore the URL following the format [description](link "title") if ((m.start() - 2 >= 0) && (m.end() + 1 < line.length()) && (line.charAt(m.start() - 2) == ']') && (line.charAt(m.start() - 1) == '(') && (line.charAt(m.end()) == ' ') && (line.charAt(m.end() + 1) == '"')) { continue; } if (m.start() - 1 >= 0) { // Case when the link is at the beginning of the string line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end()); } else { line = "<" + m.group(0) + ">" + line.substring(m.end()); } // We replaced the string in the array lines.set(lineNum, line); urlReplaced = true; break; } } while (urlReplaced); } } } /** * @param line * @param c * @return true if line is just cs (and whitespace at the start/end) */ boolean just(String line, char c) { return line.matches("\\s*"+c+"+\\s*"); } /** * @param line * @return The number of # symbols prepending the line. */ private int numHash(String line) { for (int i = 0; i < line.length(); i++) { if (line.charAt(i) != '#') return i; } return line.length(); } /** * * @param parent * Can be null for top-level * @return List of sub-headers. Never null. FIXME handle documents which * have a 2nd level header before any 1st level ones */ public List<Header> getHeadings(Header parent) { if (parent == null) { return Collections.unmodifiableList(level1Headers); } return Collections.unmodifiableList(parent.subHeaders); } // public WebPage getWebPage() { // WebPage page = new WebPage(); // // Add the lines, one by one // boolean inParagraph = false; // for (int i=0; i<lines.size(); i++) { // String line = lines.get(i); // KLineType type = lineTypes.get(i); // switch(type) { // // Heading? // case H1: case H2: case H3: // case H4: case H5: case H6: // if (inParagraph) page.addText("</p>"); // line = cleanHeader(line); // page.addText("<"+type+">"+line+"</"+type+">"); // continue; // case MARKER: // Ignore // continue; // // TODO List? // // TODO Block quote? // } // // Paragraph end? // if (Utils.isBlank(line)) { // if (inParagraph) page.addText("</p>"); // continue; // } // // Paragraph start? // if (!inParagraph) { // page.addText("<p>"); // inParagraph = true; // } // // Plain text // page.addText(line); // } // return page; // } /** * Get the HTML for this page. Uses the MarkdownJ project. */ public String html() { // Section numbers?? boolean sectionNumbers = pStore .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS); // Chop out multi-markdown header StringBuilder sb = new StringBuilder(); assert lines.size() == lineTypes.size(); for (int i = 0, n = lines.size(); i < n; i++) { KLineType type = lineTypes.get(i); if (type == KLineType.META) continue; String line = lines.get(i); if (sectionNumbers && isHeader(type) && line.contains("$section")) { // TODO Header section = headers.get(i); // String secNum = section.getSectionNumber(); // line.replace("$section", secNum); } sb.append(line); } String text = sb.toString(); // Use external converter? final String cmd = pStore .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND); if (Utils.isBlank(cmd) || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) { // Use MarkdownJ MarkdownProcessor markdown = new MarkdownProcessor(); // MarkdownJ doesn't convert £s for some reason text = text.replace("£", "£"); String html = markdown.markdown(text); return html; } // Attempt to run external command try { final File md = File.createTempFile("tmp", ".md"); FileUtils.write(md, text); Process process = new Process(cmd+" "+md.getAbsolutePath()); process.run(); int ok = process.waitFor(10000); if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError()); String html = process.getOutput(); FileUtils.delete(md); return html; } catch (Exception e) { throw Utils.runtime(e); } } /** * @param type * @return */ private boolean isHeader(KLineType type) { return type == KLineType.H1 || type == KLineType.H2 || type == KLineType.H3 || type == KLineType.H4 || type == KLineType.H5 || type == KLineType.H6; } /** * Return the raw text of this page. */ @Override public String toString() { StringBuilder sb = new StringBuilder(); for (String line : lines) { sb.append(line); } return sb.toString(); } /** * Line type information for the raw text. * * @return */ public List<KLineType> getLineTypes() { return Collections.unmodifiableList(lineTypes); } /** * @param line * @return */ public Object getPageObject(int line) { return pageObjects.get(line); } }