/*
* FindBugs - Find Bugs in Java programs
* Copyright (C) 2003-2008 University of Maryland
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package edu.umd.cs.findbugs.util;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.regex.Pattern;
import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.EditorKit;
import javax.swing.text.Element;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.HTMLWriter;
/**
 * Static helpers for turning HTML into plain text. The HTML is parsed with
 * an {@link HTMLEditorKit} into an {@link HTMLDocument} and then written out
 * by a customized {@link HTMLWriter} that emits text instead of markup.
 */
public class HTML {

    /**
     * An {@link HTMLWriter} that writes only textual content. Lines are
     * wrapped at 80 columns, list items are prefixed with {@code "* "}, and
     * text inside {@code pre} elements is written without whitespace
     * collapsing or wrapping.
     */
    private static final class HTMLtoPlainTextWriter2 extends HTMLWriter {

        /** Matches a run of whitespace; compiled once rather than per text node. */
        private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

        /** True while inside a {@code pre} element. */
        private boolean inPre = false;

        /**
         * True from the moment any start tag is seen until the next piece of
         * non-{@code pre} text is processed; makes that text drop its leading
         * spaces.
         */
        private boolean startingParagraph = false;

        /**
         * Creates a writer that wraps its output at 80 columns.
         *
         * @param w
         *            destination for the plain text
         * @param doc
         *            parsed HTML document to render
         */
        public HTMLtoPlainTextWriter2(Writer w, HTMLDocument doc) {
            super(w, doc);
            setLineLength(80);
            setCanWrapLines(true);
        }

        /**
         * Handles the start of a non-leaf element. No markup is written;
         * {@code ul} and {@code li} increase the indent level and write a
         * short prefix, and {@code pre} switches on preformatted mode.
         *
         * @param elem
         *            element being opened
         * @throws IOException
         *             if writing a prefix fails
         */
        @Override
        protected void startTag(Element elem) throws IOException {
            String name = elem.getName();
            startingParagraph = true;
            if ("ul".equals(name)) {
                // incrIndent() is a no-op in this class, so the real
                // implementation has to be invoked through super.
                super.incrIndent();
                write(" ");
            } else if ("pre".equals(name)) {
                inPre = true;
            } else if ("li".equals(name)) {
                super.incrIndent();
                write("* ");
            }
        }

        /**
         * Intentionally writes nothing for the given attribute set.
         *
         * @param attr
         *            ignored
         */
        @Override
        protected void writeEmbeddedTags(AttributeSet attr) throws IOException {
        }

        /**
         * Handles the end of a non-leaf element. {@code p}, {@code ul} and
         * {@code li} end the current line and indent the next one;
         * {@code ul} and {@code li} first undo the indent added by
         * {@link #startTag(Element)}. {@code pre} switches off preformatted
         * mode.
         *
         * @param elem
         *            element being closed
         * @throws IOException
         *             if writing the line break or indent fails
         */
        @Override
        protected void endTag(Element elem) throws IOException {
            String name = elem.getName();
            if ("pre".equals(name)) {
                inPre = false;
                return;
            }
            boolean listElement = "ul".equals(name) || "li".equals(name);
            if (listElement) {
                super.decrIndent();
            }
            if (listElement || "p".equals(name)) {
                writeLineSeparator();
                indent();
            }
        }

        /**
         * Deliberately does nothing. Indentation is changed only through the
         * explicit {@code super.incrIndent()} calls made for list elements;
         * presumably this keeps indent requests issued by the base writer
         * during traversal from taking effect.
         */
        @Override
        protected void incrIndent() {
        }

        /**
         * Deliberately does nothing; see {@link #incrIndent()}.
         */
        @Override
        protected void decrIndent() {
        }

        /**
         * Forwards leaf elements named {@code "content"} to the base class
         * and silently drops every other leaf element.
         *
         * @param elem
         *            leaf element to write
         */
        @Override
        protected void emptyTag(Element elem) throws IOException, BadLocationException {
            if ("content".equals(elem.getName())) {
                super.emptyTag(elem);
            }
        }

        /**
         * Writes the text of a content element. Outside {@code pre}, each
         * whitespace run is collapsed to a single space and, for the first
         * text after a start tag, leading spaces are removed. Inside
         * {@code pre} the text is written unchanged with wrapping disabled.
         *
         * @param elem
         *            content element whose text is written
         */
        @Override
        protected void text(Element elem) throws IOException, BadLocationException {
            String contentStr = getText(elem);
            if (!inPre) {
                contentStr = WHITESPACE_RUN.matcher(contentStr).replaceAll(" ");
                if (startingParagraph) {
                    contentStr = stripLeadingSpaces(contentStr);
                }
                startingParagraph = false;
            }
            if (contentStr.length() > 0) {
                // Set the wrapping mode right before each write so that
                // preformatted text is never re-flowed.
                setCanWrapLines(!inPre);
                write(contentStr);
            }
        }

        /** Returns {@code s} without its leading {@code ' '} characters. */
        private static String stripLeadingSpaces(String s) {
            int first = 0;
            while (first < s.length() && s.charAt(first) == ' ') {
                first++;
            }
            return first == 0 ? s : s.substring(first);
        }
    }

    /** Not instantiable; this class only offers static helpers. */
    private HTML() {
    }

    /**
     * Parses HTML from {@code reader} and writes a plain-text rendering of
     * it to {@code writer}. The writer is always closed before this method
     * returns, whether the conversion succeeded or failed.
     *
     * @param reader
     *            source of the HTML
     * @param writer
     *            destination for the plain text; closed by this method
     * @throws IOException
     *             if reading, writing or closing fails
     * @throws BadLocationException
     *             if the document rejects a position while being read or
     *             written
     */
    public static void convertHtmlToText(Reader reader, Writer writer) throws IOException, BadLocationException {
        try {
            EditorKit kit = new HTMLEditorKit();
            HTMLDocument doc = new HTMLDocument();
            kit.read(reader, doc, 0);
            HTMLtoPlainTextWriter2 x = new HTMLtoPlainTextWriter2(writer, doc);
            x.write();
        } finally {
            // Close on the failure path as well, so the writer is not
            // leaked when parsing or rendering throws.
            writer.close();
        }
    }

    /**
     * Converts an HTML fragment to plain text. The fragment is wrapped in
     * {@code <HTML><BODY>...</BODY></HTML>} before being converted with
     * {@link #convertHtmlToText(Reader, Writer)}.
     *
     * @param htmlSnippet
     *            HTML fragment to convert
     * @return the plain-text rendering of the fragment
     * @throws IOException
     *             if the conversion fails while reading or writing
     * @throws BadLocationException
     *             if the document rejects a position during conversion
     */
    public static String convertHtmlSnippetToText(String htmlSnippet) throws IOException, BadLocationException {
        StringWriter writer = new StringWriter();
        StringReader reader = new StringReader("<HTML><BODY>" + htmlSnippet + "</BODY></HTML>");
        convertHtmlToText(reader, writer);
        return writer.toString();
    }
}