/*
* Copyright 2007 Guy Van den Broeck
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.outerj.daisy.diff.html.dom;
import java.util.ArrayList;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class DomTreeBuilder extends DefaultHandler implements DomTree {
private List<TextNode> textNodes = new ArrayList<TextNode>(50);
private BodyNode bodyNode = new BodyNode();
private TagNode currentParent = bodyNode;
private StringBuilder newWord = new StringBuilder();
protected boolean documentStarted = false;
protected boolean documentEnded = false;
protected boolean bodyStarted = false;
protected boolean bodyEnded = false;
private boolean whiteSpaceBeforeThis = false;
/** When greater than 0, this indicates that the node being parsed is a descendant of a pre tag. */
private int numberOfActivePreTags = 0; // calculating this as required for every node is expensive.
private Node lastSibling = null;
public BodyNode getBodyNode() {
return bodyNode;
}
public List<TextNode> getTextNodes() {
return textNodes;
}
@Override
public void startDocument() throws SAXException {
if (documentStarted)
throw new IllegalStateException(
"This Handler only accepts one document");
documentStarted = true;
}
@Override
public void endDocument() throws SAXException {
if (!documentStarted || documentEnded)
throw new IllegalStateException();
endWord();
documentEnded = true;
documentStarted = false;
}
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if (!documentStarted || documentEnded)
throw new IllegalStateException();
if (bodyStarted && !bodyEnded) {
endWord();
TagNode newTagNode = new TagNode(currentParent, localName,
attributes);
currentParent = newTagNode;
lastSibling = null;
if (whiteSpaceBeforeThis && newTagNode.isInline()) {
newTagNode.setWhiteBefore(true);
}
whiteSpaceBeforeThis = false;
if (newTagNode.isPre()) {
numberOfActivePreTags++;
}
if (isSeparatingTag(newTagNode)) {
addSeparatorNode();
}
} else if (bodyStarted) {
// Ignoring element after body tag closed
} else if (localName.equalsIgnoreCase("body")) {
bodyStarted = true;
}
}
@Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (!documentStarted || documentEnded)
throw new IllegalStateException();
if (localName.equalsIgnoreCase("body")) {
bodyEnded = true;
} else if (bodyStarted && !bodyEnded) {
if (localName.equalsIgnoreCase("img")) {
// Insert a dummy leaf for the image
ImageNode img = new ImageNode(currentParent, currentParent
.getAttributes());
img.setWhiteBefore(whiteSpaceBeforeThis);
lastSibling = img;
textNodes.add(img);
}
endWord();
if (currentParent.isInline()) {
lastSibling = currentParent;
} else {
lastSibling = null;
}
if (localName.equalsIgnoreCase("pre")) {
numberOfActivePreTags--;
}
if (isSeparatingTag(currentParent)) {
addSeparatorNode();
}
currentParent = currentParent.getParent();
whiteSpaceBeforeThis = false;
}
}
@Override
public void characters(char ch[], int start, int length)
throws SAXException {
if (!documentStarted || documentEnded)
throw new IllegalStateException();
for (int i = start; i < start + length; i++) {
char c = ch[i];
if (isDelimiter(c)) {
endWord();
if (WhiteSpaceNode.isWhiteSpace(c) && numberOfActivePreTags == 0) {
if (lastSibling != null)
lastSibling.setWhiteAfter(true);
whiteSpaceBeforeThis = true;
} else {
TextNode textNode = new TextNode(currentParent, Character
.toString(c));
textNode.setWhiteBefore(whiteSpaceBeforeThis);
whiteSpaceBeforeThis = false;
lastSibling = textNode;
textNodes.add(textNode);
}
} else {
newWord.append(c);
}
}
}
private void endWord() {
if (newWord.length() > 0) {
TextNode node = new TextNode(currentParent, newWord.toString());
node.setWhiteBefore(whiteSpaceBeforeThis);
whiteSpaceBeforeThis = false;
lastSibling = node;
textNodes.add(node);
newWord.setLength(0);
}
}
/**
* Returns <code>true</code> if the given tag separates text nodes
* from being successive. I.e. every block starts a new distinct text flow.
* @param aTagNode
* @return
*/
private boolean isSeparatingTag(TagNode aTagNode) {
// treat all block tags as separating
return aTagNode.isBlockLevel();
}
/**
* Ensures that a separator is added after the last text node.
*/
private void addSeparatorNode() {
if (textNodes.isEmpty()) {
return;
}
// don't add multiple separators
if (textNodes.get(textNodes.size() - 1) instanceof SeparatingNode) {
return;
}
textNodes.add(new SeparatingNode(currentParent));
}
public static boolean isDelimiter(char c) {
if (WhiteSpaceNode.isWhiteSpace(c))
return true;
switch (c) {
// Basic Delimiters
case '/':
case '.':
case '!':
case ',':
case ';':
case '?':
case '=':
case '\'':
case '"':
// Extra Delimiters
case '[':
case ']':
case '{':
case '}':
case '(':
case ')':
case '&':
case '|':
case '\\':
case '-':
case '_':
case '+':
case '*':
case ':':
return true;
default:
return false;
}
}
}