/*
* Copyright 2001-2008 Geert Bevin (gbevin[remove] at uwyn dot com)
* Licensed under the Apache License, Version 2.0 (the "License")
* $Id: ParsedHtml.java 3918 2008-04-14 17:35:35Z gbevin $
*/
package com.uwyn.rife.test;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.xerces.parsers.DOMParser;
import org.cyberneko.html.HTMLConfiguration;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* Retrieves the text content of a {@link MockResponse} and parses it as HTML.
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @version $Revision: 3918 $
* @since 1.1
*/
public class ParsedHtml
{
    // Identifiers of the NekoHTML configuration properties that are set before parsing.
    private static final String DEFAULT_ENCODING = "http://cyberneko.org/html/properties/default-encoding";
    private static final String TAG_NAME_CASE = "http://cyberneko.org/html/properties/names/elems";
    private static final String ATTRIBUTE_NAME_CASE = "http://cyberneko.org/html/properties/names/attrs";

    private final MockResponse mResponse;
    private final Document mDocument;
    private final List<MockForm> mForms = new ArrayList<MockForm>();
    private final List<MockLink> mLinks = new ArrayList<MockLink>();

    /**
     * Creates the instance and collects every <code>form</code> and
     * <code>a</code> element of the document, in document order, wrapping
     * them in {@link MockForm} and {@link MockLink} instances.
     *
     * @param response the response the document was obtained from
     * @param document the parsed DOM document
     */
    private ParsedHtml(MockResponse response, Document document)
    {
        mResponse = response;
        mDocument = document;

        // get all the forms
        NodeList form_nodes = document.getElementsByTagName("form");
        for (int i = 0; i < form_nodes.getLength(); i++)
        {
            mForms.add(new MockForm(mResponse, form_nodes.item(i)));
        }

        // get all the links
        NodeList link_nodes = document.getElementsByTagName("a");
        for (int i = 0; i < link_nodes.getLength(); i++)
        {
            mLinks.add(new MockLink(mResponse, link_nodes.item(i)));
        }
    }

    /**
     * Parses the text content of a {@link MockResponse} object as HTML and
     * returns the result as an instance of <code>ParsedHtml</code>.
     *
     * @param response the response whose text content will be parsed
     * @return the resulting instance of <code>ParsedHtml</code>
     * @throws IOException when the parser fails to read the text
     * @throws SAXException when the parser reports a parsing error
     * @since 1.1
     */
    public static ParsedHtml parse(MockResponse response)
    throws IOException, SAXException
    {
        return parse(response, response.getText());
    }

    /**
     * Parses the provided text as HTML and associates the result with the
     * provided response.
     *
     * @param response the response that the forms and links will be bound to
     * @param text the HTML text that will be parsed
     * @return the resulting instance of <code>ParsedHtml</code>
     * @throws IOException when the parser fails to read the text
     * @throws SAXException when the parser reports a parsing error
     */
    static ParsedHtml parse(MockResponse response, String text)
    throws IOException, SAXException
    {
        Reader reader = new StringReader(text);
        InputSource inputsource = new InputSource(reader);

        HTMLConfiguration config = new HTMLConfiguration();
        config.setProperty(DEFAULT_ENCODING, "UTF-8");
        // element and attribute names are forced to lower case, the lookups
        // in this class ("form", "a", "title", "img", ...) rely on that
        config.setProperty(TAG_NAME_CASE, "lower");
        config.setProperty(ATTRIBUTE_NAME_CASE, "lower");

        DOMParser parser = new DOMParser(config);
        parser.parse(inputsource);

        return new ParsedHtml(response, parser.getDocument());
    }

    /**
     * Retrieves the DOM XML document that corresponds to the parsed HTML.
     *
     * @return the DOM XML document
     * @since 1.1
     */
    public Document getDocument()
    {
        return mDocument;
    }

    /**
     * Retrieves the text of the first <code>title</code> tag.
     *
     * @return the title; or
     * <p><code>null</code> if the document has no <code>title</code> tag
     * @since 1.1
     */
    public String getTitle()
    {
        NodeList list = mDocument.getElementsByTagName("title");
        if (0 == list.getLength())
        {
            return null;
        }

        return list.item(0).getTextContent();
    }

    /**
     * Retrieves the list of all the forms in the HTML document.
     * <p>The returned list is the one that is used internally, it is not a
     * copy.
     *
     * @return a list with {@link MockForm} instances
     * @see #getFormWithName
     * @see #getFormWithId
     * @since 1.1
     */
    public List<MockForm> getForms()
    {
        return mForms;
    }

    /**
     * Retrieves the first form in the HTML document with a particular
     * <code>name</code> attribute.
     *
     * @param name the content of the <code>name</code> attribute
     * @return the first {@link MockForm} whose <code>name</code> attribute
     * matches; or
     * <p><code>null</code> if no such form could be found
     * @throws IllegalArgumentException when <code>name</code> is
     * <code>null</code> or empty
     * @see #getForms
     * @see #getFormWithId
     * @since 1.1
     */
    public MockForm getFormWithName(String name)
    {
        if (null == name) throw new IllegalArgumentException("name can't be null");
        if (0 == name.length()) throw new IllegalArgumentException("name can't be empty");

        for (MockForm form : mForms)
        {
            if (name.equals(form.getName()))
            {
                return form;
            }
        }

        return null;
    }

    /**
     * Retrieves the first form in the HTML document with a particular
     * <code>id</code> attribute.
     *
     * @param id the content of the <code>id</code> attribute
     * @return the first {@link MockForm} whose <code>id</code> attribute
     * matches; or
     * <p><code>null</code> if no such form could be found
     * @throws IllegalArgumentException when <code>id</code> is
     * <code>null</code> or empty
     * @see #getForms
     * @see #getFormWithName
     * @since 1.1
     */
    public MockForm getFormWithId(String id)
    {
        if (null == id) throw new IllegalArgumentException("id can't be null");
        if (0 == id.length()) throw new IllegalArgumentException("id can't be empty");

        for (MockForm form : mForms)
        {
            // the validated argument is the receiver of equals(), so a form
            // whose getId() yields null (presumably a form without an id
            // attribute) is skipped instead of causing a NullPointerException
            if (id.equals(form.getId()))
            {
                return form;
            }
        }

        return null;
    }

    /**
     * Retrieves the list of all the links in the HTML document.
     * <p>The returned list is the one that is used internally, it is not a
     * copy.
     *
     * @return a list with {@link MockLink} instances
     * @see #getLinkWithName
     * @see #getLinkWithId
     * @see #getLinkWithText
     * @see #getLinkWithImageAlt
     * @see #getLinkWithImageName
     * @since 1.1
     */
    public List<MockLink> getLinks()
    {
        return mLinks;
    }

    /**
     * Retrieves the first link in the HTML document with a particular
     * <code>name</code> attribute.
     *
     * @param name the content of the <code>name</code> attribute
     * @return the first {@link MockLink} whose <code>name</code> attribute
     * matches; or
     * <p><code>null</code> if no such link could be found
     * @throws IllegalArgumentException when <code>name</code> is
     * <code>null</code> or empty
     * @see #getLinks
     * @see #getLinkWithId
     * @see #getLinkWithText
     * @see #getLinkWithImageAlt
     * @see #getLinkWithImageName
     * @since 1.1
     */
    public MockLink getLinkWithName(String name)
    {
        if (null == name) throw new IllegalArgumentException("name can't be null");
        if (0 == name.length()) throw new IllegalArgumentException("name can't be empty");

        for (MockLink link : mLinks)
        {
            // the validated argument is the receiver of equals(), so a link
            // whose getName() yields null (presumably a link without a name
            // attribute) is skipped instead of causing a NullPointerException
            if (name.equals(link.getName()))
            {
                return link;
            }
        }

        return null;
    }

    /**
     * Retrieves the first link in the HTML document with a particular
     * <code>id</code> attribute.
     *
     * @param id the content of the <code>id</code> attribute
     * @return the first {@link MockLink} whose <code>id</code> attribute
     * matches; or
     * <p><code>null</code> if no such link could be found
     * @throws IllegalArgumentException when <code>id</code> is
     * <code>null</code> or empty
     * @see #getLinks
     * @see #getLinkWithName
     * @see #getLinkWithText
     * @see #getLinkWithImageAlt
     * @see #getLinkWithImageName
     * @since 1.1
     */
    public MockLink getLinkWithId(String id)
    {
        if (null == id) throw new IllegalArgumentException("id can't be null");
        if (0 == id.length()) throw new IllegalArgumentException("id can't be empty");

        for (MockLink link : mLinks)
        {
            if (id.equals(link.getId()))
            {
                return link;
            }
        }

        return null;
    }

    /**
     * Retrieves the first link in the HTML document that surrounds a particular
     * text.
     *
     * @param text the surrounded text, an empty string is accepted
     * @return the first {@link MockLink} whose surrounded text matches; or
     * <p><code>null</code> if no such link could be found
     * @throws IllegalArgumentException when <code>text</code> is
     * <code>null</code>
     * @see #getLinks
     * @see #getLinkWithName
     * @see #getLinkWithId
     * @see #getLinkWithImageAlt
     * @see #getLinkWithImageName
     * @since 1.1
     */
    public MockLink getLinkWithText(String text)
    {
        if (null == text) throw new IllegalArgumentException("text can't be null");

        for (MockLink link : mLinks)
        {
            // equals() is false for a null link text, no separate check needed
            if (text.equals(link.getText()))
            {
                return link;
            }
        }

        return null;
    }

    /**
     * Retrieves the first link in the HTML document that surrounds an
     * <code>img</code> tag with a certain <code>alt</code> attribute.
     * <p>Only <code>img</code> tags that are direct children of the link are
     * considered.
     *
     * @param alt the content of the <code>alt</code> attribute, an empty
     * string is accepted
     * @return the first {@link MockLink} that has an <code>img</code> tag
     * whose <code>alt</code> attribute matches; or
     * <p><code>null</code> if no such link could be found
     * @throws IllegalArgumentException when <code>alt</code> is
     * <code>null</code>
     * @see #getLinks
     * @see #getLinkWithName
     * @see #getLinkWithId
     * @see #getLinkWithText
     * @see #getLinkWithImageName
     * @since 1.1
     */
    public MockLink getLinkWithImageAlt(String alt)
    {
        if (null == alt) throw new IllegalArgumentException("alt can't be null");

        return getLinkWithImageAttribute("alt", alt);
    }

    /**
     * Retrieves the first link in the HTML document that surrounds an
     * <code>img</code> tag with a certain <code>name</code> attribute.
     * <p>Only <code>img</code> tags that are direct children of the link are
     * considered.
     *
     * @param name the content of the <code>name</code> attribute
     * @return the first {@link MockLink} that has an <code>img</code> tag
     * whose <code>name</code> attribute matches; or
     * <p><code>null</code> if no such link could be found
     * @throws IllegalArgumentException when <code>name</code> is
     * <code>null</code> or empty
     * @see #getLinks
     * @see #getLinkWithName
     * @see #getLinkWithId
     * @see #getLinkWithText
     * @see #getLinkWithImageAlt
     * @since 1.1
     */
    public MockLink getLinkWithImageName(String name)
    {
        if (null == name) throw new IllegalArgumentException("name can't be null");
        if (0 == name.length()) throw new IllegalArgumentException("name can't be empty");

        return getLinkWithImageAttribute("name", name);
    }

    /**
     * Looks for the first link that has a direct <code>img</code> child whose
     * attribute with the provided name has exactly the provided value.
     *
     * @param attributeName the name of the <code>img</code> attribute
     * @param value the expected attribute value, must not be <code>null</code>
     * @return the first matching {@link MockLink}; or
     * <p><code>null</code> if no such link could be found
     */
    private MockLink getLinkWithImageAttribute(String attributeName, String value)
    {
        for (MockLink link : mLinks)
        {
            NodeList child_nodes = link.getNode().getChildNodes();
            if (null == child_nodes)
            {
                continue;
            }

            for (int i = 0; i < child_nodes.getLength(); i++)
            {
                Node child_node = child_nodes.item(i);
                // a lower case comparison is sufficient since the parser is
                // configured to lower-case all the element names
                if ("img".equals(child_node.getNodeName()) &&
                    value.equals(getNodeAttribute(child_node, attributeName, null)))
                {
                    return link;
                }
            }
        }

        return null;
    }

    /**
     * Retrieves the value of the attribute of an XML DOM node.
     *
     * @param node the node where the attribute should be obtained from
     * @param attributeName the name of the attribute
     * @return the value of the attribute; or
     * <p><code>null</code> if no attribute could be found
     * @since 1.2
     */
    public static String getNodeAttribute(Node node, String attributeName)
    {
        return getNodeAttribute(node, attributeName, null);
    }

    /**
     * Retrieves the value of the attribute of an XML DOM node, falling back
     * to a default value when the node has no attributes at all or when the
     * attribute isn't present.
     *
     * @param node the node where the attribute should be obtained from
     * @param attributeName the name of the attribute
     * @param defaultValue the value to return when the attribute is missing
     * @return the value of the attribute; or
     * <p><code>defaultValue</code> if no attribute could be found
     */
    static String getNodeAttribute(Node node, String attributeName, String defaultValue)
    {
        NamedNodeMap attributes = node.getAttributes();
        if (null == attributes)
        {
            return defaultValue;
        }

        Node attribute = attributes.getNamedItem(attributeName);
        if (null == attribute)
        {
            return defaultValue;
        }

        return attribute.getNodeValue();
    }
}