/**
* pdfXtk - PDF Extraction Toolkit
* Copyright (c) by the authors/contributors. All rights reserved.
* This project includes code from PDFBox and TouchGraph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the names pdfXtk or PDF Extraction Toolkit; nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* http://pdfxtk.sourceforge.net
*
*/
package at.ac.tuwien.dbai.pdfwrap.model.graph;
// todo: linear segments method only for first build on one level
// (i.e. with getElementsAbove, etc)
import at.ac.tuwien.dbai.pdfwrap.model.document.GenericSegment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * The document graph: a list of {@link DocNode}s, a list of {@link DocEdge}s
 * connecting them and, where the graph was derived from segments, a two-way
 * lookup between each {@link GenericSegment} and the node created for it.
 *
 * <p>The node and edge lists returned by the getters are the live internal
 * lists, not copies. The segment lookup maps are only populated by
 * {@link #DocumentGraph(AdjacencyGraph)}, {@link #subGraph(List)} and
 * {@link #deepCopy()}; they are not updated by {@link #setNodes(List)}.
 *
 * @author Tamir Hassan, pdfanalyser@tamirhassan.com
 * @version PDF Analyser 0.9
 */
public class DocumentGraph implements Cloneable
{
    /** All nodes of this graph. */
    protected List<DocNode> nodes;

    /** All edges of this graph. */
    protected List<DocEdge> edges;

    /** Lookup from a segment to the node that was created for it. */
    protected HashMap<GenericSegment, DocNode> segNodeHash;

    /** Reverse lookup from a node to the segment it was created from. */
    protected HashMap<DocNode, GenericSegment> nodeSegHash;

    // TODO: generic segment/text segment problem...
    // sloppy code... sort out at some time!
    // potential hole...
    // used for graph matching only at this stage...

    /**
     * Creates a blank document graph with no nodes, no edges and empty
     * segment lookup maps.
     *
     * <p>The lookup maps are always created so that
     * {@link #getNodeFromHash(GenericSegment)} and
     * {@link #getSegmentFromHash(DocNode)} return {@code null} for unknown
     * keys instead of failing with a {@link NullPointerException}.
     */
    public DocumentGraph()
    {
        nodes = new ArrayList<DocNode>();
        edges = new ArrayList<DocEdge>();
        segNodeHash = new HashMap<GenericSegment, DocNode>();
        nodeSegHash = new HashMap<DocNode, GenericSegment>();
    }

    /**
     * Builds a document graph from an adjacency graph.
     *
     * <p>One node is created per entry of {@code ag.getVertSegmentList()} and
     * registered in both lookup maps. Only adjacency edges whose direction is
     * {@code AdjacencyEdge.REL_RIGHT} or {@code AdjacencyEdge.REL_BELOW} are
     * turned into document edges; edges with any other direction are skipped.
     *
     * @param ag the adjacency graph to convert; every element of its vertex
     *           segment list must be a {@link GenericSegment}
     * @throws ClassCastException if a vertex is not a {@link GenericSegment}
     */
    public DocumentGraph(AdjacencyGraph<?> ag)
    {
        this();

        for (Object o : ag.getVertSegmentList())
        {
            GenericSegment gs = (GenericSegment) o; // MUST be a GenericSegment
            DocNode n = new DocNode(gs);
            nodes.add(n);
            segNodeHash.put(gs, n);
            nodeSegHash.put(n, gs);
        }

        for (AdjacencyEdge<?> ae : ag.getEdges())
        {
            if (ae.getDirection() == AdjacencyEdge.REL_RIGHT ||
                ae.getDirection() == AdjacencyEdge.REL_BELOW)
            {
                GenericSegment segFrom = ae.getNodeFrom();
                GenericSegment segTo = ae.getNodeTo();
                // Endpoints are resolved through the lookup built above; a
                // segment that was not in the vertex list resolves to null.
                DocNode nodeFrom = segNodeHash.get(segFrom);
                DocNode nodeTo = segNodeHash.get(segTo);
                edges.add(new DocEdge(ae, nodeFrom, nodeTo));
            }
        }

        System.out.println("creating DG with nodes: " + nodes.size() + " edges: " + edges.size());
    }

    /**
     * Builds a document graph from a list of DOM nodes.
     *
     * <p>Elements named {@code "node"} become {@link DocNode}s and elements
     * named {@code "edge"} become {@link DocEdge}s; everything else is
     * ignored. The list is scanned twice because edges must be added after
     * all the nodes have been added. The segment lookup maps stay empty.
     *
     * @param listOfItems the DOM nodes to read
     */
    public DocumentGraph(NodeList listOfItems)
    {
        this();

        // First pass: nodes only.
        for (int s = 0; s < listOfItems.getLength(); s++)
        {
            Node itemNode = listOfItems.item(s);
            if (itemNode.getNodeType() == Node.ELEMENT_NODE
                && itemNode.getNodeName().equals("node"))
            {
                nodes.add(new DocNode((Element) itemNode));
            }
        }

        // Second pass: each edge is constructed with the complete node list.
        for (int s = 0; s < listOfItems.getLength(); s++)
        {
            Node itemNode = listOfItems.item(s);
            if (itemNode.getNodeType() == Node.ELEMENT_NODE
                && itemNode.getNodeName().equals("edge"))
            {
                edges.add(new DocEdge((Element) itemNode, nodes));
            }
        }
    }

    /**
     * @return the live list of nodes (not a copy)
     */
    public List<DocNode> getNodes()
    {
        return nodes;
    }

    /**
     * Replaces the node list. The segment lookup maps are not updated.
     *
     * @param nodes the new node list
     */
    public void setNodes(List<DocNode> nodes)
    {
        this.nodes = nodes;
    }

    /**
     * @return the live list of edges (not a copy)
     */
    public List<DocEdge> getEdges()
    {
        return edges;
    }

    /**
     * Replaces the edge list.
     *
     * @param edges the new edge list
     */
    public void setEdges(List<DocEdge> edges)
    {
        this.edges = edges;
    }

    /**
     * Collects the edges that start at the given node.
     *
     * @param n the node to look for; compared by reference identity
     * @return a new list of all edges whose "from" node is {@code n}
     */
    public List<DocEdge> edgesFrom(DocNode n)
    {
        List<DocEdge> retVal = new ArrayList<DocEdge>();
        for (DocEdge e : edges)
        {
            if (e.getFrom() == n)
            {
                retVal.add(e);
            }
        }
        return retVal;
    }

    /**
     * Collects the edges that end at the given node.
     *
     * @param n the node to look for; compared by reference identity
     * @return a new list of all edges whose "to" node is {@code n}
     */
    public List<DocEdge> edgesTo(DocNode n)
    {
        List<DocEdge> retVal = new ArrayList<DocEdge>();
        for (DocEdge e : edges)
        {
            if (e.getTo() == n)
            {
                retVal.add(e);
            }
        }
        return retVal;
    }

    /**
     * Collects the edges that touch the given node at either end.
     *
     * @param n the node to look for; compared by reference identity
     * @return a new list of all edges whose "from" or "to" node is {@code n}
     */
    public List<DocEdge> edgesFromTo(DocNode n)
    {
        List<DocEdge> retVal = new ArrayList<DocEdge>();
        for (DocEdge e : edges)
        {
            if (e.getFrom() == n || e.getTo() == n)
            {
                retVal.add(e);
            }
        }
        return retVal;
    }

    /**
     * Looks up the node registered for a segment.
     *
     * @param gs the segment
     * @return the node registered for {@code gs}, or {@code null} if none
     */
    public DocNode getNodeFromHash(GenericSegment gs)
    {
        return segNodeHash.get(gs);
    }

    /**
     * Looks up the segment registered for a node.
     *
     * @param n the node
     * @return the segment registered for {@code n}, or {@code null} if none
     */
    public GenericSegment getSegmentFromHash(DocNode n)
    {
        return nodeSegHash.get(n);
    }

    /**
     * Creates a new graph containing the given nodes and every edge of this
     * graph that has at least one endpoint among them.
     *
     * <p>Nodes and edges are shared with this graph, not cloned. Note that an
     * edge is included even if only one of its endpoints is in {@code nodes}.
     * Segment lookups known to this graph are carried over for the given
     * nodes.
     *
     * @param nodes the nodes of the sub-graph
     * @return the new sub-graph
     */
    public DocumentGraph subGraph(List<DocNode> nodes)
    {
        DocumentGraph retVal = new DocumentGraph();
        retVal.nodes.addAll(nodes);

        // Keep the segment lookup usable on the sub-graph.
        for (DocNode n : nodes)
        {
            GenericSegment gs = nodeSegHash.get(n);
            if (gs != null)
            {
                retVal.segNodeHash.put(gs, n);
                retVal.nodeSegHash.put(n, gs);
            }
        }

        for (DocEdge e : edges)
        {
            if (nodes.contains(e.getFrom()) || nodes.contains(e.getTo()))
            {
                retVal.edges.add(e);
            }
        }
        return retVal;
    }

    /**
     * Creates a copy of this graph in which every node and every edge is a
     * clone, and cloned edges are re-targeted to the cloned nodes.
     *
     * <p>An edge endpoint that is not part of this graph's node list has no
     * clone; such an endpoint is left as the edge's {@code clone()} produced
     * it rather than being overwritten with {@code null}. Segment lookups
     * known to this graph are carried over to the cloned nodes.
     *
     * @return the copied graph
     */
    public DocumentGraph deepCopy()
    {
        DocumentGraph retVal = new DocumentGraph();

        // Map from original nodes to their newly cloned counterparts.
        HashMap<DocNode, DocNode> nhm = new HashMap<DocNode, DocNode>();
        for (DocNode n : nodes)
        {
            DocNode cln = (DocNode) n.clone();
            retVal.nodes.add(cln);
            nhm.put(n, cln);

            GenericSegment gs = nodeSegHash.get(n);
            if (gs != null)
            {
                retVal.segNodeHash.put(gs, cln);
                retVal.nodeSegHash.put(cln, gs);
            }
        }

        for (DocEdge ae : edges)
        {
            DocEdge cae = (DocEdge) ae.clone();

            DocNode fromCopy = nhm.get(cae.getFrom());
            if (fromCopy != null)
            {
                cae.setFrom(fromCopy);
            }

            DocNode toCopy = nhm.get(cae.getTo());
            if (toCopy != null)
            {
                cae.setTo(toCopy);
            }

            retVal.edges.add(cae);
        }
        return retVal;
    }

    /**
     * Appends this graph to an XML document: first all nodes, then all edges.
     *
     * @param resultDocument   the document passed on to each node and edge
     * @param parent           the element passed on to each node and edge
     * @param addDisabledItems if {@code true}, items flagged via
     *                         {@code isRemoveFromInstance()} are written too;
     *                         otherwise they are skipped
     */
    public void addAsXMLGraph(Document resultDocument, Element parent,
        boolean addDisabledItems)
    {
        for (DocNode thisItem : nodes)
        {
            if (!thisItem.isRemoveFromInstance() || addDisabledItems)
            {
                thisItem.addAsXMLNode(resultDocument, parent);
            }
        }
        for (DocEdge thisItem : edges)
        {
            if (!thisItem.isRemoveFromInstance() || addDisabledItems)
            {
                thisItem.addAsXMLEdge(resultDocument, parent);
            }
        }
    }
}