/**
* pdfXtk - PDF Extraction Toolkit
* Copyright (c) by the authors/contributors. All rights reserved.
* This project includes code from PDFBox and TouchGraph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the names pdfXtk or PDF Extraction Toolkit; nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* http://pdfxtk.sourceforge.net
*
*/
package at.ac.tuwien.dbai.pdfwrap.analysis;
import at.ac.tuwien.dbai.pdfwrap.comparators.XComparator;
import at.ac.tuwien.dbai.pdfwrap.comparators.YComparator;
import at.ac.tuwien.dbai.pdfwrap.model.document.GenericSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.LineSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.RectSegment;
import at.ac.tuwien.dbai.pdfwrap.utils.SegmentUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.Utils;
import org.apache.commons.collections.comparators.ReverseComparator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
 * Methods to obtain and process ruling lines found on the page.
 *
 * <p>Ruling objects are collected as {@link LineSegment}s; rectangles are
 * converted via {@link RectSegment#toLines()}.
 * {@link #removeDuplicateLines()} then merges horizontal and vertical lines
 * that lie close to each other into single, longer lines.
 *
 * @author Tamir Hassan, pdfanalyser@tamirhassan.com
 * @version PDF Analyser 0.9
 */
public class RulingObjectProcessor
{
    /**
     * Tolerance used when merging lines: both the value passed to
     * {@code Utils.within} when comparing the mid-coordinates of two lines,
     * and the amount by which a line's extent is widened at each end before
     * the intersection test.
     */
    // todo: replace all tolerance-based widening with
    // GenericSegment.getDilatedSegment
    private static final float MERGE_TOLERANCE = 6.0f;

    /** The ruling lines collected so far. */
    protected List<LineSegment> rulingLines;

    /**
     * Creates a processor with no ruling lines.
     */
    public RulingObjectProcessor()
    {
        rulingLines = new ArrayList<LineSegment>();
    }

    // TODO: Interface IRulingObject
    /**
     * Creates a processor pre-populated from the given ruling objects.
     *
     * @param rulingObjects the objects to add, see
     *        {@link #addRulingObjects(List)}
     */
    public RulingObjectProcessor(List<? extends GenericSegment> rulingObjects)
    {
        rulingLines = new ArrayList<LineSegment>();
        this.addRulingObjects(rulingObjects);
    }

    /**
     * Adds the given ruling objects to this processor.
     *
     * <p>{@link LineSegment}s are added as they are, {@link RectSegment}s are
     * added as the lines returned by {@link RectSegment#toLines()}; objects
     * of any other type are ignored.
     *
     * @param rulingObjects the objects to add; {@code null} is treated as an
     *        empty list
     */
    public void addRulingObjects(List<? extends GenericSegment> rulingObjects)
    {
        if (rulingObjects == null)
        {
            return;
        }

        for (GenericSegment thisObj : rulingObjects)
        {
            if (thisObj instanceof LineSegment)
            {
                rulingLines.add((LineSegment) thisObj);
            }
            else if (thisObj instanceof RectSegment)
            {
                rulingLines.addAll(((RectSegment) thisObj).toLines());
            }
        }
    }

    /**
     * Merges ruling lines that duplicate or continue one another.
     *
     * <p>The current ruling lines are split into horizontal and vertical
     * lines, each group is sorted and merged, and the result (all horizontal
     * lines followed by all vertical lines) replaces the current ruling lines
     * via {@link #setRulingLines(List)}.
     *
     * <p>Note that lines whose direction is neither
     * {@code LineSegment.DIR_HORIZ} nor {@code LineSegment.DIR_VERT} are
     * dropped, and that merging modifies the coordinates of the retained
     * {@link LineSegment} objects in place.
     */
    public void removeDuplicateLines()
    {
        List<LineSegment> horizLines = new ArrayList<LineSegment>();
        List<LineSegment> vertLines = new ArrayList<LineSegment>();

        for (LineSegment thisLine : this.getRulingLines())
        {
            if (thisLine.getDirection() == LineSegment.DIR_HORIZ)
            {
                horizLines.add(thisLine);
            }
            else if (thisLine.getDirection() == LineSegment.DIR_VERT)
            {
                vertLines.add(thisLine);
            }
            // DIR_OTHER: such lines are not expected in this application
            // and are intentionally left out of the result.
        }

        // Sorting avoids lines being processed in the order
        // left, right, middle, which would leave two separate lines
        // even though they join.
        // (original note: the comparators use X1 or Y1 only, which is
        // not thought to matter here)

        // sort all horizontal lines in x order
        Collections.sort(horizLines, new XComparator());
        // and all vertical lines in reverse y order
        Collections.sort(vertLines, new ReverseComparator(new YComparator()));

        List<LineSegment> retVal = new ArrayList<LineSegment>();
        retVal.addAll(mergeHorizontalLines(horizLines));
        retVal.addAll(mergeVerticalLines(vertLines));

        this.setRulingLines(retVal);
    }

    /**
     * Merges horizontal lines, in the order given, into as few lines as
     * possible.
     *
     * <p>A line is merged into every already-kept line whose Y mid-point
     * passes {@code Utils.within} with {@link #MERGE_TOLERANCE} and for which
     * {@code SegmentUtils.horizIntersect} succeeds against the line's X extent
     * widened by {@link #MERGE_TOLERANCE} at each end; the kept line is grown
     * to cover both. A line that merges with nothing is kept itself.
     *
     * @param sortedLines the horizontal lines, already sorted
     * @return the kept (and possibly extended) lines
     */
    private static List<LineSegment> mergeHorizontalLines(
        List<LineSegment> sortedLines)
    {
        List<LineSegment> merged = new ArrayList<LineSegment>();

        for (LineSegment thisLine : sortedLines)
        {
            boolean addedToExistingLine = false;

            // no early exit: a line may extend several kept lines
            for (LineSegment l : merged)
            {
                if (Utils.within(thisLine.getYmid(), l.getYmid(),
                        MERGE_TOLERANCE)
                    && SegmentUtils.horizIntersect(l,
                        thisLine.getX1() - MERGE_TOLERANCE,
                        thisLine.getX2() + MERGE_TOLERANCE))
                {
                    l.setX1(Utils.minimum(thisLine.getX1(), l.getX1()));
                    l.setX2(Utils.maximum(thisLine.getX2(), l.getX2()));
                    addedToExistingLine = true;
                }
            }

            if (!addedToExistingLine)
            {
                merged.add(thisLine);
            }
        }

        return merged;
    }

    /**
     * Merges vertical lines, in the order given, into as few lines as
     * possible.
     *
     * <p>A line is merged into every already-kept line whose X mid-point
     * passes {@code Utils.within} with {@link #MERGE_TOLERANCE} and for which
     * {@code SegmentUtils.vertIntersect} succeeds against the line's Y extent
     * widened by {@link #MERGE_TOLERANCE} at each end; the kept line is grown
     * to cover both. A line that merges with nothing is kept itself.
     *
     * @param sortedLines the vertical lines, already sorted
     * @return the kept (and possibly extended) lines
     */
    private static List<LineSegment> mergeVerticalLines(
        List<LineSegment> sortedLines)
    {
        List<LineSegment> merged = new ArrayList<LineSegment>();

        for (LineSegment thisLine : sortedLines)
        {
            boolean addedToExistingLine = false;

            // no early exit: a line may extend several kept lines
            for (LineSegment l : merged)
            {
                if (Utils.within(thisLine.getXmid(), l.getXmid(),
                        MERGE_TOLERANCE)
                    && SegmentUtils.vertIntersect(l,
                        thisLine.getY1() - MERGE_TOLERANCE,
                        thisLine.getY2() + MERGE_TOLERANCE))
                {
                    l.setY1(Utils.minimum(thisLine.getY1(), l.getY1()));
                    l.setY2(Utils.maximum(thisLine.getY2(), l.getY2()));
                    addedToExistingLine = true;
                }
            }

            if (!addedToExistingLine)
            {
                merged.add(thisLine);
            }
        }

        return merged;
    }

    /**
     * Returns the ruling lines currently held by this processor.
     *
     * @return the internal list itself, not a copy
     */
    public List<LineSegment> getRulingLines() {
        return rulingLines;
    }

    /**
     * Replaces the ruling lines held by this processor.
     *
     * @param rulingLines the new list, stored without copying
     */
    public void setRulingLines(List<LineSegment> rulingLines) {
        this.rulingLines = rulingLines;
    }
}