/**
* pdfXtk - PDF Extraction Toolkit
* Copyright (c) by the authors/contributors. All rights reserved.
* This project includes code from PDFBox and TouchGraph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the names pdfXtk or PDF Extraction Toolkit; nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* http://pdfxtk.sourceforge.net
*
*/
package at.ac.tuwien.dbai.pdfwrap.model.document;
// import java.util.Collection;
import java.util.*;
/**
* Base class for segments that contain other sub-segments.
*
* @author Tamir Hassan, pdfanalyser@tamirhassan.com
* @version PDF Analyser 0.9
*/
public class CompositeSegment<T extends GenericSegment> extends TextSegment
    implements Cloneable
{
    // TODO: multiple inheritance (sort of); create a
    // CompoundSegment.

    /** The sub-segments held by this segment, in list order. */
    protected List<T> items;
    // protected boolean constantFont = true;
    // protected boolean constantFontSize = true;

    /* 30.11.06: these constructors appear to be useless*/
    // 1.12.06: but they are used by TextBlock...

    /**
     * Creates a segment with the given bounds and text attributes and an
     * empty list of sub-items.
     *
     * @param x1
     *            The x1 coordinate of the segment.
     * @param x2
     *            The x2 coordinate of the segment.
     * @param y1
     *            The y1 coordinate of the segment.
     * @param y2
     *            The y2 coordinate of the segment.
     * @param text
     *            The textual contents of the segment.
     * @param fontName
     *            The (main) font name of the segment.
     * @param fontSize
     *            The (main) font size in the segment.
     */
    public CompositeSegment(float x1, float x2, float y1, float y2,
        String text, String fontName, float fontSize)
    {
        super(x1, x2, y1, y2, text, fontName, fontSize);
        this.items = new ArrayList<T>();
    }

    /**
     * Creates a segment with the given bounds and an empty list of sub-items.
     *
     * @param x1 The x1 coordinate of the segment.
     * @param x2 The x2 coordinate of the segment.
     * @param y1 The y1 coordinate of the segment.
     * @param y2 The y2 coordinate of the segment.
     */
    public CompositeSegment(float x1, float x2, float y1, float y2)
    {
        super(x1, x2, y1, y2);
        this.items = new ArrayList<T>();
    }

    /**
     * Creates a segment with the given bounds, text attributes and sub-items.
     * The list is stored by reference, not copied.
     *
     * @param x1 The x1 coordinate of the segment.
     * @param x2 The x2 coordinate of the segment.
     * @param y1 The y1 coordinate of the segment.
     * @param y2 The y2 coordinate of the segment.
     * @param text The textual contents of the segment.
     * @param fontName The (main) font name of the segment.
     * @param fontSize The (main) font size in the segment.
     * @param items The sub-items of the segment.
     */
    public CompositeSegment(float x1, float x2, float y1, float y2,
        String text, String fontName, float fontSize, List<T> items)
    {
        super(x1, x2, y1, y2, text, fontName, fontSize);
        this.items = items;
    }

    /**
     * Creates a segment with the given bounds and sub-items.
     * The list is stored by reference, not copied.
     *
     * @param x1 The x1 coordinate of the segment.
     * @param x2 The x2 coordinate of the segment.
     * @param y1 The y1 coordinate of the segment.
     * @param y2 The y2 coordinate of the segment.
     * @param items The sub-items of the segment.
     */
    public CompositeSegment(float x1, float x2, float y1, float y2,
        List<T> items)
    {
        super(x1, x2, y1, y2);
        this.items = items;
    }

    /**
     * Creates a segment around an existing list of sub-items. The list is
     * stored by reference; the calculated fields are not filled in until
     * {@link #setCalculatedFields()} is called.
     *
     * @param items The sub-items of the segment.
     */
    public CompositeSegment(List<T> items)
    {
        super();
        this.items = items;
    }

    /**
     * Creates a segment with an empty list of sub-items.
     */
    public CompositeSegment()
    {
        // most common method of initialization now
        // the fields are filled once all the items have
        // been added...
        super();
        this.items = new ArrayList<T>();
    }

    /**
     * @return the live (not copied) list of sub-items
     */
    public List<T> getItems()
    {
        return items;
    }

    /**
     * @return the first sub-item
     * @throws IndexOutOfBoundsException if there are no sub-items
     */
    public T getFirstItem()
    {
        return items.get(0);
    }

    /**
     * @return the last sub-item
     * @throws IndexOutOfBoundsException if there are no sub-items
     */
    public T getLastItem()
    {
        return items.get(items.size() - 1);
    }

    /**
     * Replaces the list of sub-items. The list is stored by reference and
     * the calculated fields are not updated.
     *
     * @param items the new list of sub-items
     */
    public void setItems(List<T> items)
    {
        this.items = items;
    }

    /**
     * @return Returns a clone of this segment, i.e.
     * the co-ordinates and other attributes and a
     * _shallowly cloned_ list of sub-objects
     */
    @Override
    public Object clone()
    {
        // Same cast as before, now carrying the type argument; it is
        // unchecked only because generic type arguments are erased.
        @SuppressWarnings("unchecked")
        CompositeSegment<T> retVal = (CompositeSegment<T>) super.clone();
        // 2011-01-24 List is not cloneable, so copy it into a new list;
        // the elements themselves are shared with the original.
        retVal.items = new ArrayList<T>(this.items);
        return retVal;
    }

    /**
     * Overrides super to include the number of items.
     *
     * @return the tag name, the number of sub-items and the attributes
     */
    @Override
    public String toString()
    {
        return tagName() + " no. items: " + items.size() + " - " + getAttributes();
    }

    /**
     * Prints this segment followed by all of its sub-items (recursively)
     * to standard output.
     */
    public void printSubItems()
    {
        System.out.println(this);
        printSubItems(0);
    }

    /**
     * Prints the sub-items to standard output, one per line, descending into
     * any sub-item that is itself a composite segment.
     *
     * @param indent number of indent units printed before each direct sub-item;
     *               nested levels use {@code indent + 1}
     */
    public void printSubItems(int indent)
    {
        // System.out.println(this);
        for (GenericSegment gs : items)
        {
            for (int n = 0; n < indent; n++)
            {
                System.out.print(" ");
            }
            System.out.println(gs);
            if (gs instanceof CompositeSegment<?>)
            {
                CompositeSegment<?> cs = (CompositeSegment<?>) gs;
                cs.printSubItems(indent + 1);
            }
        }
    }

    /**
     * returns string representation including sub-items
     *
     * @return {@link #toString()} followed by one line per direct sub-item
     */
    public String toExtendedString()
    {
        StringBuilder sb = new StringBuilder(toString());
        sb.append("\nSub-items:\n");
        for (GenericSegment gs : items)
        {
            sb.append(gs.toString()).append("\n");
        }
        sb.append("\n");
        return sb.toString();
    }

    /**
     * Recomputes bounding box, text, font name and font size from the
     * current sub-items, in that order.
     */
    public void setCalculatedFields()
    {
        findBoundingBox();
        findText();
        findFontName();
        findFontSize();
    }

    /**
     * Copies bounding box, text, font name and font size from another
     * text segment instead of computing them from the sub-items.
     *
     * @param ts the segment to copy the values from
     */
    public void setCalculatedFields(TextSegment ts)
    {
        this.setBoundingBox(ts.getBoundingBox());
        this.setText(ts.getText());
        this.setFontName(ts.getFontName());
        this.setFontSize(ts.getFontSize());
    }

    // TODO: remove following method? or move to utils? count mode etc.
    /**
     * Sets the font name to the most frequent font name among the sub-items
     * that are text segments and not blank segments. When several names
     * share the highest frequency, the one that reached that frequency first
     * (in list order) wins. The font name is left unchanged when no such
     * sub-item exists.
     */
    public void findFontName()
    {
        Map<String, Integer> fontCounts = new HashMap<String, Integer>();
        String modalFont = null;
        int modalCount = 0;

        for (GenericSegment thisSegment : items)
        {
            if (!(thisSegment instanceof TextSegment)
                || thisSegment instanceof IBlankSegment)
            {
                continue;
            }

            String name = ((TextSegment) thisSegment).getFontName();
            Integer previous = fontCounts.get(name);
            int count = (previous == null) ? 1 : previous.intValue() + 1;
            fontCounts.put(name, Integer.valueOf(count));

            // A strictly higher count takes over as the mode; on a tie the
            // earlier name is kept, unless that earlier name was null, in
            // which case the first non-null name at that count replaces it.
            if (count > modalCount
                || (count == modalCount && modalFont == null))
            {
                modalCount = count;
                modalFont = name;
            }
        }

        if (modalCount > 0) // otherwise no font!
        {
            this.fontName = modalFont;
        }
    }

    /**
     * Sets the bounding box to the union of all non-blank sub-items and the
     * font size to the mean font size of the non-blank text sub-items.
     * The bounding box is left unchanged when there is no non-blank
     * sub-item; the font size is set to {@code -1.0f} when there is no
     * non-blank text sub-item.
     */
    public void findBoundingBox()
    {
        boolean first = true;
        int noItems = 0;
        double fontSizeTotal = 0.0;

        for (GenericSegment thisSegment : items)
        {
            if (thisSegment instanceof IBlankSegment)
            {
                continue;
            }

            if (thisSegment instanceof TextSegment)
            {
                noItems++;
                fontSizeTotal += ((TextSegment) thisSegment).getFontSize();
            }

            if (first)
            {
                x1 = thisSegment.getX1();
                x2 = thisSegment.getX2();
                y1 = thisSegment.getY1();
                y2 = thisSegment.getY2();
                first = false;
            }
            else
            {
                growBoundingBox(thisSegment);
            }
        }

        // Must be a strict comparison: with zero items the division would
        // produce NaN and the fallback below could never be reached.
        if (noItems > 0)
        {
            fontSize = (float) (fontSizeTotal / noItems);
        }
        else
        {
            fontSize = -1.0f;
        }
    }

    /**
     * Sets the text to the concatenation of the texts of all text
     * sub-items, in list order. A sub-item whose text is empty contributes
     * a single space.
     */
    public void findText()
    {
        StringBuilder sb = new StringBuilder();
        for (GenericSegment gs : items)
        {
            if (gs instanceof TextSegment)
            {
                String itemText = ((TextSegment) gs).getText();
                // add space between line objects; compare by value, as a
                // reference comparison misses non-interned empty strings
                if ("".equals(itemText))
                {
                    sb.append(" ");
                }
                else
                {
                    sb.append(itemText);
                }
            }
        }
        text = sb.toString();
    }

    // TODO: replace in TextBlock to count modal size
    // at character level
    /**
     * Sets the font size to the mean font size of the text sub-items.
     * Sub-items that are not text segments are ignored; when there is no
     * text sub-item the font size is set to {@code -1.0f}.
     */
    public void findFontSize()
    {
        int textItems = 0;
        float sum = 0;
        for (GenericSegment gs : items)
        {
            if (gs instanceof TextSegment)
            {
                sum += ((TextSegment) gs).getFontSize();
                textItems++;
            }
        }

        // Divide by the number of segments that actually contributed to the
        // sum, and avoid 0/0 (NaN) when nothing contributed.
        if (textItems > 0)
        {
            setFontSize(sum / textItems);
        }
        else
        {
            setFontSize(-1.0f);
        }
    }
}