package technology.tabula;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDPage;
/**
 * A rectangular region of a PDF page together with the text elements and
 * rulings associated with it.
 *
 * <p>An instance may describe a complete page or a cropped sub-area produced
 * by {@link #getArea(Rectangle)}. Ruling post-processing (see
 * {@link #getRulings()}) is performed lazily and cached; the cache is dropped
 * whenever {@link #addRuling(Ruling)} is called.
 */
@SuppressWarnings("serial")
// TODO: this class should probably be called "PageArea" or something like that
public class Page extends Rectangle {

    /** Minimum character width assigned to a sub-area that contains no text. */
    private static final float DEFAULT_MIN_CHAR_WIDTH = 7;
    /** Minimum character height assigned to a sub-area that contains no text. */
    private static final float DEFAULT_MIN_CHAR_HEIGHT = 7;

    private Integer rotation;
    private int pageNumber;
    private List<TextElement> texts;

    /** Rulings as supplied by the caller (plus any added through {@link #addRuling(Ruling)}). */
    private List<Ruling> rulings;
    /** Cached result of {@link #getRulings()}; {@code null} until computed. */
    private List<Ruling> cleanRulings = null;
    /** Cached collapsed vertical rulings; {@code null} until computed. */
    private List<Ruling> verticalRulingLines = null;
    /** Cached collapsed horizontal rulings; {@code null} until computed. */
    private List<Ruling> horizontalRulingLines = null;

    private float minCharWidth;
    private float minCharHeight;
    private RectangleSpatialIndex<TextElement> spatial_index;
    private PDPage pdPage;

    /**
     * Creates a page area without text, rulings or spatial index.
     *
     * @param top         top coordinate of the area
     * @param left        left coordinate of the area
     * @param width       width of the area
     * @param height      height of the area
     * @param rotation    rotation value of the page
     * @param page_number number of the page
     * @param pdPage      underlying PDFBox page
     */
    public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) {
        super(top, left, width, height);
        this.rotation = rotation;
        this.pageNumber = page_number;
        this.pdPage = pdPage;
    }

    /**
     * Creates a page area with text elements and rulings but no spatial index.
     *
     * @param top         top coordinate of the area
     * @param left        left coordinate of the area
     * @param width       width of the area
     * @param height      height of the area
     * @param rotation    rotation value of the page
     * @param page_number number of the page
     * @param pdPage      underlying PDFBox page
     * @param characters  text elements of the area; stored by reference
     * @param rulings     rulings of the area; stored by reference
     */
    public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage,
            List<TextElement> characters, List<Ruling> rulings) {
        this(top, left, width, height, rotation, page_number, pdPage);
        this.texts = characters;
        this.rulings = rulings;
    }

    /**
     * Creates a fully populated page area.
     *
     * @param top           top coordinate of the area
     * @param left          left coordinate of the area
     * @param width         width of the area
     * @param height        height of the area
     * @param rotation      rotation value of the page
     * @param page_number   number of the page
     * @param pdPage        underlying PDFBox page
     * @param characters    text elements of the area; stored by reference
     * @param rulings       rulings of the area; stored by reference
     * @param minCharWidth  smallest character width in the area
     * @param minCharHeight smallest character height in the area
     * @param index         spatial index used by {@link #getText(Rectangle)}
     */
    public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage,
            List<TextElement> characters, List<Ruling> rulings,
            float minCharWidth, float minCharHeight, RectangleSpatialIndex<TextElement> index) {
        this(top, left, width, height, rotation, page_number, pdPage, characters, rulings);
        this.minCharHeight = minCharHeight;
        this.minCharWidth = minCharWidth;
        this.spatial_index = index;
    }

    /**
     * Returns a new {@code Page} restricted to {@code area}.
     *
     * <p>The result carries the text elements returned by
     * {@link #getText(Rectangle)}, this page's processed rulings cropped to
     * the area, and four extra rulings along the area's border. It shares this
     * page's rotation, page number, PDFBox page and spatial index. Its minimum
     * character width/height are the smallest width/height among the selected
     * text elements, or 7 when the area contains no text.
     *
     * @param area region to extract
     * @return a new page describing {@code area}
     */
    public Page getArea(Rectangle area) {
        List<TextElement> areaText = getText(area);

        float smallestWidth = DEFAULT_MIN_CHAR_WIDTH;
        float smallestHeight = DEFAULT_MIN_CHAR_HEIGHT;
        if (!areaText.isEmpty()) {
            // Single pass instead of two Collections.min scans. java.lang.Float.compare
            // keeps the original ordering semantics (NaN sorts last, -0.0 before 0.0).
            TextElement first = areaText.get(0);
            smallestWidth = first.width;
            smallestHeight = first.height;
            for (TextElement te : areaText) {
                if (java.lang.Float.compare(te.width, smallestWidth) < 0) {
                    smallestWidth = te.width;
                }
                if (java.lang.Float.compare(te.height, smallestHeight) < 0) {
                    smallestHeight = te.height;
                }
            }
        }

        Page rv = new Page(
                (float) area.getTop(),
                (float) area.getLeft(),
                (float) area.getWidth(),
                (float) area.getHeight(),
                rotation,
                pageNumber,
                pdPage,
                areaText,
                Ruling.cropRulingsToArea(getRulings(), area),
                smallestWidth,
                smallestHeight,
                spatial_index);

        addBorderRulings(rv);
        return rv;
    }

    /** Adds one ruling per edge of {@code target}, in the order top, right, bottom, left. */
    private static void addBorderRulings(Page target) {
        Point2D.Double topLeft = new Point2D.Double(target.getLeft(), target.getTop());
        Point2D.Double topRight = new Point2D.Double(target.getRight(), target.getTop());
        Point2D.Double bottomRight = new Point2D.Double(target.getRight(), target.getBottom());
        Point2D.Double bottomLeft = new Point2D.Double(target.getLeft(), target.getBottom());

        // Fresh point instances per ruling so no two rulings share an endpoint object.
        target.addRuling(new Ruling(topLeft, topRight));
        target.addRuling(new Ruling(new Point2D.Double(topRight.getX(), topRight.getY()), bottomRight));
        target.addRuling(new Ruling(new Point2D.Double(bottomRight.getX(), bottomRight.getY()), bottomLeft));
        target.addRuling(new Ruling(new Point2D.Double(bottomLeft.getX(), bottomLeft.getY()),
                new Point2D.Double(topLeft.getX(), topLeft.getY())));
    }

    /**
     * Convenience overload of {@link #getArea(Rectangle)} taking edge coordinates.
     *
     * @param top    top edge
     * @param left   left edge
     * @param bottom bottom edge
     * @param right  right edge
     * @return a new page describing the given region
     */
    public Page getArea(float top, float left, float bottom, float right) {
        Rectangle area = new Rectangle(top, left, right - left, bottom - top);
        return this.getArea(area);
    }

    /**
     * @return the text elements this page was constructed with; {@code null}
     *         if none were supplied
     */
    public List<TextElement> getText() {
        return texts;
    }

    /**
     * Looks up text elements for {@code area} through the spatial index.
     *
     * <p>NOTE: a page built without a spatial index (the 7- and 9-argument
     * constructors) cannot answer this query and fails with a
     * {@link NullPointerException}.
     *
     * @param area region to query
     * @return the result of the spatial index's {@code contains(area)} query
     */
    public List<TextElement> getText(Rectangle area) {
        return this.spatial_index.contains(area);
    }

    /**
     * Convenience overload of {@link #getText(Rectangle)} taking edge coordinates.
     *
     * @param top    top edge
     * @param left   left edge
     * @param bottom bottom edge
     * @param right  right edge
     * @return the text elements found for the given region
     */
    public List<TextElement> getText(float top, float left, float bottom, float right) {
        return this.getText(new Rectangle(top, left, right - left, bottom - top));
    }

    /** @return the rotation value supplied at construction */
    public Integer getRotation() {
        return rotation;
    }

    /** @return the page number supplied at construction */
    public int getPageNumber() {
        return pageNumber;
    }

    /** @return same list as {@link #getText()} */
    public List<TextElement> getTexts() {
        return texts;
    }

    /**
     * Returns the minimum bounding box that contains all the TextElements on this Page.
     *
     * @return the bounds computed by {@code Utils.bounds}, or an empty
     *         {@code Rectangle} when the page has no text elements
     */
    public Rectangle getTextBounds() {
        List<TextElement> elements = this.getText();
        // A page created without text has a null list; treat it like an empty one.
        if (elements == null || elements.isEmpty()) {
            return new Rectangle();
        }
        return Utils.bounds(elements);
    }

    /**
     * Returns the processed rulings of this page: vertical rulings followed by
     * horizontal rulings, each group passed through
     * {@code Ruling.collapseOrientedRulings}.
     *
     * <p>The result is cached until {@link #addRuling(Ruling)} is called. As a
     * side effect the raw rulings are handed to {@code Utils.snapPoints}
     * together with the minimum character width and height, and the
     * vertical/horizontal caches are populated. When the page has no rulings a
     * new empty list is returned and both oriented caches are set to empty lists.
     *
     * @return processed rulings; never {@code null}
     */
    public List<Ruling> getRulings() {
        if (this.cleanRulings != null) {
            return this.cleanRulings;
        }

        if (this.rulings == null || this.rulings.isEmpty()) {
            this.verticalRulingLines = new ArrayList<Ruling>();
            this.horizontalRulingLines = new ArrayList<Ruling>();
            return new ArrayList<Ruling>();
        }

        Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight);

        // Orientation is evaluated per group right before collapsing that group,
        // keeping the original evaluation order.
        List<Ruling> verticals = new ArrayList<Ruling>();
        for (Ruling candidate : this.rulings) {
            if (candidate.vertical()) {
                verticals.add(candidate);
            }
        }
        this.verticalRulingLines = Ruling.collapseOrientedRulings(verticals);

        List<Ruling> horizontals = new ArrayList<Ruling>();
        for (Ruling candidate : this.rulings) {
            if (candidate.horizontal()) {
                horizontals.add(candidate);
            }
        }
        this.horizontalRulingLines = Ruling.collapseOrientedRulings(horizontals);

        this.cleanRulings = new ArrayList<Ruling>(this.verticalRulingLines);
        this.cleanRulings.addAll(this.horizontalRulingLines);
        return this.cleanRulings;
    }

    /**
     * @return the collapsed vertical rulings, computing them through
     *         {@link #getRulings()} if they are not cached yet
     */
    public List<Ruling> getVerticalRulings() {
        if (this.verticalRulingLines == null) {
            this.getRulings();
        }
        return this.verticalRulingLines;
    }

    /**
     * @return the collapsed horizontal rulings, computing them through
     *         {@link #getRulings()} if they are not cached yet
     */
    public List<Ruling> getHorizontalRulings() {
        if (this.horizontalRulingLines == null) {
            this.getRulings();
        }
        return this.horizontalRulingLines;
    }

    /**
     * Adds a ruling to the raw rulings of this page and invalidates every
     * cached processed-ruling list.
     *
     * @param r ruling to add; must not be oblique
     * @throws UnsupportedOperationException if {@code r} is oblique
     */
    public void addRuling(Ruling r) {
        if (r.oblique()) {
            throw new UnsupportedOperationException("Can't add an oblique ruling");
        }
        // Pages built with the 7-argument constructor start without a rulings list.
        if (this.rulings == null) {
            this.rulings = new ArrayList<Ruling>();
        }
        this.rulings.add(r);
        // clear caches
        this.verticalRulingLines = null;
        this.horizontalRulingLines = null;
        this.cleanRulings = null;
    }

    /**
     * @return the raw rulings list held by this page (not a copy); may be
     *         {@code null} if no rulings were ever supplied or added
     */
    public List<Ruling> getUnprocessedRulings() {
        return this.rulings;
    }

    /** @return the minimum character width recorded for this page */
    public float getMinCharWidth() {
        return minCharWidth;
    }

    /** @return the minimum character height recorded for this page */
    public float getMinCharHeight() {
        return minCharHeight;
    }

    /** @return the underlying PDFBox page supplied at construction */
    public PDPage getPDPage() {
        return pdPage;
    }

    /** @return the spatial index supplied at construction; may be {@code null} */
    public RectangleSpatialIndex<TextElement> getSpatialIndex() {
        return this.spatial_index;
    }

    /**
     * @return {@code true} if this page holds at least one text element;
     *         {@code false} when the text list is empty or was never supplied
     */
    public boolean hasText() {
        return this.texts != null && !this.texts.isEmpty();
    }
}