PDFObjectExtractor.java example

Explorer

pdfxtk-master
- graphwrap
  - src
    - main
      - java
        at
        ac
        tuwien
        dbai
        pdfwrap
        GraphMatcher.java
        gui
        GUI.java
        displayable
        Block2.java
        Image2.java
        TextArea.java
        TextArea2.java
        graphbrowser
        DocGBPanel.java
        DocNavigateUI.java
        DocWrapperUI.java
        model
        graph
        DocEdge.java
        DocGraphEltSet.java
        DocNode.java
        DocumentGraph.java
        WrappingInstance.java
        package-info.java
        com
        touchgraph
        graphlayout
        Edge.java
        Node.java
        TGPanel.java
        interaction
        DragAddUI.java
        DragMultiselectUI.java
        DragNodeUI.java
        GLEditUI.java
        GLNavigateUI.java
        HVRotateDragUI.java
        HVScroll.java
        HyperScroll.java
        LocalityScroll.java
        RotateScroll.java
        TGAbstractClickUI.java
        TGAbstractDragUI.java
        TGAbstractMouseMotionUI.java
        TGAbstractMousePausedUI.java
        TGSelfDeactivatingUI.java
        TGUIManager.java
        TGUserInterface.java
        ZoomScroll.java
        iiuf
        awt
        Applet.java
        AppletFrame.java
        Awt.java
        BorderLayout.java
        DOM.java
        DateChooser.java
        Dialog.java
        Find.java
        FindListener.java
        Frame.java
        HLine.java
        HNav.java
        InfoList.java
        LoginRequester.java
        MultiLineLabel.java
        ProgressBar.java
        Requester.java
        SplashScreen.java
        StringRequester.java
        TableView.java
        VNav.java
        WaitBar.java
        dom
        Comparison.java
        DOM.java
        DOMContext.java
        DOMElementFactory.java
        DOMHandler.java
        DOMManager.java
        DOMUtils.java
        DOMUtilsNS.java
        DOMable.java
        DefaultElement.java
        ElementList.java
        ProjectX.java
        Xerces.java
        test
        DOM.java
        jai
        BinarizeDescriptor.java
        BinarizeOpImage.java
        BlackOrDescriptor.java
        BlackOrOpImage.java
        CCDescriptor.java
        CCOpImage.java
        DirectRasterAccessor.java
        DisplayImage.java
        DisplayImageLayer.java
        DisplayImagePanel.java
        DisplayRect.java
        FolderImageStorage.java
        HilightImagePanel.java
        HistogramPanel.java
        ImageStorage.java
        ImageStorageException.java
        ImageViewer.java
        JAITest.java
        LUV_ColorSpace.java
        PowerDescriptor.java
        PowerOpImage.java
        ProjectionProfile.java
        ProjectionProfileDescriptor.java
        ProjectionProfileOpImage.java
        RLSADescriptor.java
        RLSAOpImage.java
        RandomizeDescriptor.java
        RandomizeOpImage.java
        RectDisplayImageLayer.java
        SkeletonDescriptor.java
        SkeletonOpImage.java
        Util.java
        test
        ConnectedComponents.java
        ImageStorageTest.java
        ProjectionProfiles.java
        Scale.java
        log
        Client.java
        Const.java
        List.java
        Log.java
        LogListener.java
        LogMessage.java
        LogMessageListener.java
        LogTableModel.java
        Overview.java
        Server.java
        Stdout.java
        swing
        AbstractPreview.java
        AudioPreview.java
        AutoExpandingJTree.java
        ButtonTreePathView.java
        ChartPanel.java
        CheckBoxList.java
        ChooserTreeView.java
        ContextMenu.java
        ContextMenuEnabled.java
        ContextMenuManager.java
        DOM.java
        HexagonalBorder.java
        ImagePreview.java
        JInternalFrame.java
        JNumberField.java
        JTreeView.java
        JWindowToolBarUI.java
        ListTreeModel.java
        LocatedIcon.java
        MemoryMonitor.java
        MultiColumnListUI.java
        MultiLineToolTip.java
        Preferences.java
        PreviewFileChooser.java
        ProgressMonitor.java
        Resource.java
        SetSelectionModel.java
        SimpleTreeModel.java
        SplitPaneTreeView.java
        SubTreeModel.java
        Swing.java
        TableMap.java
        TableSorter.java
        TreeView.java
        TreeViewTest.java
        VerticalLabelUI.java
        graph
        AbstractGraphEdgeFactory.java
        AbstractNodeComponentFactory.java
        AbstractPortFactory.java
        AbstractPropertiesFactory.java
        ConnectingNode.java
        DOM.java
        DefaultNL.java
        DistanceOrderedNL.java
        EdgeEditor.java
        EdgeMarker.java
        EquilateralTriangleMarker.java
        ForceDirectedNL.java
        GeometryEditor.java
        GraphEdge.java
        GraphEdgeUtils.java
        GraphNodeComponent.java
        GraphNodePort.java
        GraphPanel.java
        GraphPanelEditor.java
        GraphRouter.java
        LabelMarker.java
        NodeLayouter.java
        OrthogonalRouter.java
        StraightLineRouter.java
        StyleEditor.java
        TreeNL.java
        propertiespanel
        ComboBox.java
        Group.java
        List.java
        ListAccess.java
        NumberField.java
        PropertiesPanel.java
        Property.java
        StringCheckbox.java
        TextField.java
        util
        AppletPreferences.java
        AsyncAccelerator.java
        AsyncInvocation.java
        Attributable.java
        AttributeFactory.java
        Base64Decoder.java
        Base64Encoder.java
        Base64FormatException.java
        BinaryTree.java
        BinaryTreeNode.java
        CacheArray.java
        CacheArrayBackEnd.java
        ClusteringQueue.java
        CopyThread.java
        Crypt.java
        DBPreferences.java
        DOM.java
        DefaultAttributable.java
        DirTree.java
        EventListenerList.java
        FilePreferences.java
        ListParser.java
        NestedException.java
        NotImplementedException.java
        PrefNamer.java
        PrefPropertyWatcher.java
        PrefReanimator.java
        PrefWatcher.java
        Preferences.java
        PreferencesHandler.java
        PreferencesStore.java
        ProgressListener.java
        ProgressWatcher.java
        Queue.java
        RectMap.java
        RectMapFilter.java
        RedBlackNode.java
        RedBlackTree.java
        StopWatch.java
        Strings.java
        SysPreferences.java
        Timer.java
        Trans.java
        Tree.java
        TreeNode.java
        TreeWalk.java
        UTHTML.java
        UTMacOS.java
        UTTeX.java
        Unicode.java
        UnicodeTrans.java
        UnicodeTranslator.java
        Util.java
        graph
        AbstractGraphModel.java
        DOM.java
        DefaultGraphEdge.java
        DefaultGraphModel.java
        DefaultGraphNode.java
        DefaultGraphPort.java
        GraphEdge.java
        GraphException.java
        GraphModel.java
        GraphModelListener.java
        GraphNode.java
        GraphPort.java
        GraphPortListener.java
        GraphWalk.java
        Path.java
        Utilities.java
        xmillum
        ActionHandler.java
        ActionHandlerFactory.java
        ActionHandlerParam.java
        AllFlagListener.java
        BrowserContext.java
        BrowserPanel.java
        Displayable.java
        DisplayableAppearance.java
        DisplayableClass.java
        DisplayableFactory.java
        DocumentChangeEvent.java
        DocumentChangeListener.java
        ElementTagger.java
        FlagAccess.java
        FlagListener.java
        FlagManager.java
        IllumDocument.java
        IllumException.java
        IllumSource.java
        Image.java
        ImageFactory.java
        ImageListener.java
        JAIImageFactory.java
        JavaImageFactory.java
        Parameter.java
        ParameterException.java
        StatusListener.java
        Style.java
        StyleRegistry.java
        Tool.java
        ToolFactory.java
        Window.java
        WindowCreator.java
        XMIllumDesktop.java
        XMIllumFrame.java
        displayable
        Block.java
        Image.java
        Polygon.java
        Root.java
        TextArea.java
        handlers
        Info.java
        Invalidate.java
        LabelAttribute.java
        Output.java
        PopupFlagger.java
        Select.java
        Split.java
        TextUpdate.java
        tool
        FlagOutput.java
        Hottable.java
        InfoWindow.java
        LabelWizard.java
        TextUpdate.java
        XMLTree.java
- pdfXtk
  - src
    - main
      - java
        at
        ac
        tuwien
        dbai
        pdfwrap
        ProcessFile.java
        analysis
        AbstractPageSegmenter.java
        CandidateCluster.java
        LineProcessor.java
        PageProcessor.java
        RulingObjectProcessor.java
        SegmentationResult.java
        TextBlockPageSegmenter.java
        comparators
        EdgeAttributeComparator.java
        EdgeLengthComparator.java
        XComparator.java
        XYTextComparator.java
        YComparator.java
        exceptions
        DocumentProcessingException.java
        gui
        EdgeSegment.java
        elements
        CheckBoxList.java
        MainFrame.java
        PDFPanel.java
        PageSpinner.java
        SelectionPanel.java
        exceptions
        UnknownShapeException.java
        layer
        Shapes.java
        Style.java
        StyledSegment.java
        tools
        MultiLineTooltip.java
        OpenDocFileFilter.java
        PDF_XMLSerializer.java
        XMLLayerLoader.java
        model
        document
        AttributeTuple.java
        CharSegment.java
        CompositeSegment.java
        GenericSegment.java
        IBlankSegment.java
        IXHTMLSegment.java
        IXmillumSegment.java
        ImageSegment.java
        LineFragment.java
        LineSegment.java
        OpTuple.java
        Page.java
        RectSegment.java
        TextBlock.java
        TextFragment.java
        TextLine.java
        TextSegment.java
        graph
        AdjacencyEdge.java
        AdjacencyGraph.java
        operator
        AppendRectangleToPath.java
        BeginInlineImage.java
        ClosePath.java
        CurveTo.java
        CurveToReplicateFinalPoint.java
        CurveToReplicateInitialPoint.java
        EndPath.java
        FillEvenOddRule.java
        FillNonZeroAndStrokePath.java
        FillNonZeroRule.java
        GRestore.java
        GSave.java
        Invoke.java
        LineTo.java
        ModifyClippingPath.java
        MoveTo.java
        SetLineWidth.java
        SetNonStrokingCMYKColor.java
        SetNonStrokingColorSpace.java
        SetNonStrokingRGBColor.java
        SetStrokingCMYKColor.java
        SetStrokingColorSpace.java
        SetStrokingRGBColor.java
        ShowText.java
        ShowTextGlyph.java
        StrokePath.java
        pdfread
        PDFObjectExtractor.java
        PDFPage.java
        utils
        ListUtils.java
        SegmentUtils.java
        SerializationUtil.java
        Utils.java
        com
        javazoid
        functions
        FileFunctions.java
    - test
      - java
        at
        ac
        tuwien
        dbai
        pdfwrap
        comparators
        EdgeAttributeComparatorTest.java
        utils
        SegmentUtilsTest.java

/**
 * pdfXtk - PDF Extraction Toolkit
 * Copyright (c) by the authors/contributors.  All rights reserved.
 * This project includes code from PDFBox and TouchGraph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the names pdfXtk or PDF Extraction Toolkit; nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://pdfxtk.sourceforge.net
 *
 */
package at.ac.tuwien.dbai.pdfwrap.pdfread;

import at.ac.tuwien.dbai.pdfwrap.model.document.*;
import at.ac.tuwien.dbai.pdfwrap.utils.SegmentUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.Utils;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.encryption.PDEncryptionDictionary;
import org.apache.pdfbox.pdmodel.encryption.PDStandardEncryption;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.util.*;

import java.awt.*;
import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.awt.geom.Point2D;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.util.*;
import java.util.List;

/**
 * This extracts the low-level information in the PDF
 *
 * Based on PDF code
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @author Tamir Hassan, pdfanalyser@tamirhassan.com
 * @version PDF Analyser 0.9
 */
public class PDFObjectExtractor extends PDFStreamEngine
{
	// 13.04.09 2332
	// changes: copied showString, addCharacter from PDFObjectExtractorOld.java
	// these methods did not exist before
	// disabled adding of text block items within showCharacter
	// refactoring would be a good idea...
	
	// copied from PDFStreamEngine as not visible to this method
    public static final byte[] SPACE_BYTES = { (byte)32 };
    
	protected Graphics2D graphics;
    protected Dimension pageSize;
    protected PDPage page;

    protected List lineSubPaths = new ArrayList();
    protected GeneralPath linePath = new GeneralPath();
    
    // 100409: necessary?
    protected Color strokingColor = Color.BLACK;
    protected Color nonStrokingColor = Color.BLACK;
    protected Color currentColor = Color.black;
    
    protected List<ImageSegment> imageList = new ArrayList<ImageSegment>();
    protected List<TextFragment> fragmentList = new ArrayList<TextFragment>();
    protected List<LineSegment> lineList = new ArrayList<LineSegment>();
    protected List<RectSegment> rectList = new ArrayList<RectSegment>();
    // 22.01.07
    protected List<CharSegment> charList = new ArrayList<CharSegment>();
    
    // current subpath
    protected List<LineSegment> currentLines = new ArrayList<LineSegment>();
    protected List<RectSegment> currentRects = new ArrayList<RectSegment>();
    // current path
    protected List<LineSegment> linesToAdd = new ArrayList<LineSegment>();
    protected List<RectSegment> rectsToAdd = new ArrayList<RectSegment>();
    // following relate to subpaths!
    // subpaths containing curves are thrown away
    protected boolean pathContainsCurve = false;
    protected float pathBeginX, pathBeginY;
    protected boolean pathBeginSet = false, pathClosed = false;
    
    // new from 23 Apr. 09
    protected GenericSegment clipBounds = null;
    protected Stack clipBoundsStack = new Stack();
    
    protected float currentX = -1;
    protected float currentY = -1;
    protected boolean newPath = false;
    protected CompositeSegment lastStringFragment = null;
    
    // 24.04.09  Tj operator
    protected boolean newTextFragment = false;
    protected boolean mergeAcrossTextInstructions = false;
    protected int opIndex = -1;
    
    protected int currentPageNo = 0;
    protected int startPage = 1;
    protected int endPage = Integer.MAX_VALUE;
    protected PDOutlineItem startBookmark = null;
    protected int startBookmarkPageNumber = -1;
    protected PDOutlineItem endBookmark = null;
    protected int endBookmarkPageNumber = -1;
    protected PDDocument document;
    protected boolean suppressDuplicateOverlappingText = true;
    protected boolean shouldSeparateByBeads = true;
    protected boolean sortByPosition = false;
    
    protected List pageArticles = null;
    
    /**
     * The charactersByArticle is used to extract text by article divisions.  For example
     * a PDF that has two columns like a newspaper, we want to extract the first column and
     * then the second column.  In this example the PDF would have 2 beads(or articles), one for
     * each column.  The size of the charactersByArticle would be 5, because not all text on the 
     * screen will fall into one of the articles.  The five divisions are shown below
     * 
     * Text before first article
     * first article text
     * text between first article and second article
     * second article text
     * text after second article
     * 
     * Most PDFs won't have any beads, so charactersByArticle will contain a single entry.
     */
    protected Vector charactersByArticle = new Vector();
    
    protected Map characterListMapping = new HashMap();
    
    protected String lineSeparator = System.getProperty("line.separator");
    protected String pageSeparator = System.getProperty("line.separator");
    protected String wordSeparator = " ";
    
    /**
     * Default constructor, loads properties from file.
     * 
     * @throws IOException If there is an error loading properties from the file.
     */
    public PDFObjectExtractor() throws IOException
    {
        // super( ResourceLoader.loadProperties( "Resources/PDFObjectExtractor.properties", true ) );
    	// 100904 don't know what this parameter means, but was set to false before...
    	
//    	super( ResourceLoader.loadProperties( Utils.getRootDir() + "/Resources/PDFObjectExtractor.properties", false ) );
    	super( ResourceLoader.loadProperties( "PDFObjectExtractor.properties", false ) );
    }

    /**
     * This will draw the page to the requested context.
     *
     * @param g The graphics context to draw onto.
     * @param p The page to draw.
     * @param pageDimension The size of the page to draw.
     *
     * @throws IOException If there is an IO error while drawing the page.
     */
    public void drawPage( Graphics g, PDPage p, Dimension pageDimension ) throws IOException
    {
        graphics = (Graphics2D)g;
        page = p;
        pageSize = pageDimension;
        
        graphics.setRenderingHint( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON );
        PDResources resources = page.findResources();
        processStream( page, resources, page.getContents().getStream() );
        List annotations = page.getAnnotations();
        for( int i=0; i<annotations.size(); i++ )
        {
            PDAnnotation annot = (PDAnnotation)annotations.get( i );
            PDRectangle rect = annot.getRectangle();
            String appearanceName = annot.getAppearanceStream();
            PDAppearanceDictionary appearDictionary = annot.getAppearance();
            if( appearDictionary != null )
            {
                if( appearanceName == null )
                {
                    appearanceName = "default";
                }
                Map appearanceMap = appearDictionary.getNormalAppearance();
                PDAppearanceStream appearance = 
                    (PDAppearanceStream)appearanceMap.get( appearanceName );
                if( appearance != null )
                {
                    g.translate( (int)rect.getLowerLeftX(), (int)-rect.getLowerLeftY()  );
                    //g.translate( 20, -20 );
                    processSubStream( page, appearance.getResources(), appearance.getStream() );
                    g.translate( (int)-rect.getLowerLeftX(), (int)+rect.getLowerLeftY()  );
                }
            }
        }
        // Transformations should be done in order
        // 1 - Translate
        // 2 - Rotate
        // 3 - Scale
        // Refer to PDFReference p176 (or 188 in xpdf)
        /*AffineTransform transform = graphics.getTransform();        
        transform.setToTranslation( 0, page.findMediaBox().getHeight()/2 );
        transform.setToRotation((double)p.getRotation());
        transform.setTransform( 1, 0, 0, 1, 0, 0 );        
        transform.setToScale( 1, 1 );
        
        AffineTransform rotation = graphics.getTransform();
        rotation.rotate( (page.findRotation() * Math.PI) / 180d );
        graphics.setTransform( rotation );*/
        
    }

    @Override
    protected void processOperator(PDFOperator arg0, List arg1)
    		throws IOException {
    	opIndex ++;
    	/*
    	System.out.print("arg0: " + arg0);
    	for (Object o : arg1)
    	{
    		if (o instanceof COSString)
    		{
    			System.out.println("  " + ((COSString)o).getString());
    		}
    		else
    		{
    			System.out.print("  " + o);
    		}
    	}
    	System.out.println();
    	*/
    	// TODO Auto-generated method stub
    	super.processOperator(arg0, arg1);
    }
    
    /**
     * You should override this method if you want to perform an action when a
     * string is being shown.
     *
     * @param string The string to display.
     *
     * @throws IOException If there is an error showing the string
     */
    public void showString( byte[] string , int argIndex) throws IOException
    {
    	OpTuple sourceOp = new OpTuple(opIndex, argIndex);
    	
    	//super.showString(string);
    	
    	ArrayList charactersToAdd = new ArrayList();
    	
    	float[] individualWidths = new float[2048];
        float spaceWidth = 0;
        float spacing = 0;
        StringBuffer stringResult = new StringBuffer(string.length);
        
        float characterHorizontalDisplacement = 0;
        float characterVerticalDisplacement = 0;
        float spaceDisplacement = 0;
        float fontSize = getGraphicsState().getTextState().getFontSize();
        float horizontalScaling = getGraphicsState().getTextState().getHorizontalScalingPercent()/100f;
        float verticalScaling = horizontalScaling;//not sure if this is right but what else to do???
        float rise = getGraphicsState().getTextState().getRise();
        final float wordSpacing = getGraphicsState().getTextState().getWordSpacing();
        final float characterSpacing = getGraphicsState().getTextState().getCharacterSpacing();
        float wordSpacingDisplacement = 0;
        //We won't know the actual number of characters until
        //we process the byte data(could be two bytes each) but
        //it won't ever be more than string.length*2(there are some cases
        //were a single byte will result in two output characters "fi"
        
        
        PDFont font = getGraphicsState().getTextState().getFont();
        
        //This will typically be 1000 but in the case of a type3 font
        //this might be a different number
        float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0 );
        float averageWidth = font.getAverageFontWidth();

        Matrix initialMatrix = new Matrix();
        initialMatrix.setValue(0,0,1);
        initialMatrix.setValue(0,1,0);
        initialMatrix.setValue(0,2,0);
        initialMatrix.setValue(1,0,0);
        initialMatrix.setValue(1,1,1);
        initialMatrix.setValue(1,2,0);
        initialMatrix.setValue(2,0,0);
        initialMatrix.setValue(2,1,rise);
        initialMatrix.setValue(2,2,1);


        //this
        int codeLength = 1;
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
        
        //lets see what the space displacement should be
        spaceDisplacement = (font.getFontWidth( SPACE_BYTES, 0, 1 )/glyphSpaceToTextSpaceFactor);
        if( spaceDisplacement == 0 )
        {
            spaceDisplacement = (averageWidth/glyphSpaceToTextSpaceFactor);
            //The average space width appears to be higher than necessary
            //so lets make it a little bit smaller.
            spaceDisplacement *= .80f;
        }
        int pageRotation = page.findRotation();
        Matrix trm = initialMatrix.multiply( getTextMatrix() ).multiply( ctm );
        float x = trm.getValue(2,0);
        float y = trm.getValue(2,1);
        if( pageRotation == 0 )
        {
            trm.setValue( 2,1, -y + page.findMediaBox().getHeight() );
        }
        else if( pageRotation == 90 || pageRotation == -270 )
        {
        	trm.setValue( 2,0, y );
          trm.setValue( 2,1, x ); //2013-03-12 fixed shifted coordinates in e.g. eu-014.pdf (table datset)
//        	                        TODO: update GUI code to correct positioning of raster graphic
//            trm.setValue( 2,1, x + page.findMediaBox().getHeight() - page.findMediaBox().getWidth() );
        }
        else if( pageRotation == 270 || pageRotation == -90 )
        {
            trm.setValue( 2,0, -y  + page.findMediaBox().getHeight() );
            trm.setValue( 2,1, x );
        }
        float xScale = trm.getXScale();
        float yScale = trm.getYScale(); 
        float xPos = trm.getXPosition();
        float yPos = trm.getYPosition();
        spaceWidth = spaceDisplacement * xScale * fontSize;
        wordSpacingDisplacement = wordSpacing*xScale * fontSize;
        float totalStringWidth = 0;
        
        // addition 15.04.09
        // to go back to older version, simply make x and yTrans 0
        // this handles (more or less) edocsacs.pdf
        float xPosBefore2 = getTextMatrix().getXPosition();
        float yPosBefore2 = getTextMatrix().getYPosition();
        float xTrans = 0, yTrans = 0;
        
        if (pageRotation == 0)
        {
	        xTrans = xPos - xPosBefore2;
	        //System.out.println("xPos: " + xPos + " tmxp: " + getTextMatrix().getXPosition() + " xTrans: " + xTrans);
	        yTrans = yPos - yPosBefore2;
	        //System.out.println("yPos: " + yPos + " tmyp: " + getTextMatrix().getYPosition() + " yTrans: " + yTrans);
        }
        else if (pageRotation == 90 || pageRotation == -270)
        {
        	xTrans = xPos - yPosBefore2;
/////        	System.out.println("xPos: " + xPos + " tmyp: " + getTextMatrix().getYPosition() + " xTrans: " + xTrans);
	        yTrans = yPos - xPosBefore2;
/////	        System.out.println("yPos: " + yPos + " tmxp: " + getTextMatrix().getXPosition() + " yTrans: " + yTrans);
        }
        else if (pageRotation == 270 || pageRotation == -90)
        {
        	yTrans = xPos - yPosBefore2;
/////        	System.out.println("xPos: " + xPos + " tmyp: " + getTextMatrix().getYPosition() + " xTrans: " + xTrans);
	        xTrans = yPos - xPosBefore2;
/////	        System.out.println("yPos: " + yPos + " tmxp: " + getTextMatrix().getXPosition() + " yTrans: " + yTrans);
        }
        //xTrans = 0;
        //yTrans = 0;
        // end addition
        
        for( int i=0; i<string.length; i+=codeLength )
        {
            
            codeLength = 1;

            String c = font.encode( string, i, codeLength );
            if( c == null && i+1<string.length)
            {
                //maybe a multibyte encoding
                codeLength++;
                c = font.encode( string, i, codeLength );
            }

            //todo, handle horizontal displacement
            
//            System.out.println("font: " + font + " font width: " + font.getFontWidth( string, i, codeLength ));
            
            characterHorizontalDisplacement = (font.getFontWidth( string, i, codeLength )/glyphSpaceToTextSpaceFactor); 
            characterVerticalDisplacement = 
                Math.max( 
                    characterVerticalDisplacement, 
                    font.getFontHeight( string, i, codeLength)/glyphSpaceToTextSpaceFactor);

            
            // PDF Spec - 5.5.2 Word Spacing
            //
            // Word spacing works the same was as character spacing, but applies
            // only to the space character, code 32.
            //
            // Note: Word spacing is applied to every occurrence of the single-byte
            // character code 32 in a string.  This can occur when using a simple
            // font or a composite font that defines code 32 as a single-byte code.
            // It does not apply to occurrences of the byte value 32 in multiple-byte
            // codes.
            //
            // RDD - My interpretation of this is that only character code 32's that
            // encode to spaces should have word spacing applied.  Cases have been
            // observed where a font has a space character with a character code
            // other than 32, and where word spacing (Tw) was used.  In these cases,
            // applying word spacing to either the non-32 space or to the character
            // code 32 non-space resulted in errors consistent with this interpretation.
            //
            if( (string[i] == 0x20) && c != null && c.equals( " " ) )
            {
                spacing = wordSpacing + characterSpacing;
            }
            else
            {
                spacing = characterSpacing;
            }
            
            // We want to update the textMatrix using the width, in text space units.
            //
            //The adjustment will always be zero.  The adjustment as shown in the
            //TJ operator will be handled separately.
            float adjustment=0;
            //todo, need to compute the vertical displacement
            float ty = 0;
            float tx = ((characterHorizontalDisplacement-adjustment/glyphSpaceToTextSpaceFactor)*fontSize + spacing)
                       *horizontalScaling;
            // tx2, td2 added by TH sometime
            float tx2 = (characterHorizontalDisplacement-adjustment/glyphSpaceToTextSpaceFactor)*fontSize * horizontalScaling;
            
//            System.out.println("String: " + string + " tx: " + tx + " tx2: " + tx2);
//            System.out.println("characterHorizontalDisplacement: " + characterHorizontalDisplacement);
//            System.out.println("horizontalScaling: " + horizontalScaling);
//            System.out.println("adjustment: " + adjustment);
            
            tx2 = (characterHorizontalDisplacement)*fontSize * horizontalScaling;
            
            Matrix td = new Matrix();
            td.setValue( 2, 0, tx );
            td.setValue( 2, 1, ty );
            
            Matrix td2 = new Matrix();
            td2.setValue( 2, 0, tx2 );
            td2.setValue( 2, 1, ty );
            
            float xPosBefore = getTextMatrix().getXPosition();
            float yPosBefore = getTextMatrix().getYPosition();
            Matrix textMatrix2 = (Matrix)getTextMatrix().clone();
            setTextMatrix(td.multiply( getTextMatrix() ));
            textMatrix2 = td2.multiply( textMatrix2 );

            /*
            // addition 15.04.09
             * moved above as it needs to be the same value for the string...
            float xTrans = xPos - xPosBefore;
            System.out.println("xTrans: " + xTrans);
            float yTrans = yPos - yPosBefore;
            System.out.println("yTrans: " + yTrans);
            // end addition
            */
            
            float width = 0, width2 = 0;
            if( pageRotation == 0 )
            {
                width = (getTextMatrix().getXPosition() - xPosBefore);
                width2 = (textMatrix2.getXPosition() - xPosBefore);
            }
            else if( pageRotation == 90 || pageRotation == -270)
            {
                width = (getTextMatrix().getYPosition() - yPosBefore);
                width2 = (textMatrix2.getYPosition() - yPosBefore);
            }
            else if( pageRotation == 270 || pageRotation == -90 )
            {
                width = (yPosBefore - getTextMatrix().getYPosition());
                width2 = (yPosBefore - textMatrix2.getYPosition());
                
                //width = (textMatrix.getYPosition() - yPosBefore);
                //width2 = (textMatrix2.getYPosition() - yPosBefore);
            }
            //there are several cases where one character code will
            //output multiple characters.  For example "fi" or a
            //glyphname that has no mapping like "visiblespace"
            if( c != null )
            {
                float widthOfEachCharacterForCode = width/c.length();
                for( int j=0; j<c.length(); j++)
                {
                    if( stringResult.length()+j <individualWidths.length )
                    {
                        if( c.equals("-"))
                        {
                            //System.out.println( "stringResult.length()+j=" + (widthOfEachCharacterForCode));
                        }
                        individualWidths[stringResult.length()+j] = widthOfEachCharacterForCode;
                    }
                }
            }
            
            totalStringWidth += width;
            //System.out.println("new instruction: " + c + 
            //	" width: " + width + " xPosBefore: " + xPosBefore + " yPosBefore: " + yPosBefore);
            
            // TODO:
            // with this code (incorrect results), try on matrox
            // with indiv. characters as clusters (NG generation)
            // to see whether the graph generation does actually terminate :)
            /*
            TextFragment thisChar = new TextFragment
            	(xPosBefore, xPosBefore + width, yPosBefore, 
            	yPosBefore + + (fontSize * yScale), c, font, fontSize);
            addCharacter(thisChar);
            */
            
            CharSegment thisChar;
            if (c == null) c = "";
            if (pageRotation == 0)
            {
	            //TextFragment 
            	thisChar = new CharSegment
		        	(xPosBefore + xTrans, xPosBefore + width2 + xTrans, yPos, 
		        	yPos + (fontSize * yScale), c, font, fontSize * yScale, sourceOp);
            	// addCharacter(thisChar, false);
            	//System.out.println("thisChar: " + thisChar);
            	charactersToAdd.add(thisChar);
            }
            else // pageRotation == 90
            {
            	/*
            	System.out.println("c: " + c + " yPosBefore: " + yPosBefore +
            		" (width:) " + width + " width2: " + width2 + " xPos: " + xPos + 
            		" xPosBefore: " + xPosBefore + " yPos: " + yPos + " yPosBefore: " 
            		+ yPosBefore +
            		" fontSize: " + fontSize + " xScale: " + xScale + " yScale: " +
            		yScale);
            	*/
            	
            	// xScale? need to find a doc where they are different ;)
            	thisChar = new CharSegment
		        	(yPosBefore + xTrans, yPosBefore + width2 + xTrans, yPos, 
		        	yPos + (fontSize * xScale), c, font, fontSize * xScale, sourceOp);
            	//System.out.println("thisChar: " + thisChar);
            	if (pageRotation == 270 || pageRotation == 90)
            		//addCharacter(thisChar, true);
            		charactersToAdd.add(thisChar);
            	else
            		//addCharacter(thisChar, false);
            		charactersToAdd.add(thisChar);
            }
            
//            System.out.println("yPos1.5: " + yPos);
            stringResult.append( c );
        }
//        System.out.println("yPos2: " + yPos);
        float totalStringHeight = characterVerticalDisplacement * fontSize * yScale;
        String resultingString = stringResult.toString();
//        System.out.println("yPos2.5: " + yPos);
        if( individualWidths.length != resultingString.length() )
        {
            float[] tmp = new float[resultingString.length()];
            System.arraycopy( individualWidths, 0, tmp, 0, Math.min( individualWidths.length, resultingString.length() ));
            individualWidths = tmp;
            if( resultingString.equals( "- " ))
            {
                //System.out.println( "EQUALS " + individualWidths[0] );
            }
        }
        
//        System.out.println("yPos3: " + yPos);
        float charX1 = xPos;
        float charX2 = xPos + totalStringWidth;
        float charY1 = yPos;
        float charY2 = yPos + (fontSize * yScale);//totalStringHeight;
        String c = stringResult.toString();
        
        //TextFragment thisChar = new TextFragment(charX1, charX2, charY1, charY2, 
        //		c, font, fontSize * yScale);
        //addCharacter(thisChar);
            
   //     System.out.println("showCharacter with xPos: " + xPos + " and yPos " + yPos + " and string " + stringResult.toString());
        
        // addCharacter already called above
        // showCharacter does nothing; neither in super nor in this class
        
/* start commented out 1.1
        showCharacter(
                new TextPosition(
                    xPos,
                    yPos,
                    xScale,
                    yScale,
                    totalStringWidth,
                    individualWidths,
                    totalStringHeight,
                    spaceWidth,
                    stringResult.toString(),
                    font,
                    fontSize,
                    wordSpacingDisplacement ));
        //System.out.println("relief");
end commented out 1.1 */
        
        //no call addCharacter for the list of characters to be processed...
        
        CompositeSegment thisStringFragment = new CompositeSegment();
        thisStringFragment.setText(stringResult.toString());
        //System.out.println("stringResult: " + stringResult);
        thisStringFragment.getItems().addAll(charactersToAdd);
        thisStringFragment.findBoundingBox();
        
        //System.out.println("thisStringFragment: " + thisStringFragment);
        //System.out.println("lastStringFragment: " + lastStringFragment);
        
        //float tolerance = fontSize * 0.05f;
        // changed on 7.04.09 to allow for google19.pdf and other pdfs where yscale is used
        // to determine fontsize...
        float tolerance = fontSize * yScale * 0.05f;
        
        //System.out.println("lastStringFragment: " + lastStringFragment);
        //System.out.println("thisStringFragment: " + thisStringFragment);
        
        //System.out.println("tolerance: " + tolerance);
        
        if (lastStringFragment != null &&
        	thisStringFragment.getText().equals
        		(lastStringFragment.getText()) &&
        	Utils.within(thisStringFragment.getX1(),
        		lastStringFragment.getX1(), tolerance)  &&
    		Utils.within(thisStringFragment.getX2(),
            	lastStringFragment.getX2(), tolerance)  &&
    		Utils.within(thisStringFragment.getY1(),
            	lastStringFragment.getY1(), tolerance)  &&
    		Utils.within(thisStringFragment.getY2(),
            	lastStringFragment.getY2(), tolerance))
        //if (false)
        {
        	//System.out.println("stringResult overprint...");
        	// it's an overprint; code to come here 
        }
        else
	    {
        	// add the text fragment!
	        Iterator<CharSegment> ctaIter = charactersToAdd.iterator();
	        while(ctaIter.hasNext())
	        {
	        	CharSegment thisChar = ctaIter.next();
	        	
	        	if (pageRotation == 270 || pageRotation == -90)
	        	//if (pageRotation == 270 || pageRotation == 90)
	        		addCharacter(thisChar, true);
	        	else
	        		addCharacter(thisChar, false);
	        }
        }
        lastStringFragment = thisStringFragment;
    }
    
    public void addCharacter(CharSegment thisChar, boolean reverseCoords)
    {
    	boolean concatenate = false;
    	if (fragmentList.size() > 0)
    	{
    		TextFragment lastFragment = fragmentList.get(fragmentList.size() - 1);
//    		System.out.println("**thisChar: " + thisChar);
    		//System.out.println("lastFragment: " + lastFragment);
    		
    		// TODO: this lovely overprint code doesn't work on
    		// google19.pdf, where complete sentences are
    		// rendered at once... :(
    		CharSegment lastChar = (CharSegment)lastFragment.getItems().
    			get(lastFragment.getItems().size() - 1);
    		boolean overprint = false;
    		float tolerance = thisChar.getFontSize() * 0.05f;
    		if ((lastChar.getText().equals(thisChar.getText()) &&
    			 lastChar.getFontSize() == thisChar.getFontSize() &&
    			 Utils.within(lastChar.getX1(), thisChar.getX1(), tolerance) &&
    			 Utils.within(lastChar.getX2(), thisChar.getX2(), tolerance) &&
    			 Utils.within(lastChar.getY1(), thisChar.getY1(), tolerance) &&
    			 Utils.within(lastChar.getY2(), thisChar.getY2(), tolerance) &&
     			 lastChar.getFontName() == thisChar.getFontName())
    			 ) overprint = true;
    		if (overprint)
    		{
    			/*
    			System.out.println("Overprint with: ");
    			System.out.println("lastChar: " + lastChar);
    			System.out.println("lastFramgent: " + lastFragment);
    			System.out.println("thisChar: " + thisChar);
    			System.out.println("Setting lastChar to overprint");
    			System.out.println();
    			*/
    			lastChar.setOverprint(true);
    		}
    		else
    		{
	    		boolean sameLine;
	    		/*
	    		if (lastFragment.getY1() == thisChar.getY1())
	    			sameLine = true; else sameLine = false;
	    		*/
	    		// 23.06.08 added error margin
	    		if (Utils.within(lastFragment.getY1(), thisChar.getY1(), tolerance))
	    			sameLine = true; else sameLine = false;
	    		boolean sameWord;
	    		float spacing;

	    		if (reverseCoords)
	    		{
	    			//System.out.println("reverseCoords");
		    		float lfWidth = lastFragment.getWidth();
	    			float tcWidth = thisChar.getWidth();
	    			lastFragment.setX1(0 - lastFragment.getX1());
	    			lastFragment.setX2(lastFragment.getX1() + lfWidth);
	    			thisChar.setX1(0 - thisChar.getX1());
	    			thisChar.setX2(thisChar.getX1() + tcWidth);
	    		}

	    		spacing = thisChar.getX1() - lastFragment.getX2();
	    		//System.out.println("spacing: " + spacing);
	    		if (spacing < thisChar.getWidth() * 0.25 &&
	    			spacing > (0 - thisChar.getWidth() * 1.0))
	    			sameWord = true; else sameWord = false;
	    		if (sameLine && sameWord)
	    			concatenate = true; else concatenate = false;	
    	
	    		if (mergeAcrossTextInstructions)
	    			concatenate = !newTextFragment;
	    		
//	    		TEST 27.08.09
//	    		concatenate = !newTextFragment;
	    			
		    	if (concatenate)
		    	{
		    		// concatenate last fragment of fragmentList
		    		// logically, the size of fragmentList must be at least one
		    		
		    		lastFragment.setX2(thisChar.getX2());
		    		lastFragment.setText(lastFragment.getText().concat(thisChar.getText()));
		    		lastFragment.getItems().add(thisChar);
		    	}
		    	else
		    	{
		    		// !concatenate stuff was here but we
		    		// need to revert the co-ordinates back...
		    	}
	    	
		    	if (reverseCoords)
	    		{
		    		float lfWidth = lastFragment.getWidth();
	    			float tcWidth = thisChar.getWidth();
	    			lastFragment.setX1(0 - lastFragment.getX1());
	    			lastFragment.setX2(lastFragment.getX1() + lfWidth);
	    			thisChar.setX1(0 - thisChar.getX1());
	    			thisChar.setX2(thisChar.getX1() + tcWidth);
	    		}
		    	if (!concatenate)
		    	{
		    		TextFragment newFrag = new TextFragment(thisChar);
//		    		2011-01-25 constructor does all this automatically
//		    		newLine.getItems().add(thisChar);
//		    		newLine.setBoundingBox(thisChar.getBoundingBox());
//		    		newLine.setText(thisChar.getText());
//		    		newLine.setFontName(thisChar.getFontName());
//		    		newLine.setFontSize(thisChar.getFontSize());
		    		fragmentList.add(newFrag);
		    	}
    		}
    	}
    	else
    	{
    		TextFragment newFrag = new TextFragment(thisChar);
//    		2011-01-25 constructor does all this automatically
//    		newFrag.getItems().add(thisChar);
//    		newFrag.setBoundingBox(thisChar.getBoundingBox());
//    		newFrag.setText(thisChar.getText());
//    		newFrag.setFontName(thisChar.getFontName());
//    		newFrag.setFontSize(thisChar.getFontSize());
    		fragmentList.add(newFrag);
    	}
    	// do not add empty space characters (bmw example)
    	if (!thisChar.getText().equals(" "))
    		charList.add(thisChar);
    	
    	setNewTextFragment(false); // only needed when mergeAcrossTextInstructions == true
    }
    
    // pre: characters are in the correct order
	public static void removeLeadingTrailingSpaces
		(List<TextFragment> fragmentList)
	{
		List<TextFragment> fragsToRemove =
			new ArrayList<TextFragment>();
		for (TextFragment tf : fragmentList)
		{
			// remove from beginning
			while(tf.getItems().size() > 0 &&
				tf.getItems().get(0).getText().equals(" "))
			{
				tf.getItems().remove(0);
				tf.findBoundingBox();
				tf.findText();
			}
			
			// remove at end
			int lastIndex = tf.getItems().size() - 1;
			while(tf.getItems().size() > 0 &&
				tf.getItems().get(lastIndex).getText().equals(" "))
			{
				tf.getItems().remove(lastIndex);
				tf.findBoundingBox();
				tf.findText();
				
				// recalculate lastIndex -- important!
				lastIndex = tf.getItems().size() - 1;
			}
			
			if (tf.getItems().size() == 0)
				fragsToRemove.add(tf);
		}
		
		fragmentList.removeAll(fragsToRemove);
	}
	
    
    /**
     * You should override this method if you want to perform an action when a
     * string is being shown.
     *
     * @param text The string to display.
     */
/* start commented out 1.1
    protected void showCharacter( TextPosition text )
    {
        //should use colorspaces for the font color but for now assume that
        //the font color is black
        try
        {
            if( this.getGraphicsState().getTextState().getRenderingMode() == PDTextState.RENDERING_MODE_FILL_TEXT ) 
            {
                graphics.setColor( this.getGraphicsState().getNonStrokingColorSpace().createColor() );
            } 
            else if( this.getGraphicsState().getTextState().getRenderingMode() == PDTextState.RENDERING_MODE_STROKE_TEXT )
            {
                graphics.setColor( this.getGraphicsState().getStrokingColorSpace().createColor() );
            }
            else
            {
                //need to implement....
            }
            PDFont font = text.getFont();
//            System.out.println("gotten font: " + font.getBaseFont() + " subtype: " + font.getSubType());
            try
            {
            	font.drawString( text.getCharacter(), graphics, text.getFontSize(), text.getXScale(), text.getYScale(),
                    text.getX(), text.getY() );
            }
            catch (Exception e)
            {
            	// e.g. font name not found
            	e.printStackTrace();
            }
            // works for the top part of Daimler!! edocsacs.pdf
            //TextFragment newFragment = new TextFragment(text);
            //TextLine newLine = new TextLine(newFragment);
            //newLine.findBoundingBox();
            //newLine.findText(false);
            //fragmentList.add(newLine);
            //System.out.println("added fragment: " + newLine);
            
        }
        catch( IOException io )
        {
            io.printStackTrace();
        }
    }
end commented out 1.1 */    
  
    /**
     * Get the graphics that we are currently drawing on.
     * 
     * @return The graphics we are drawing on.
     */
    public Graphics2D getGraphics()
    {
        return graphics;
    }
    
    /**
     * Get the page that is currently being drawn.
     * 
     * @return The page that is being drawn.
     */
    public PDPage getPage()
    {
        return page;
    }
    
    /**
     * Get the size of the page that is currently being drawn.
     * 
     * @return The size of the page that is being drawn.
     */
    public Dimension getPageSize()
    {
        return pageSize;
    }
    
    /**
     * Fix the y coordinate based on page rotation.
     * 
     * @param x The x coordinate.
     * @param y The y coordinate.
     * @return The updated y coordinate.
     */
    public double fixY( double x, double y )
    {
    	return pageSize.getHeight() - y;
    }
    
    /**
     * Get the current line path to be drawn.
     * 
     * @return The current line path to be drawn.
     */
    public GeneralPath getLinePath()
    {
        return linePath;
    }

    /**
     * Set the line path to draw.
     * 
     * @param newLinePath Set the line path to draw.
     */
    public void setLinePath(GeneralPath newLinePath)
    {
        if (linePath == null || linePath.getCurrentPoint() == null){
            linePath = newLinePath;
        }else{
            linePath.append (newLinePath, false);
        }
    }
    
    /**
     * Get the current list of line paths to be drawn.
     * 
     * @return The current list of line paths to be drawn.
     */
    public List getLineSubPaths()
    {
        return lineSubPaths;
    }

    /**
     * Set the list of line paths to draw.
     * 
     * @param newLineSubPaths Set the list of line paths to draw.
     */
    public void setLineSubPaths(List newLineSubPaths)
    {
        lineSubPaths = newLineSubPaths;
    }
    	
    /**
     *
     * Fill the path
     * 
     * @param windingRule The winding rule this path will use.
     */
    public void fillPath(int windingRule) throws IOException{
    	
    	graphics.setColor( getGraphicsState().getNonStrokingColor().getJavaColor() );
        
        //logger().info("Filling the path with rule: " + windingRule);
        
    	getLinePath().setWindingRule(windingRule);
        
    	graphics.setRenderingHint( RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF );
        List subPaths = getLineSubPaths();
        for( int i=0; i<subPaths.size(); i++ )
        {
            GeneralPath subPath = (GeneralPath)subPaths.get( i );
            if (subPath.getCurrentPoint() != null){ //Sector9's suggestion in bug 1672556
                subPath.closePath();
            }
            /*Rectangle bBox = subPath.getBounds();
            Point2D point1 = TransformedPoint(bBox.x, bBox.y);
            Point2D point2 = TransformedPoint(bBox.x + bBox.width, bBox.y + bBox.height);
            //RectSegment ls = new RectSegment(bBox.x, bBox.x*bBox.width, bBox.y, bBox.y+bBox.height);
            RectSegment ls = new RectSegment((float)point1.getX(), (float)point2.getX(), 
            	(float)point1.getY(), (float)point2.getY());
            System.out.println("fillPath adding line segment: " + ls);
            rectList.add(ls);*/
            
            graphics.fill( subPath );
        }
        
            graphics.fill( getLinePath() );
            getLinePath().reset();;
    }
    
        
    public void setStroke(BasicStroke newStroke){
    	getGraphics().setStroke( newStroke );
    }
    
    public void StrokePath() throws IOException{
    	graphics.setColor( getGraphicsState().getStrokingColor().getJavaColor() ); //per Ben's 11/15 change in StrokePath.java
        List subPaths = getLineSubPaths();
        for( int i=0; i<subPaths.size(); i++ )
        {
            GeneralPath subPath = (GeneralPath)subPaths.get( i );
            graphics.draw( subPath );
            /*
            Rectangle bBox = subPath.getBounds();
            Point2D point1 = TransformedPoint(bBox.x, bBox.y);
            Point2D point2 = TransformedPoint(bBox.x + bBox.width, bBox.y + bBox.height);
            //RectSegment ls = new RectSegment(bBox.x, bBox.x*bBox.width, bBox.y, bBox.y+bBox.height);
            RectSegment ls = new RectSegment((float)point1.getX(), (float)point2.getX(), 
            	(float)point1.getY(), (float)point2.getY());
            System.out.println("strokePath adding line segment: " + ls);
            rectList.add(ls);*/
        }
        subPaths.clear();
        GeneralPath path = getLinePath();
        graphics.draw( path );
        path.reset();
    }
    
    // these colour methods added from old method 100409
    /**
     * Get the non stroking color.
     * 
     * @return The non stroking color.
     */
    public Color getNonStrokingColor()
    {
        return nonStrokingColor;
    }

    /**
     * Set the non stroking color.
     * 
     * @param newNonStrokingColor The non stroking color.
     */
    public void setNonStrokingColor(Color newNonStrokingColor)
    {
        nonStrokingColor = newNonStrokingColor;
        currentColor = nonStrokingColor;
    }

    /**
     * Get the stroking color.
     * 
     * @return The stroking color.
     */
    public Color getStrokingColor()
    {
        return strokingColor;
    }

    /**
     * Set the stroking color.
     * 
     * @param newStrokingColor The stroking color.
     */
    public void setStrokingColor(Color newStrokingColor)
    {
        strokingColor = newStrokingColor;
        currentColor = strokingColor;
    }
    
    
    //If you need to do anything when a color changes, do it here ... or in an override of this function
    public void ColorChanged(Boolean bStroking) throws IOException{
        //logger().info("changing " + (bStroking ? "" : "non") + "stroking color");
/////    	System.out.println("changing " + (bStroking ? "" : "non") + "stroking color");
    }
    
    //This code generalizes the code Jim Lynch wrote for AppendRectangleToPath
    public java.awt.geom.Point2D.Double TransformedPoint (double x, double y){
        
        double scaleX = 0.0;
        double scaleY = 0.0;
        double transX = 0.0;
        double transY = 0.0;
        
        double finalX = x;
        double finalY = y;
        
        //Get the transformation matrix 
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
        AffineTransform at = ctm.createAffineTransform();

    	scaleX = at.getScaleX();
        scaleY = at.getScaleY();
        transX = at.getTranslateX();
        transY = at.getTranslateY();
        
        Point2D Pscale = ScaledPoint (finalX, finalY, scaleX, scaleY);
        finalX = Pscale.getX();
        finalY = Pscale.getY();
        
        finalX += transX;
      	finalY += transY;
	
        finalY = fixY( finalX, finalY );
        finalY -= .6;
        
        return new java.awt.geom.Point2D.Double(finalX, finalY);
    }
    
    //Use ScaledPoint rather than TransformedPoint in situations where most of the translation
    //need not be repeated.
    //Consider, for example, the second coordinate of a rectangle.
    public java.awt.geom.Point2D.Double ScaledPoint (double x, double y, double scaleX, double scaleY){
        
        double finalX = 0.0;
        double finalY = 0.0;
        
        if(scaleX > 0)
    	{
	    	finalX = x * scaleX;
    	}
        if(scaleY > 0)
        {
        	finalY = y * scaleY;
    	}
        
        return new java.awt.geom.Point2D.Double(finalX, finalY);
    }
    
    public java.awt.geom.Point2D.Double ScaledPoint (double x, double y){
        
        double scaleX = 0.0;
        double scaleY = 0.0;
        
        //Get the transformation matrix 
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
        AffineTransform at = ctm.createAffineTransform();
       	scaleX = at.getScaleX();
        scaleY = at.getScaleY();
        return ScaledPoint(x, y, scaleX, scaleY);
    }
    
    // 100409 methods from old class below...
    
    /**
     * Start a new page.  Default implementation is to do nothing.  Subclasses
     * may provide additional information.
     * 
     * @param page The page we are about to process.
     * 
     * @throws IOException If there is any error writing to the stream.
     */
    protected void startPage( PDPage page ) throws IOException
    {
        //default is to do nothing.
    }
    
    /**
     * End a page.  Default implementation is to do nothing.  Subclasses
     * may provide additional information.
     * 
     * @param page The page we are about to process.
     * 
     * @throws IOException If there is any error writing to the stream.
     */
    protected void endPage( PDPage page ) throws IOException
    {
        //default is to do nothing
    }
    
    /**
     * This will set the first page to be extracted by this class.
     *
     * @param startPageValue New value of property startPage.
     */
    public void setStartPage(int startPageValue)
    {
        startPage = startPageValue;
    }
    
    /**
     * This will set the last page to be extracted by this class.
     *
     * @param endPageValue New value of property endPage.
     */
    public void setEndPage(int endPageValue)
    {
        endPage = endPageValue;
    }

    /**
     * This is the page that the text extraction will start on.  The pages start
     * at page 1.  For example in a 5 page PDF document, if the start page is 1
     * then all pages will be extracted.  If the start page is 4 then pages 4 and 5
     * will be extracted.  The default value is 1.
     *
     * @return Value of property startPage.
     */
    public int getStartPage()
    {
        return startPage;
    }
    
    /**
     * This will get the last page that will be extracted.  This is inclusive,
     * for example if a 5 page PDF an endPage value of 5 would extract the
     * entire document, an end page of 2 would extract pages 1 and 2.  This defaults
     * to Integer.MAX_VALUE such that all pages of the pdf will be extracted.
     *
     * @return Value of property endPage.
     */
    public int getEndPage()
    {
        return endPage;
    }
    
    
    public BufferedImage pageImage(PDDocument doc, int pageNo) throws IOException
    {
    	BufferedImage retVal;
    	
    	// *** start copied from beginning of getObjects method
    	
    	PDEncryptionDictionary encDictionary = doc.getEncryptionDictionary();

        //only care about standard encryption and if it was decrypted with the
        //user password
        if( encDictionary instanceof PDStandardEncryption && 
            !doc.wasDecryptedWithOwnerPassword() )
        {
            PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary;
            if( !stdEncryption.canExtractContent() )
            {
                throw new IOException( "You do not have permission to extract text" );
            }
        }
        currentPageNo = 1;
        document = doc;
        
        //output = outputStream;
        //startDocument(document);
        
        if( document.isEncrypted() )
        {
            // We are expecting non-encrypted documents here, but it is common
            // for users to pass in a document that is encrypted with an empty
            // password (such a document appears to not be encrypted by
            // someone viewing the document, thus the confusion).  We will
            // attempt to decrypt with the empty password to handle this case.
            //
            try
            {
                document.decrypt("");
            }
            catch (CryptographyException e)
            {
                throw new IOException("Error decrypting document, details: " + e.getMessage());
            }
            catch (InvalidPasswordException e)
            {
                throw new IOException("Error: document is encrypted");
            }
        }

        List allPages = document.getDocumentCatalog().getAllPages();
        
        // *** end copied from beginning of getObjects method
        
        PDPage page = (PDPage)allPages.get(pageNo);
    	PDResources resources = page.getResources();
    	Map  images = resources.getImages();
    	if( images != null )
    	{
    		Iterator  imageIter = images.keySet().iterator();
    		// ** TODO: return exception if >1 image on page?
    		while( imageIter.hasNext() )
	    	{
		    	String  key = (String )imageIter.next();
		    	PDXObjectImage image = (PDXObjectImage)images.get( key );
		    	retVal = image.getRGBImage();
		    	return retVal;
	    	}
    	}
        
    	throw new IOException("No images found");
    }
    
    
    /**
     * This will take a PDDocument and return a list of PDFPage objects for each page
     *
     * @param doc The document to get the data from.
     * @param outputStream The location to put the text.
     *
     * @throws IOException If the doc is in an invalid state.
     */
    public List<PDFPage> findObjects(PDDocument doc) throws IOException
    {
        PDEncryptionDictionary encDictionary = doc.getEncryptionDictionary();

        //only care about standard encryption and if it was decrypted with the
        //user password
        if( encDictionary instanceof PDStandardEncryption && 
            !doc.wasDecryptedWithOwnerPassword() )
        {
            PDStandardEncryption stdEncryption = (PDStandardEncryption)encDictionary;
            if( !stdEncryption.canExtractContent() )
            {
                throw new IOException( "You do not have permission to extract text" );
            }
        }
        currentPageNo = 1;
        document = doc;
        
        //output = outputStream;
        //startDocument(document);

        if( document.isEncrypted() )
        {
            // We are expecting non-encrypted documents here, but it is common
            // for users to pass in a document that is encrypted with an empty
            // password (such a document appears to not be encrypted by
            // someone viewing the document, thus the confusion).  We will
            // attempt to decrypt with the empty password to handle this case.
            //
            try
            {
                document.decrypt("");
            }
            catch (CryptographyException e)
            {
                throw new IOException("Error decrypting document, details: " + e.getMessage());
            }
            catch (InvalidPasswordException e)
            {
                throw new IOException("Error: document is encrypted");
            }
        }

        List allPages = document.getDocumentCatalog().getAllPages();
        int numPages = allPages.size();
        
        // take care of negative page numbers
        // and swap startPage and endPage if necessary
        
        if (startPage < 0)
            startPage = numPages + startPage;
        if (endPage < 0)
            endPage = numPages + endPage;
        if (startPage > endPage)
        {
            int tempVar = startPage;
            startPage = endPage;
            endPage = tempVar;
        }
        
        /*
        System.out.println("Number of pages: " + numPages);
        System.out.println("Start page is now: " + startPage);
        System.out.println("End page is now: " + endPage);
        */
        
        // return processPages( document.getDocumentCatalog().getAllPages() );
        return processPages(allPages);
        // writer only method?
        //endDocument(document);
    }
    
    /**
     * This will process all of the pages and the text that is in them.
     *
     * @param pages The pages object in the document.
     *
     * @throws IOException If there is an error parsing the text.
     */
    protected List<PDFPage> processPages( List pages ) throws IOException
    {
        List<PDFPage> retVal = new ArrayList<PDFPage>();
        
        if( startBookmark != null )
        {
            startBookmarkPageNumber = currentPageNumber( startBookmark, pages );
        }
        
        if( endBookmark != null )
        {
            endBookmarkPageNumber = currentPageNumber( endBookmark, pages );
        }
        
        if( startBookmarkPageNumber == -1 && startBookmark != null &&
            endBookmarkPageNumber == -1 && endBookmark != null &&
            startBookmark.getCOSObject() == endBookmark.getCOSObject() )
        {
            //this is a special case where both the start and end bookmark
            //are the same but point to nothing.  In this case
            //we will not extract any text.
            startBookmarkPageNumber = 0;
            endBookmarkPageNumber = 0;
        }

        Iterator pageIter = pages.iterator();
        // this loop uncommented to process only the first page
        while( pageIter.hasNext() )
        {
            PDPage nextPage = (PDPage)pageIter.next();
            
            // added for further development
            page = nextPage;
            
            try 
            {
            	System.out.println("Current page no: " + currentPageNo);
            
	            if (page.getArtBox() != null)
	            	System.out.println("Art Box: " + new GenericSegment(page.getArtBox()));
	            if (page.getBleedBox() != null)
	           		System.out.println("Bleed Box: " + new GenericSegment(page.getBleedBox()));
	            // the following value caused problems for pp120-hassan.pdf
	            if (page.getCropBox() != null)
	            	System.out.println("Crop Box: " + new GenericSegment(page.getCropBox()));
	            if (page.getMediaBox() != null)
	            	System.out.println("Media Box: " + new GenericSegment(page.getMediaBox()));
	            if (page.getTrimBox() != null)
	            	System.out.println("Trim Box: " + new GenericSegment(page.getTrimBox()));
	            
	            // if media box is null (not allowed!) find a substitute
	            
	            if (page.getMediaBox() == null)
	            {
	            	if (page.getBleedBox() != null)
	               		page.setMediaBox(page.getBleedBox());
	            	else if (page.getCropBox() != null)
	            		page.setMediaBox(page.getCropBox());
	            	else if (page.getTrimBox() != null)
	            		page.setMediaBox(page.getTrimBox());
	            	else if (page.getArtBox() != null)
	                	page.setMediaBox(page.getArtBox());
	            }
 
            }
            catch(NullPointerException npe)
            {
            	System.out.println("at least one of the boxes is missing!");
            	npe.printStackTrace();
            }
            int pageRotation = page.findRotation();
            System.out.println("Page rotation: " + pageRotation);
            // end of addition
            PDStream contentStream = nextPage.getContents();
            
            GenericSegment pageDim;
            /* simple version
            if (page.getMediaBox() != null)
            	pageDim = new GenericSegment(page.getMediaBox());
            else 
            	pageDim = new GenericSegment(page.getArtBox());
            */
            
            // 20.02.09: swapped artBox for trimBox...
            if (page.getMediaBox() != null && page.getTrimBox() != null)
            {
            	GenericSegment mediaBox = new GenericSegment(page.getMediaBox());
            	GenericSegment trimBox = new GenericSegment(page.getTrimBox());
            	
            	// fixed "Das Windsor-Syndrom" document
            	pageDim =
            		new GenericSegment(trimBox.getX1(), trimBox.getX2(),
            		trimBox.getY1(), trimBox.getY2());
            	
            	/* state at end of NextWrap
            	pageDim =
            		new GenericSegment(mediaBox.getX1(), mediaBox.getX2(),
            		mediaBox.getY1(), artBox.getY2());
            	*/
            }
            else if (page.getMediaBox() != null)
            {
            	pageDim = new GenericSegment(page.getMediaBox());
            }
            else if (page.getArtBox() != null)
            {
            	// will never reach this stage because
            	// a page always has a MediaBox... (above=...
            	pageDim = new GenericSegment(page.getArtBox());
            }
            else
            {
            	throw new IOException
            		("Cannot find a suitable page bounding box!");
            }
            
            
            System.out.println("Processing page: " + currentPageNo);
//            System.out.println("startPage: " + startPage);
//            System.out.println("endPage: " + endPage);
            
            if( currentPageNo >= startPage && currentPageNo <= endPage &&
                    (startBookmarkPageNumber == -1 || currentPageNo >= startBookmarkPageNumber ) && 
                    (endBookmarkPageNumber == -1 || currentPageNo <= endBookmarkPageNumber ))
            {
                if( contentStream != null )
                {
                    COSStream contents = contentStream.getStream();
                    PDFPage thePage = processPage(nextPage, contents);
                    
                // 20.06.08 this is now done in processPage...
                // because of the complications with rotation...
                //    thePage.setBoundingBox(pageDim.getBoundingBox()); 
                    //thePage.reverseYCoordinates();
                    
                    retVal.add(thePage);
                }
            }
            else
            {
                // skip to next page
                if( contentStream != null )
                {
                    COSStream contents = contentStream.getStream();
                    //System.out.println("processing page " + currentPageNo + "...");
                    //PDFPage throwawayVar = processPage( nextPage, contents );
                    	// unnecessary; all that did was increment currentPageNo!
                    currentPageNo ++;
                }
            }
        }
        return retVal;
    }
    
    protected int currentPageNumber( PDOutlineItem bookmark, List allPages ) throws IOException 
    {
        int pageNumber = -1;
        PDPage page = bookmark.findDestinationPage( document );
        if( page != null )
        {
            pageNumber = allPages.indexOf( page )+1;//use one based indexing
        }
        return pageNumber;
    }
    
    /**
     * This will process the contents of a page.
	 * modified by TH
     *
     * @param page The page to process.
     * @param content The contents of the page.
     *
     * @throws IOException If there is an error processing the page.
     */
    protected PDFPage processPage( PDPage page, COSStream content ) throws IOException
    {
    	// new from 23.04.09
    	clipBounds = new GenericSegment(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY,
    		Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
    	
    	this.page = page;
//      pageSize = page.getArtBox().createDimension();
//    	pageSize = page.getCropBox().createDimension(); // seems to make no difference to SN...
    	
        PDResources resources = page.findResources();
        
        PDFPage thisPage = new PDFPage();
        //GenericSegment pageDim = new GenericSegment(page.getArtBox());
        GenericSegment pageDim; // = new GenericSegment(page.getCropBox()); // seems to make no difference to SN...
        
        if (page.getCropBox() != null)
        {
        	pageDim = new GenericSegment(page.getCropBox());
        	//pageSize = page.getCropBox().createDimension(); // needed for PDFBox methods
        }
        else
        {
        	pageDim = new GenericSegment(page.findMediaBox());
        	//pageSize = page.findMediaBox().createDimension(); // needed for PDFBox methods
        }
        pageSize = page.findMediaBox().createDimension(); // needed for PDFBox graphic drawing methods
        // if statement added for EFLensChart.pdf
        if (page.getRotation() == null) thisPage.setRotation(0);
        else thisPage.setRotation(page.getRotation());
        
        // PDFBox code for rasterizing image
        // the mBox is not used for our purposes
        int scaling = 2;
        PDRectangle mBox = page.findMediaBox();
        //PDRectangle mBox = page.getCropBox(); // also does nothing!
        int width = (int)(mBox.getWidth());//*2);
        int height = (int)(mBox.getHeight());//*2);
        Dimension pageDimension = new Dimension( width, height );
        
        //note we are doing twice as many pixels because
        //the default size is not really good resolution,
        //so create an image that is twice the size
        //and let the client scale it down.
        BufferedImage retval = new BufferedImage
        	( width*scaling, height*scaling, BufferedImage.TYPE_BYTE_INDEXED );
        //Graphics2D graphics = (Graphics2D)retval.getGraphics();
        graphics = (Graphics2D)retval.getGraphics();
        graphics.setColor( Color.WHITE );
        graphics.fillRect(0,0,width*scaling, height*scaling);
        graphics.scale( scaling, scaling );
        
        // end of new stuff...
        
        imageList = new ArrayList<ImageSegment>();
        lineList = new ArrayList<LineSegment>();
        rectList = new ArrayList<RectSegment>();
        fragmentList = new ArrayList<TextFragment>();
        charList = new ArrayList<CharSegment>();
        
        // TODO: method within GenericSegment instead of here?
        thisPage.setX1(pageDim.getX1());
        thisPage.setX2(pageDim.getX2());
        thisPage.setY1(pageDim.getY1());
        thisPage.setY2(pageDim.getY2());
        
        List<GenericSegment> pageItems = thisPage.getItems();
		// end of addition

        long start = System.currentTimeMillis();
        currentPageNo++;
        
        // former loop (page selection)
        // {
            startPage( page );
            pageArticles = page.getThreadBeads();
            int numberOfArticleSections = 1 + pageArticles.size() * 2;
            if( !shouldSeparateByBeads )
            {
                numberOfArticleSections = 1;
            }
            int originalSize = charactersByArticle.size();
            charactersByArticle.setSize( numberOfArticleSections );
            for( int i=0; i<numberOfArticleSections; i++ )
            {
                if( numberOfArticleSections < originalSize )
                {
                    ((List)charactersByArticle.get( i )).clear();
                }
                else
                {
                    charactersByArticle.set( i, new ArrayList() );
                }
            }
            
            characterListMapping.clear();
            long startProcess = System.currentTimeMillis();
            processStream( page, page.findResources(), content );
            long stopProcess = System.currentTimeMillis();
            long startFlush = System.currentTimeMillis();
            
            // TODO: rescue this bit of code someday!
            // add all text elements to page object
			for( int i=0; i<charactersByArticle.size(); i++)
        	{
            	List textList = (List)charactersByArticle.get( i );
            	Iterator textIter = textList.iterator();
				
            	while( textIter.hasNext() )
            	{
					TextPosition thisPos = (TextPosition)textIter.next();
					pageItems.add(new TextFragment(thisPos, pageDim));
					System.out.println("text fragment by article found");
				}
			}

			thisPage.getItems().addAll(fragmentList);
			// line below uncommented 17.03.07
			thisPage.getItems().addAll(charList);
			
			
			// remove anything not within the crop box
			if (page.getCropBox() != null)
			{
				//System.out.println("cropBox: " + new GenericSegment(page.getCropBox()));
				
//				2011-01-25 theItems already set
//				List items = thisPage.getItems();
				
				GenericSegment cropBox = new GenericSegment(page.getCropBox()); 
				
				
				// these lines only for manual input of the cropBox
				float new_Y2 = page.getMediaBox().getHeight() - cropBox.getY1();
				float new_Y1 = page.getMediaBox().getHeight() - cropBox.getY2();
				cropBox.setY1(new_Y1);
				cropBox.setY2(new_Y2);
				
				//System.out.println("current cropBox: " + cropBox);
				
				if (thisPage.getRotation() == 270 || thisPage.getRotation() == -90)
				{
					// TODO: refactor as e.g. GenericSegment.rotate
					// 3x 90 deg rotate
					// well, that's the cheeky way... :)
					cropBox = new GenericSegment(cropBox.getX1(), 
						cropBox.getX1() + cropBox.getHeight(), cropBox.getY1(),
						cropBox.getY1() + cropBox.getWidth());
					cropBox = new GenericSegment(cropBox.getX1(), 
						cropBox.getX1() + cropBox.getHeight(), cropBox.getY1(),
						cropBox.getY1() + cropBox.getWidth());
					cropBox = new GenericSegment(cropBox.getX1(), 
						cropBox.getX1() + cropBox.getHeight(), cropBox.getY1(),
						cropBox.getY1() + cropBox.getWidth());
				}
				else if (thisPage.getRotation() == 90 || thisPage.getRotation() == -270)
				{
					cropBox = new GenericSegment(cropBox.getX1(), 
						cropBox.getX1() + cropBox.getHeight(), cropBox.getY1(),
						cropBox.getY1() + cropBox.getWidth());
					
					//System.out.println("new cropBox: " + cropBox);
				}
				
				for (int n = 0; n < pageItems.size(); n ++)
				{
					GenericSegment gs = (GenericSegment)pageItems.get(n);
					if (!SegmentUtils.intersects(gs, cropBox))
					{
						pageItems.remove(gs);
						n --;
					}
				}
			}
			
			
			if (thisPage.getRotation() == 270 || thisPage.getRotation() == -90)
			{
				// I imagine somewhere in the PDFBox code it tries to be
				// clever and reverse the co-ordinates with the incorrect
				// page dimensions; we need to undo this(!)
				thisPage.reverseYCoordinatesPDF();
				
				// not quite as easy as just swapping X and Y co-ordinates
				// page needs to be rotated around its _top_ axis; this means
				// subtracting the difference between height & width...
				thisPage.setBoundingBox(new float[]
				    {thisPage.getY1(), thisPage.getY2(),
					 thisPage.getY2() - thisPage.getWidth(), thisPage.getY2()});
				// note getWidth is actually now the height :)
				
				thisPage.normalizeCoordinates();
				thisPage.reverseXCoordinates();
			}
			else
			{
				//thisPage.normalizeCoordinates();
				// reverseYCoordinates is common, so do nothing...
			}
			
			pageItems.addAll(rectList);
			pageItems.addAll(lineList);
			pageItems.addAll(imageList);
			
			thisPage.normalizeCoordinates();
			thisPage.reverseYCoordinatesPDF();
			thisPage.setLastOpIndex(opIndex);
			
            //pageItems.addAll(lineList);
            //pageItems.addAll(rectList);
            
            long stopFlush = System.currentTimeMillis();
            endPage( page );
        // end of former loop
        // }
            
        long stop = System.currentTimeMillis();
        
    //    System.out.println("returning thisPage: " + thisPage.toExtendedString());
        
        return thisPage;
    }
    
    public boolean isNewTextFragment() {
		return newTextFragment;
	}

	public void setNewTextFragment(boolean newTextFragment) {
		this.newTextFragment = newTextFragment;
	}

	public void pushClipBounds()
    {
		clipBoundsStack.push(clipBounds.clone());
    	// NOOOOOO!!!
		//clipBounds = new GenericSegment(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY,
        //	Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
    }
    
    public void popClipBounds()
    {
    	clipBounds = (GenericSegment)clipBoundsStack.pop();
    }
    
    public void simpleModifyClippingPath()
    {
    	//GenericSegment pathBounds = new GenericSegment();
    	// TODO: getDilatedSegment and enlarge... redundancy?
/////    	System.out.println("path: " + ((CompoundTextSegment)getPathBounds()).toExtendedString());
    	clipBounds.shrinkBoundingBox(currentPathBounds());//.getDilatedSegment(5.0f));
/////    	System.out.println("clipBounds is now: " + clipBounds);
    }
    
    public GenericSegment currentPathBounds()
    {
    	CompositeSegment retVal = new CompositeSegment();
    	retVal.getItems().addAll(linesToAdd);
    	retVal.getItems().addAll(rectsToAdd);
    	retVal.getItems().addAll(currentLines);
    	retVal.getItems().addAll(currentRects);
    	retVal.findBoundingBox();
    	return retVal;
    }
    
    public void newPath()
	{
    	if (true)
    	//if (!pathContainsCurve)
    	{
	    	linesToAdd.addAll(currentLines);
			rectsToAdd.addAll(currentRects);
    	}
		newPath = true;
		pathContainsCurve = false;
		pathBeginSet = false;
		pathClosed = false;
		
		currentLines = new ArrayList<LineSegment>();
		currentRects = new ArrayList<RectSegment>();
	}
    
    public void endPath()
    {
    	newPath = true;
		pathContainsCurve = false;
		pathBeginSet = false;
		pathClosed = false;
		linesToAdd = new ArrayList<LineSegment>();
		rectsToAdd = new ArrayList<RectSegment>();
    }

    public void simpleCurveTo(float x1, float y1, float x2, float y2, float x3, float y3)
    {
    	// points already transformed!
    	pathContainsCurve = true;
    	LineSegment l1 = new LineSegment(currentX, x1, currentY, y1);
    	LineSegment l2 = new LineSegment(x1, x2, y1, y2);
    	LineSegment l3 = new LineSegment(x2, x3, y2, y3);
    	l1.setCurve(true);
    	l2.setCurve(true);
    	l3.setCurve(true);
    	currentLines.add(l1);
    	currentLines.add(l2);
    	currentLines.add(l3);
    	currentX = x3;
    	currentY = y3;
    }
    
    public void simpleClosePath()
    {
    	if (pathBeginSet && !pathClosed)
    	{
	    	simpleLineTo(pathBeginX, pathBeginY);
	    	pathClosed = true;
    	}
    }
    
	public void simpleMoveTo(float x, float y)
    {
    	Point2D Ppos = TransformedPoint(x, y);   
    	currentX = (float)Ppos.getX();
    	currentY = (float)Ppos.getY();
/////    	System.out.println("moving to: " + Ppos.getX() + ", " + Ppos.getY());
    	// newPath() called by calling OperatorProcessor method
    }
    
    public void simpleLineTo(float x, float y)
	{
    	/*
    	Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
    	
    	float xFrom = ctm.getXPosition();
		float yFrom = ctm.getYPosition();
		
		Point2D Pfrom = TransformedPoint(xFrom, yFrom);
		*/
    	float[] comp = getStrokingColor().getRGBColorComponents(null);
/////    	System.out.println("line stroke colour: " + comp[0] + " " + comp[1] + " " + comp[2]);
    	
		Point2D Pto = TransformedPoint(x, y);    	
    	
		//if (newPath)
		//{
	    	LineSegment newLine = new LineSegment(currentX, 
	    		(float)Pto.getX(), currentY, (float)Pto.getY());
	    	/*
	    	LineSegment newLine = new LineSegment(newLineTemp.getX1(),
	    		newLineTemp.getX2(), newLineTemp.getY1() - newLineTemp.getHeight(),
	    		newLineTemp.getY1());
	    	*/
/////	    	System.out.println("adding line: " + newLine);
	    	newLine.correctNegativeDimensions();
	    	newLine.rotate(page);
/////	    	System.out.println("after rotation: " + newLine);
	    	//lineList.add(newLine);
	    	linesToAdd.add(newLine);
	    	// lineList.add(newLine); naughty! 4.08.09
//	    	System.out.println("newLine: " + newLine);
		//}
		
    	currentX = (float)Pto.getX();
    	currentY = (float)Pto.getY();
	}
    
    public void simpleAddRect(float x, float y, float w, float h)
	{
		float[] comp = getStrokingColor().getRGBColorComponents(null);
		
/////		System.out.println("transparency: "+getStrokingColor().getTransparency());
/////		System.out.println("rect stroke colour: " + comp[0] + " " + comp[1] + " " + comp[2]);
	
		//if (!(comp[0] > 0.9 && comp[1] > 0.9 && comp[2] > 0.9))
		//{
			//rectsToAdd.add(newRect);
			// 11.04.09 funny behaviour owing to reverseYCoordinates
		/*
			RectSegment newRect = new RectSegment(x, x+w, y+h, y+2*h);
			if (h < 0)
				newRect = new RectSegment(x, x+w, y, y-h);
		*/
			RectSegment newRect = new RectSegment(x, x+w, y, y+h);
			//newRect.correctNegativeDimensions(); -- this didn't work right here
			//RectSegment newRect = new RectSegment(x, x+w, y+2*h, y+h);
/////			System.out.println("adding rect: " + newRect);
			
			//if (page.getRotation() != null)
			newRect.rotate(page);
/////			System.out.println("after rotation: " + newRect);
			//rectList.add(newRect);
			newRect.correctNegativeDimensions();
			
			rectsToAdd.add(newRect);
			
			// coordinates already transformed in calling OperatorProcessor method...
			currentX = x;
			currentY = y;
		//}
		//else
		//{
		//	System.out.println("ignored rect");
		//}
	}


    /**
     * TODO: extract byte[] from img and add it to ImageSegment
     *
     * @param x1
     * @param x2
     * @param y1
     * @param y2
     * @param img
     */
	public void simpleDrawImage(float x1, float x2, float y1, float y2, PDXObjectImage img ) {
		ImageSegment newImageSegment = new ImageSegment(x1, x2, y1, y2);
			//(ctm.getXPosition(), ctm.getXPosition() + (float)twh.getX(), 
				//ctm.getYPosition(), ctm.getYPosition() + (float)twh.getY());
/////		System.out.println("adding image segment: " + newImageSegment);
		newImageSegment.correctNegativeDimensions();
		newImageSegment.rotate(page);
/////		System.out.println("image before clipping: " + newImageSegment);
/////		System.out.println("clipBounds: " + clipBounds);
		newImageSegment.shrinkBoundingBox(clipBounds);
		if (!newImageSegment.isZeroSize())
		{
/////			System.out.println("adding image segment");
			imageList.add(newImageSegment);
		}
/////		else System.out.println("not adding image segment");
/////		System.out.println("after rotation: " + newImageSegment);
	}
    
    public void simpleStrokePath()
    {
    	newPath(); // adds contents of last sub-path to toAdd lists
    	
    	// from PageDrawer.java (Ben) hack 4.08.09
    	try {
			graphics.setColor( getGraphicsState().getStrokingColor().getJavaColor() );
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} //per Ben's 11/15 change in StrokePath.java
    	float[] comp = graphics.getColor().getRGBColorComponents(null);

    	//if (true);
		if (!(comp[0] > 0.9 && comp[1] > 0.9 && comp[2] > 0.9))
		{
			//lineList.addAll(linesToAdd);
			//rectList.addAll(rectsToAdd);
			Iterator<LineSegment> llIter = linesToAdd.iterator();
			while(llIter.hasNext()) // forgotten while statement added 4.08.09
			{
				LineSegment ls = llIter.next();
				ls.shrinkBoundingBox(clipBounds);
				if (!ls.isZeroSize())
					lineList.add(ls);
			}
			Iterator<RectSegment> rlIter = rectsToAdd.iterator();
			while(rlIter.hasNext()) // forgotten while statement added 4.08.09
			{
				RectSegment rs = rlIter.next();
				rs.setFilled(false); // is false anyway
				rs.shrinkBoundingBox(clipBounds);
				if (!rs.isZeroSize())
					rectList.add(rs);
			}
		}
		
		// empty toAdd lists
		//newPath();
		endPath();
    }
    
    public void simpleFillPath()
    {
    	newPath(); // adds contents of last sub-path to toAdd lists
    	
    	//float[] comp = getStrokingColor().getRGBColorComponents(null);
    	//System.out.println("rect non stroke colour1: " + comp[0] + " " + comp[1] + " " + comp[2]);
    	// from PageDrawer.java (Ben) hack 4.08.09
    	try {
			graphics.setColor( getGraphicsState().getNonStrokingColor().getJavaColor() );
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
    	//float[] comp = getNonStrokingColor().getRGBColorComponents(null);
		float[] comp = graphics.getColor().getRGBColorComponents(null);
		//System.out.println("rect non stroke colour2: " + comp[0] + " " + comp[1] + " " + comp[2]);
    	//comp = graphics.getColor().getRGBColorComponents(null);
    	//System.out.println("r " + graphics.getColor().getRed() + " g " + graphics.getColor().getGreen() + " b " + graphics.getColor().getBlue());
    	//System.out.println("rect non stroke colour3: " + comp[0] + " " + comp[1] + " " + comp[2]);
    	
    	
		if (!(comp[0] > 0.9 && comp[1] > 0.9 && comp[2] > 0.9))
		{
			//lineList.addAll(linesToAdd);
			
			
			
			//System.out.println("rectsToAdd: " + rectsToAdd);
			
			Iterator<RectSegment> rlIter = rectsToAdd.iterator(); // in practice, should not be more than one rect#
													// in this list
			while (rlIter.hasNext())
			{
				RectSegment rs = rlIter.next();
				rs.setFilled(true);
/////				System.out.println("rect before shrinking: " + rs);
/////				System.out.println("clipBounds: " + clipBounds);
				rs.shrinkBoundingBox(clipBounds);
				if (!rs.isZeroSize())
					rectList.add(rs);
			}
			
			// TODO: rectangle recognition from lines
			// this part added 4.08.09 (Sydney)
			// TODO: accept only if resembles a rectangle
				// find BBox
				// any lines with points o/s bbox (within error margin) lead to rejection
			RectSegment lineRect = null;
			Iterator lIter = linesToAdd.iterator();
			while(lIter.hasNext())
			{
				LineSegment ls = (LineSegment)lIter.next();
				if (lineRect == null)
					lineRect = new RectSegment
						(ls.getX1(), ls.getX2(), ls.getY1(), ls.getY2());
				// 4.08.09 previously lineRect.getX... got rid of a lot of rubbish...
				else
					lineRect.growBoundingBox(ls);
			}
			if (lineRect != null)
				rectList.add(lineRect);
			// end of addition
			
			
			if (lineRect != null)
			{
//				System.out.println("col: r: " + comp[0] + " g: " + comp[1] + " b: " + comp[2]);
//				System.out.println("adding lineRect: " + lineRect);
			}
			
			//rectList.addAll(rectsToAdd);
		}
		
		// empty toAdd lists
		//newPath();
		endPath();
    }
}