Highlight text using PDFBox when its location in the PDF is known

Does PDFBox provide a utility for highlighting text when I already have its coordinates?

The bounds of the text are known.

I know that there are other libraries, such as PDF Clown, that provide this functionality. But does PDFBox offer something like this?

+4
source share
4 answers

I found this. It's simple.

// Load the document and pick the page to annotate; `i` is the 0-based page index.
// (getAllPages() is the PDFBox 1.x way of listing pages.)
PDDocument doc = PDDocument.load(/*path to the file*/);
PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(i);
List annots = page.getAnnotations();
// Create a text-markup annotation of the highlight subtype.
PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
markup.setRectangle(/*your PDRectangle*/);
markup.setQuadPoints(/*float array of size eight with all the vertices of the PDRectangle in anticlockwise order*/);
annots.add(markup);
doc.save(/*path to the output file*/);
// Release the resources held by the document once it has been saved.
doc.close();
+6
source

This extends answer number 1 here and uses basically the same code as above.

It improves how the point coordinates are computed relative to the page size of the current document, and it also changes the yellow color, which was very light and sometimes hard to see when the word is short and small.

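In addition, the X, Y coordinates of the highlight are taken from the first and last characters of the matched text.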

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

public class MainSource extends PDFTextStripper {

    public MainSource()  throws IOException {
        super();
    }

    public static void main(String[] args)  throws IOException {
        PDDocument document = null;
        String fileName = "C:/AnyPDFFile.pdf";
        try {
            document = PDDocument.load( new File(fileName) );
            PDFTextStripper stripper = new MainSource();
            stripper.setSortByPosition( true );

            stripper.setStartPage( 0 );
            stripper.setEndPage( document.getNumberOfPages() );

            Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
            stripper.writeText(document, dummy);

            File file1 = new File("C:/AnyPDFFile-New.pdf");
            document.save(file1);
        }
        finally {
            if( document != null ) {
                document.close();
            }
        }
    }

    /**
     * Override the default functionality of PDFTextStripper.writeString()
     */

    @Override
    protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
        boolean isFound = false;
        float posXInit  = 0, 
              posXEnd   = 0, 
              posYInit  = 0,
              posYEnd   = 0,
              width     = 0, 
              height    = 0, 
              fontHeight = 0;
        String[] criteria = {"Word1", "Word2", "Word3", ....};

        for (int i = 0; i < criteria.length; i++) {
            if (string.contains(criteria[i])) {
                isFound = true;
            } 
        }
        if (isFound) {
            posXInit = textPositions.get(0).getXDirAdj();
            posXEnd  = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth();
            posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj();
            posYEnd  = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj();
            width    = textPositions.get(0).getWidthDirAdj();
            height   = textPositions.get(0).getHeightDir();

            System.out.println(string + "X-Init = " + posXInit + "; Y-Init = " + posYInit + "; X-End = " + posXEnd + "; Y-End = " + posYEnd + "; Font-Height = " + fontHeight);

            /* numeration is index-based. Starts from 0 */

            float quadPoints[] = {posXInit, posYEnd + height + 2, posXEnd, posYEnd + height + 2, posXInit, posYInit - 2, posXEnd, posYEnd - 2};

            List<PDAnnotation> annotations = document.getPage(this.getCurrentPageNo() - 1).getAnnotations();
            PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);

            PDRectangle position = new PDRectangle();
            position.setLowerLeftX(posXInit);
            position.setLowerLeftY(posYEnd);
            position.setUpperRightX(posXEnd);
            position.setUpperRightY(posYEnd + height);

            highlight.setRectangle(position);

            // quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right) 
            // of the area to be highlighted

            highlight.setQuadPoints(quadPoints);

            PDColor yellow = new PDColor(new float[]{1, 1, 1 / 255F}, PDDeviceRGB.INSTANCE);
            highlight.setColor(yellow);
            annotations.add(highlight);
        }
    }

}
+2

pdfbox 2.0.7

// Obtain the PDDocument to annotate (placeholder: supply your own document here).
PDDocument document = /* get doc */
/* getPage() is index-based and starts from 0, hence the "- 1" on a 1-based page number */
List<PDAnnotation> annotations = document.getPage(yourPageNumber - 1).getAnnotations();
// Create a text-markup annotation of the highlight subtype.
PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
// NOTE(review): the annotation rectangle is set to the full A4 rectangle rather than
// to the bounds of the highlighted text - confirm this is intended.
highlight.setRectangle(PDRectangle.A4);
// quadPoints is an array of x,y coordinates in Z-like order
// (top-left, top-right, bottom-left, bottom-right) of the area to be highlighted;
// it must be defined by the caller.
highlight.setQuadPoints(quadPoints);
// Highlight color as RGB components (1, 1, 204/255).
PDColor yellow = new PDColor(new float[]{1, 1, 204 / 255F}, PDDeviceRGB.INSTANCE);
highlight.setColor(yellow);
// Attach the annotation to the page's annotation list.
annotations.add(highlight);

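Note: as pointed out in the comments, some viewers do not display the highlight in the resulting doc unless the annotation also has an AppearanceStream. See PDFBOX-3353.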

+1

... angular 1, - . ...

PDFBox...// PDDocument doc = new PDDocument(); PDPage page1 = PDPage(); doc.addPage ( .1);// PDPageContentStream stream1 = new PDPageContentStream (doc, page1);// // x , y , x , y end stream1.drawLine(20, 740, 590, 740);// , // x y stream1.addRect(345, 568, 70, 2); stream1.setNonStrokingColor(Color.BLACK); stream1.fill(); -

0

Source: https://habr.com/ru/post/1547896/


All Articles