How do I know if a field is on a particular page?

The PDFBox content stream is created per page, but the fields come from the form, which comes from the catalog, which comes from the PDF document itself. Therefore, I am not sure which fields are on which pages, and this causes me to write text in the wrong locations/pages.

That is, I process the fields once per page, but I'm not sure which fields are on which pages.

Is there any way to find out which field is on which page? Or is there a way to get only the fields on the current page?

Thanks!

Mark

code snippet:

PDDocument pdfDoc = PDDocument.load(file);
PDDocumentCatalog docCatalog = pdfDoc.getDocumentCatalog();
PDAcroForm acroForm = docCatalog.getAcroForm();

// Get the form's fields once; the same list is handed to processFields for every page.
List<PDField> fieldList = acroForm.getFields();
// Reuse the catalog fetched above instead of asking the document for it again.
List<PDPage> pages = docCatalog.getAllPages();
for (PDPage page : pages) {
  PDPageContentStream contentStream = new PDPageContentStream(pdfDoc, page, true, true, true);
  try {
    processFields(acroForm, fieldList, contentStream, page);
  } finally {
    // Close the stream even when processFields throws, so it is not leaked.
    contentStream.close();
  }
}
+4
source share
3 answers

PDFbox , , , pdf. ,

, PDF , . 0, 1 0, 1 PDF. , 1 .

PDFBox 1.8.x

, PDFBox PDAcroForm PDField . , , .

, :

/**
 * Prints, for each field returned by the document's AcroForm, the page(s) on which it is
 * referenced by a page annotation.
 *
 * <p>One line is printed per field, prefixed with a one-letter category:
 * <ul>
 *   <li>{@code i} - neither the field dictionary nor any of its kids is a page annotation;</li>
 *   <li>{@code a} - only the field's kids were looked up as page annotations;</li>
 *   <li>{@code m} - the field dictionary itself is a page annotation (merged field/widget);</li>
 *   <li>{@code x} - both of the above apply at once.</li>
 * </ul>
 * Page numbers are 1-based. A {@code null} entry in a printed page list means that the
 * corresponding kid dictionary was not found among the annotations of any page.
 *
 * <p>Nothing is printed when the document has no AcroForm.
 *
 * @param pdfDoc the document whose form fields are to be reported
 * @throws IOException declared for the PDFBox calls made while reading pages and fields
 */
@SuppressWarnings("unchecked")
public void printFormFields(PDDocument pdfDoc) throws IOException {
    PDDocumentCatalog docCatalog = pdfDoc.getDocumentCatalog();

    // Index every page annotation dictionary by the 1-based number of the page listing it.
    List<PDPage> pages = docCatalog.getAllPages();
    Map<COSDictionary, Integer> pageNrByAnnotDict = new HashMap<COSDictionary, Integer>();
    for (int i = 0; i < pages.size(); i++) {
        PDPage page = pages.get(i);
        for (PDAnnotation annotation : page.getAnnotations()) {
            pageNrByAnnotDict.put(annotation.getDictionary(), i + 1);
        }
    }

    PDAcroForm acroForm = docCatalog.getAcroForm();
    if (acroForm == null) {
        // A document without a form has no fields to report; avoid the NullPointerException.
        return;
    }

    for (PDField field : (List<PDField>) acroForm.getFields()) {
        COSDictionary fieldDict = field.getDictionary();

        // Pages of the kids that are dictionaries (null when a kid is on no page).
        List<Integer> annotationPages = new ArrayList<Integer>();
        List<COSObjectable> kids = field.getKids();
        if (kids != null) {
            for (COSObjectable kid : kids) {
                COSBase kidObject = kid.getCOSObject();
                if (kidObject instanceof COSDictionary) {
                    annotationPages.add(pageNrByAnnotDict.get(kidObject));
                }
            }
        }

        // Non-null when the field dictionary itself appears in a page's annotations.
        Integer mergedPage = pageNrByAnnotDict.get(fieldDict);

        if (mergedPage == null) {
            if (annotationPages.isEmpty()) {
                System.out.printf("i Field '%s' not referenced (invisible).\n", field.getFullyQualifiedName());
            } else {
                System.out.printf("a Field '%s' referenced by separate annotation on %s.\n", field.getFullyQualifiedName(), annotationPages);
            }
        } else {
            if (annotationPages.isEmpty()) {
                System.out.printf("m Field '%s' referenced as merged on %s.\n", field.getFullyQualifiedName(), mergedPage);
            } else {
                System.out.printf("x Field '%s' referenced as merged on %s and by separate annotation on %s. (Not allowed!)\n", field.getFullyQualifiedName(), mergedPage, annotationPages);
            }
        }
    }
}

, PDFBox PDAcroForm:

  • PDF , , . PDFBox , .

  • PDF , , , . PDFBox PDAcroForm.getFields .

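PS: @mikhailvs shows in his answer that you can also retrieve a page object from a field widget using PDField.getWidget().getPage() and determine its page number using catalog.getAllPages().indexOf. While this is fast, getPage() has a drawback: it reads the page reference from an optional entry of the widget annotation dictionary. Thus, if the PDF you process was created by software that fills in that entry, all is well, but if the PDF creator did not fill it in, all you get is null.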

PDFBox 2.0.x

2.0.x , , , , , .

:

/**
 * Finds the page of a widget by searching the annotations of every page in the document.
 *
 * @param document the document whose pages are searched
 * @param widget   the widget annotation to locate
 * @return the zero-based index of the first page having an annotation whose COS dictionary
 *         equals the widget's, or -1 when no page has one
 * @throws IOException declared for reading a page's annotations
 */
int determineSafe(PDDocument document, PDAnnotationWidget widget) throws IOException
{
    final COSDictionary target = widget.getCOSObject();
    final PDPageTree pageTree = document.getPages();
    int pageIndex = 0;
    while (pageIndex < pageTree.getCount())
    {
        for (PDAnnotation candidate : pageTree.get(pageIndex).getAnnotations())
        {
            if (candidate.getCOSObject().equals(target))
            {
                return pageIndex;
            }
        }
        pageIndex++;
    }
    // Not found in any page's annotations.
    return -1;
}

/**
 * Finds the page of a widget through the page the widget itself reports.
 *
 * @param document the document whose page tree is consulted
 * @param widget   the widget annotation to locate
 * @return the result of looking the widget's page up in the document's page tree,
 *         or -1 when the widget reports no page
 */
int determineFast(PDDocument document, PDAnnotationWidget widget)
{
    PDPage widgetPage = widget.getPage();
    if (widgetPage == null)
    {
        // The widget carries no page, so there is nothing to look up.
        return -1;
    }
    return document.getPages().indexOf(widgetPage);
}

:

// For every field in the form's field tree, print the field's fully qualified name and then,
// per widget, the widget's name followed by the page indices from both lookup strategies.
PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
if (acroForm != null)
{
    for (PDField field : acroForm.getFieldTree())
    {
        System.out.println(field.getFullyQualifiedName());
        for (PDAnnotationWidget widget : field.getWidgets())
        {
            // Widgets without an annotation name are shown with a placeholder.
            String widgetLabel = widget.getAnnotationName();
            if (widgetLabel == null)
            {
                widgetLabel = "(NN)";
            }
            System.out.print(widgetLabel);

            int fastIndex = determineFast(document, widget);
            System.out.printf(" - fast: %s", fastIndex);

            int safeIndex = determineSafe(document, widget);
            System.out.printf(" - safe: %s\n", safeIndex);
        }
    }
}

(DetermineWidgetPage.java)

( 1.8.x . , Map, 1.8.x.)

, : aFieldTwice.pdf

, : test_duplicate_field2.pdf

+4

OP ( ), - , :

// Fetch the document catalog, which provides the document's page list.
PDDocumentCatalog catalog = doc.getDocumentCatalog();

// Position of the widget's page within the catalog's page list.
// NOTE(review): when getPage() returns null the lookup presumably yields -1 - confirm.
int pageNumber = catalog.getAllPages().indexOf(yourField.getWidget().getPage());
+3

( ).

// Determine the page number of each widget of the field; works for one or more widgets.
List<PDAnnotationWidget> widgets = field.getWidgets();
PDDocumentCatalog catalog = doc.getDocumentCatalog();
for (PDAnnotationWidget widget : widgets) {
    // One more than the page's index in the page tree; 0 when the widget reports no page.
    int pageNumber = widget.getPage() != null
            ? 1 + catalog.getPages().indexOf(widget.getPage())
            : 0;

    /* The field coordinates can also be read here, for one or more widgets. */

    // PDRectangle r = widget.getRectangle();

}
0
source

Source: https://habr.com/ru/post/1529245/


All Articles