I am trying to get the dimensions (width and height) of the images embedded in a PDF file. The images in the PDF are high-resolution vector images.
- I tried using PDFBox. The PDFBox libraries extract ordinary images perfectly, but when they encounter a vector image, they extract its different layers as separate images.
- I also read about iText, but iText can render the entire page as a rasterized image, whereas my PDF page actually consists of several images, and I need to extract (or get the size of) each of them separately.
I am attaching my PDFBox image-extraction code here. Please let me know how I can get a vector image as one single image rather than as separate layers.
My code is as follows:
package com.abp.pdf.util; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Map; import javax.imageio.ImageIO; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; public class ExtractImages { private int imageCounter = 1; private ExtractImages() { } public static void main(String[] args) throws Exception { ExtractImages extractor = new ExtractImages(); extractor.extractImages(args); } private void extractImages(String[] args) throws Exception { String pdfFile = null; String password = ""; String prefix = null; boolean addKey = false; boolean useNonSeqParser = true; pdfFile = "/home/suvankar/Resources/myfile.pdf"; if (prefix == null && pdfFile.length() > 4) { prefix = pdfFile.substring(0, pdfFile.lastIndexOf("/") + 1) + "extracted/images" + pdfFile.substring(pdfFile.lastIndexOf("/"), pdfFile.length() - 4); } PDDocument document = null; try { if (useNonSeqParser) { document = PDDocument.loadNonSeq(new File(pdfFile), null, password); } else { document = PDDocument.load(pdfFile); if (document.isEncrypted()) { StandardDecryptionMaterial spm = new StandardDecryptionMaterial( password); document.openProtection(spm); } } AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException( "Error: You do not have permission to extract images."); } List pages = document.getDocumentCatalog().getAllPages(); Iterator iter = pages.iterator(); while (iter.hasNext()) { PDPage page = (PDPage) iter.next(); PDResources resources = 
page.getResources(); processResources(resources, prefix, addKey); } } finally { if (document != null) { document.close(); } } } private void processResources(PDResources resources, String prefix, boolean addKey) throws IOException { if (resources == null) { return; } Map<String, PDXObject> xobjects = resources.getXObjects(); if (xobjects != null) { Iterator<String> xobjectIter = xobjects.keySet().iterator(); while (xobjectIter.hasNext()) { String key = xobjectIter.next(); PDXObject xobject = xobjects.get(key);
source share