, . PDF. , , .
/**
 * Extracts the text of a single page of a PDF file using iText.
 *
 * @param pdfPath    path of the PDF file, passed straight to {@code PdfReader}
 * @param pageNumber page number handed to {@code PdfTextExtractor.getTextFromPage}
 *                   (1-based per the iText API)
 * @return the extracted page text, or an empty string when extraction ran out of memory
 * @throws IOException if the file cannot be opened or read
 */
public static String getPageContent(String pdfPath, int pageNumber) throws IOException
{
    PdfReader reader = new PdfReader(pdfPath);
    try {
        return PdfTextExtractor.getTextFromPage(reader, pageNumber, new SimpleTextExtractionStrategy());
    } catch (OutOfMemoryError e) {
        // Best-effort fallback kept from the original: a page too heavy to
        // extract is reported on stderr and yields an empty result instead
        // of taking the caller down.
        e.printStackTrace();
        return "";
    } finally {
        // Always release the reader; otherwise the underlying file and the
        // parsed document stay alive after every call.
        reader.close();
    }
}
, 1 , . , , ( ). , . . , , . .
/**
 * Extracts the text of a single page of a PDF file using PDFBox, by
 * registering one region that spans the page and reading that region back.
 *
 * @param pdfPath    path of the PDF file, passed straight to {@code PDDocument.load}
 * @param pageNumber 1-based page number (index {@code pageNumber - 1} in the page list)
 * @return the text PDFBox reports for the region covering the page
 * @throws IOException if the file cannot be loaded, read, or closed
 * @throws IndexOutOfBoundsException if {@code pageNumber} is outside the document
 */
public static String getPageContent(String pdfPath, int pageNumber) throws IOException
{
    PDDocument pdDoc = PDDocument.load(pdfPath);
    try {
        // Measure and extract the same page; the page list is 0-based.
        PDPage page = (PDPage) pdDoc.getDocumentCatalog().getAllPages().get(pageNumber - 1);

        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.setSortByPosition(true);

        // NOTE(review): the 25.4 factor is kept from the original; it makes
        // the region far larger than the media box, so the whole page is
        // always covered. Confirm whether the plain media box size would do.
        float width = page.getMediaBox().getWidth() * 25.4f;
        float height = page.getMediaBox().getHeight() * 25.4f;
        Rectangle region = new Rectangle(0, 0, Math.round(width), Math.round(height));
        stripper.addRegion("class1", region);

        stripper.extractRegions(page);
        return stripper.getTextForRegion("class1");
    } finally {
        // Release the document even when extraction fails, so the file and
        // its buffers are not leaked.
        pdDoc.close();
    }
}