For Python 3, there is a separate package: pip install pdfminer3k
import time
from functools import wraps
from io import StringIO

from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf


def fn_timer(function):
    """Decorate *function* so that every call prints its wall-clock run time.

    The wrapped function's return value is passed through unchanged. If the
    wrapped function raises, the exception propagates and nothing is printed.
    """
    @wraps(function)
    def function_timer(*args, **kwargs):
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments.
        t0 = time.perf_counter()
        result = function(*args, **kwargs)
        t1 = time.perf_counter()
        # Report the name of the timed function rather than a fixed label.
        print("Total time running %s: %s seconds"
              % (function.__name__, str(t1 - t0)))
        return result
    return function_timer


@fn_timer
def convert_pdf(path, pages):
    """Extract text from the PDF at *path* and return it as a string.

    Args:
        path: Filesystem path of the PDF; it is opened in binary mode.
        pages: Page selection, forwarded as the fourth positional argument
            of ``process_pdf``.
            NOTE(review): presumably a collection of zero-based page
            numbers -- confirm against the installed pdfminer version.

    Returns:
        Everything the text converter wrote to its in-memory buffer.
    """
    rsrcmgr = PDFResourceManager()
    # The context managers and the ``finally`` clause guarantee that the
    # buffer, the converter and the file are released even when parsing fails.
    with StringIO() as retstr:
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            with open(path, 'rb') as fp:
                process_pdf(rsrcmgr, device, fp, pages)
        finally:
            device.close()
        # Read the text before leaving the ``with`` block closes the buffer.
        return retstr.getvalue()


# Script section: convert the sample document and print the extracted text.
file = r'M:\a.pdf'
print(convert_pdf(file, [1]))
source share