warc.open() is a thin wrapper around warc.WARCFile(), and warc.WARCFile() accepts a fileobj argument, so sys.stdin can be handed to it directly. Something like this:
import sys
import warc
# Read WARC records from standard input and print, for each record, its
# target URI and its declared Content-Length separated by a single space.
#
# WARCFile is constructed directly because it takes an already-open stream
# through its `fileobj` argument.
# NOTE(review): warc.open() is documented as taking a filename rather than
# `fileobj` -- confirm against the installed warc version.
f = warc.WARCFile(fileobj=sys.stdin)
for record in f:
    # A single-argument print() with %-formatting produces the same output
    # under both Python 2 and Python 3.
    print("%s %s" % (record['WARC-Target-URI'], record['Content-Length']))
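Under hadoop, however, apart from the handling of .gz input, hadoop replaces \r\n in the WARC with \n, which breaks the WARC format (note: hadoop likewise turns \r\n into \n for ARC). The warc library matches the version line against "WARC/(\d+.\d+)\r\n" (that is, it requires \r\n), so, as expected, reading fails with: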
IOError: Bad version line: 'WARC/1.0\n'
, PipeMapper.java, , , WARC.
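Alternatively, warc.py can be patched to accept \n in place of \r\n, but then Content-Length becomes the problem. The data has passed through hadoop, so it no longer matches the Content-Length header, and reading fails with: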
IOError: Expected '\n', found 'abc\n'