The sixth signature listed for .xls files is as follows:
Excel spreadsheet subheader (MS Office)
09 08 10 00 00 06 05 00 [512 byte offset]
You can read about the various other signatures on Wikipedia.
, - . , , . , . !
# Check whether 'spreadsheet.xls' carries the .xls signature bytes at the
# expected offset.
xls_sig = b'\x09\x08\x10\x00\x00\x06\x05\x00'
offset = 512  # the signature is expected 512 bytes into the file
size = 8      # number of bytes to read; equals len(xls_sig)

with open('spreadsheet.xls', 'rb') as f:
    # Jump to the signature position and read exactly `size` bytes.
    f.seek(offset)
    header = f.read(size)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
if header == xls_sig:
    print('Uploaded file is an xls.')
else:
    print('File is not an xls.')
1
, , .xls.
2
, , xls xlsx:
import codecs

# Byte signatures to compare against; every byte is written as a \x.. escape.
# NOTE(review): 50 4B 05 06 looks like the generic ZIP end-of-central-directory
# marker, so other ZIP-based files may match as well -- confirm before relying
# on it to identify xlsx specifically.
xlsx_sig = b'\x50\x4B\x05\x06'
xls_sig = b'\x09\x08\x10\x00\x00\x06\x05\x00'

# Each entry is (filename, whence, offset, size). `whence` and `offset` are
# passed straight to f.seek(): whence 0 is relative to the start of the file,
# whence 2 is relative to its end (hence the negative offset).
filenames = [
    ('spreadsheet.xls', 0, 512, 8),
    ('spreadsheet.xlsx', 2, -22, 4),
]

for filename, whence, offset, size in filenames:
    with open(filename, 'rb') as f:
        f.seek(offset, whence)
        signature = f.read(size)

    # Show the bytes that were read as a (hex digits, bytes consumed) tuple.
    print(codecs.getencoder('hex')(signature))

    if signature == xls_sig:
        msg = '"{}" is an xls.'
    elif signature == xlsx_sig:
        msg = '"{}" is an xlsx.'
    else:
        msg = '"{}" is not an Excel document.'
    print(msg.format(filename))
Output:
('0908100000060500', 8)
"spreadsheet.xls" is an xls.
('504b0506', 4)
"spreadsheet.xlsx" is an xlsx.