:
(1) ASCII UTF-8 , ASCII, UTF-8. , ASCII .
(2) findreplace - , ASCII? , "" , , , /.
(3) SAME -, . UTF-8?
(4) UTF-8 ?
(5) , ?
(6) (UTF-16LE/UTF-16BE) x (BOM/no BOM) UTF-16? , - "utf-16" .
(7) , chardet UTF-16xE . chardet - .
, , "ANSI", , . : Windows.
import locale
# Encoding name reported for the default locale (presumably the Windows
# "ANSI" code page -- confirm on the target platform). getdefaultlocale()
# may return None here when the encoding cannot be determined.
ansi = locale.getdefaultlocale()[1]

# Read the raw bytes; the context manager closes the file even if read() fails.
with open("input_file_path", "rb") as f:
    data = f.read()

# Choose candidate encodings from the byte-order mark, if any. The prefixes
# are bytes literals because `data` is bytes: on Python 3, bytes.startswith()
# rejects str arguments, while b"..." is still a plain str on Python 2.6+.
if data.startswith(b"\xEF\xBB\xBF"):
    # UTF-8 BOM: "utf-8-sig" strips the BOM while decoding.
    encodings = ["utf-8-sig"]
elif data.startswith((b"\xFF\xFE", b"\xFE\xFF")):
    # UTF-16 LE/BE BOM: the "utf16" codec picks the byte order from the BOM.
    encodings = ["utf16"]
else:
    # No BOM: try strict UTF-8 first, then the locale encoding, then UTF-16LE.
    # Drop the locale entry when it is unknown (None) so decode() is never
    # called with a non-string encoding name.
    encodings = [name for name in ("utf8", ansi, "utf-16le") if name]

# Try each candidate in order; the first one that decodes cleanly wins.
for enc in encodings:
    try:
        udata = data.decode(enc)
        break
    except (UnicodeDecodeError, LookupError):
        # UnicodeDecodeError: the bytes are not valid in this encoding.
        # LookupError: the locale reported a codec name Python does not know.
        # Either way, fall through to the next candidate.
        continue
else:
    # Loop finished without `break`: no candidate could decode the data.
    raise Exception("unknown encoding")