Python 3
This checks each character of the passed string to see whether it is in the Cyrillic block, and returns True if a Cyrillic character is present in the string. Strings in Python 3 are Unicode by default. The function encodes each character in UTF-8 and checks whether the result is a two-byte sequence in the range that corresponds to the Cyrillic block (U+0400–U+04FF).
def isCyrillic(filename):
    """Return True if *filename* contains at least one Cyrillic character.

    Each character is encoded to UTF-8 and compared against the two-byte
    sequences 0xD0 0x80 .. 0xD3 0xBF, which are the encodings of the code
    points U+0400..U+04FF (the basic Cyrillic block).

    Args:
        filename: The string to inspect.

    Returns:
        True if any character falls in that range, otherwise False
        (including for the empty string).
    """
    for char in filename:
        # 'surrogatepass' stops lone surrogates (which can appear in file
        # names that could not be decoded) from raising UnicodeEncodeError;
        # they encode to three bytes and so never match the test below.
        char_utf8 = char.encode('utf-8', 'surrogatepass')
        if (len(char_utf8) == 2
                and 0xd0 <= char_utf8[0] <= 0xd3
                and 0x80 <= char_utf8[1] <= 0xbf):
            return True
    return False
Alternatively, using ord(), the same check can be made directly against the code point of each character:
def isCyrillicOrd(filename):
    """Return True if *filename* contains at least one Cyrillic character.

    A character counts as Cyrillic when its code point, as given by
    ``ord()``, lies in the range U+0400..U+04FF inclusive.

    Args:
        filename: The string to inspect.

    Returns:
        True if any character falls in that range, otherwise False
        (including for the empty string).
    """
    return any(0x0400 <= ord(char) <= 0x04FF for char in filename)
cycont
|
|
|
|
Test
import os
# Walk the test directory and print each file name followed by the result of
# both checks, so the two implementations can be compared side by side.
for (dirpath, dirnames, filenames) in os.walk('G:/cycont'):
    for filename in filenames:
        print(filename, isCyrillic(filename), isCyrillicOrd(filename))
asciifile.txt False False
.txt True True
ї́.txt True True
संस्कृत.txt False False