, , ? ok_expected , , & , . , & , cgi.escape, .
, : , . , getEntity.
import cgi
import re
import htmlentitydefs
def replace_entity(match):
    """Swap a numeric character reference for its named HTML entity.

    ``match`` is a regex match whose first group holds the decimal code
    point (for example ``169`` taken from ``&#169;``).  When
    ``htmlentitydefs.codepoint2name`` has a name for that code point the
    named form (``&copy;``) is returned; otherwise the text that was
    matched is returned unchanged.
    """
    codepoint = int(match.group(1))
    entity_name = htmlentitydefs.codepoint2name.get(codepoint)
    # No (or an empty) name: keep the numeric reference exactly as found.
    if not entity_name:
        return match.group(0)
    return "&%s;" % entity_name
def convertEntities(s):
    """Return ``s`` as ASCII text with HTML entities for special characters.

    The conversion runs in three passes:

    1. ``cgi.escape`` rewrites the markup characters ``&``, ``<`` and
       ``>`` as ``&amp;``, ``&lt;`` and ``&gt;``.
    2. Encoding to ASCII with ``xmlcharrefreplace`` turns every remaining
       non-ASCII character into a numeric reference such as ``&#169;``.
    3. Each numeric reference is handed to ``replace_entity``, which
       substitutes the named entity when one exists.
    """
    escaped = cgi.escape(s)
    ascii_only = escaped.encode('ascii', 'xmlcharrefreplace')
    return re.sub("&#([0-9]+);", replace_entity, ascii_only)
# Self-check for convertEntities: one pure-ASCII sample and one sample
# containing Latin-1 symbols, each compared against its expected output.
ok = 'ascii: !@#$%^&*()<>'
# The symbols (copyright, registered, degree, plus-minus, one-quarter) are
# spelled as escapes: raw non-ASCII bytes in a Python 2 source file without
# an encoding declaration are a SyntaxError (PEP 263).
not_ok = u'extended-ascii: \xa9\xae\xb0\xb1\xbc'

# convertEntities runs cgi.escape first, so '&', '<' and '>' come back as
# entities even for plain ASCII input; the expected value must reflect that.
ok_expected = 'ascii: !@#$%^&amp;*()&lt;&gt;'
# Every non-ASCII symbol above has a named HTML entity, which is what
# replace_entity substitutes for the numeric reference.
not_ok_expected = 'extended-ascii: &copy;&reg;&deg;&plusmn;&frac14;'

ok_2 = convertEntities(ok)
not_ok_2 = convertEntities(not_ok)

if ok_2 == ok_expected:
    print('ascii worked')
else:
    print('ascii failed: "%s"' % ok_2)

if not_ok_2 == not_ok_expected:
    print('extended-ascii worked')
else:
    print('extended-ascii failed: "%s"' % not_ok_2)
source
share