Many emoji characters are lost when reading a file in Python

I have a list of 1,500 emoji in a JSON file, and I want to import them into my Python code. I read the file and convert it to a Python dictionary, but I end up with only 143 entries. How can I import all of the emoji into my code? This is my code:

import sys
import ast

# Read the whole JSON file into one string.
file = open('emojidescription.json','r').read()
# Translation table that maps every code point from U+10000 up to
# sys.maxunicode to U+FFFD (the replacement character).
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Apply the table, then parse the resulting text as a Python literal.
# NOTE(review): after the translation, keys that consisted only of non-BMP
# characters are no longer distinct, and duplicate keys in a dict literal
# collapse into one entry -- presumably why fewer entries survive than the
# file contains; confirm against the real data.
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))

#word = word.replaceAll(",", " ");

# Parallel lists: the emoji strings and their description lists.
keys = list(emoji_dictionary["emojis"][0].keys())
values = list(emoji_dictionary["emojis"][0].values())

# Opened for appending, but nothing below writes to it; the generated
# lines are only printed.
file_write = open('output.txt','a')

print(len(keys))
for i in range(len(keys)):
    try:
        # Use the first description of the emoji as the replacement text.
        content = 'word = word.replace("{0}", "{1}")'.format(keys[i],values[i][0])
    except Exception as e:
        # Indexing failed (e.g. an empty description list): fall back to an
        # empty replacement string.
        content = 'word = word.replace("{0}", "{1}")'.format(keys[i],'')
    #file.write()
    #print(keys[i],values[i])
    print(content)


file_write.close()

This is my input example.

{

    "emojis": [
        {

            "👨‍🎓": ["Graduate"],
            "©": ["Copy right"],
            "®": ["Registered"],
            "👨‍👩‍👧": ["family"],
            "👩‍❤️‍💋‍👩": ["love"],
            "™": ["trademark"],
            "👨‍❤‍👨": ["love"],
            "⌚": ["time"],
            "⌛": ["wait"],
            "⭐": ["star"],
            "🐘": ["Elephant"],
            "🐕": ["Cat"],
            "🐜": ["ant"],
            "🐔": ["cock"],
            "🐓": ["cock"],

This is my result; the 143 is the number of emoji that were loaded.

143

word = word.replace("", "family")

word = word.replace("Ⓜ", "")

word = word.replace("♥", "")

word = word.replace("♠", "")

word = word.replace("⌛", "wait")

+4
source share
2

I am not sure why you end up with only 143 entries out of 1500.

Either way, what your loop does boils down to the following (simplified):

# Simplified outline of an index-based loop over a dict's keys and values
# (pseudo-code: the first line is a placeholder, not runnable Python).
d = ..read json as python dict.
keys = d.keys()
values = d.values()
# Walk both sequences in step by position.
for i in range(len(keys)):
    key = keys[i]
    value = values[i]

That is not how you would normally iterate in Python. To walk two sequences in step, Python has the built-in zip:

# Same outline, pairing each key with its value via zip instead of indexing
# (pseudo-code: the first line is a placeholder, not runnable Python).
d = ..read json as python dict.
keys = d.keys()
values = d.values()
for key, value in zip(keys, values):  # zip picks pair-wise elements
    ...

Better still, a dict has the items() method for exactly this:

# items() yields each (key, value) pair of the dict directly.
for key, value in d.items():
    ...

Using the json module to read the JSON (instead of ast), you get:

import json

# Read the emoji table. json.load() accepts a binary file object, and the
# `with` block guarantees the input file is closed once it has been parsed.
with open('emoji.json', 'rb') as src:
    emojis = json.load(src)

# Write one `word = word.replace(...)` statement per emoji. The output file
# is opened in binary mode, so each line is encoded explicitly ('u8' is an
# alias for UTF-8).
with open('output.py', 'wb') as fp:
    for k, v in emojis['emojis'][0].items():
        # v is the list of descriptions for the emoji k; use the first one,
        # or an empty replacement when the list is empty.
        val = u'word = word.replace("{0}", "{1}")\n'.format(k, v[0] if v else "")
        fp.write(val.encode('u8'))
+2

You are replacing the emojis with 0xfffd here:

# Table mapping every code point from U+10000 up to sys.maxunicode to
# U+FFFD (the replacement character).
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# The file text is translated with that table before being parsed as a
# Python literal, so those characters are already gone at parse time.
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))

Don't do that!

Just use json:

import json

# Parse the UTF-8 encoded JSON file into a dict.
with open('emojidescription.json', encoding="utf8") as source:
    emojis = json.load(source)

# Append one generated replace() statement per emoji to the output file.
with open('output.txt','a', encoding="utf8") as output:
    template = 'word = word.replace("{0}", "{1}")\n'
    for emoji, descriptions in emojis["emojis"][0].items():
        # Use the first description, or an empty string when there is none.
        first = descriptions[0] if descriptions else ""
        output.write(template.format(emoji, first))
+1

Source: https://habr.com/ru/post/1678933/


All Articles