, . , .
, , :
from enchant.checker import SpellChecker
# Sample sentence containing two deliberate misspellings ("sme", "speling").
text = "this is sme text with a speling mistake."
chkr = SpellChecker("en_US", text)

# Report every word the checker flags, then substitute a placeholder for it.
for err in chkr:
    print(f"{err.word} at position {err.wordpos}")
    err.replace("SPAM")

# Fetch the text once, after all replacements have been applied.
t = chkr.get_text()
print(f"\n{t}")
:
sme at position 8
speling at position 25
ing at position 29
ng at position 30
AMMstake at position 32
ake at position 37
ke at position 38
AMM at position 40
this is SPAM text with a SPAMSSPSPAM.SSPSPAM
, "" , , , , "" err var.
http://pythonhosted.org/pyenchant/api/enchant.checker.html, , , .
. , , :
Original:
>>> text = "This is sme text with a fw speling errors in it."
>>> chkr = SpellChecker("en_US",text)
>>> for err in chkr:
... err.replace("SPAM")
...
>>> chkr.get_text()
'This is SPAM text with a SPAM SPAM errors in it.'
:
from enchant.checker import SpellChecker
# Same sentence as the library's documentation example (three misspellings).
text = "This is sme text with a fw speling errors in it."
chkr = SpellChecker("en_US", text)

# Report every word the checker flags, then substitute a placeholder for it.
for err in chkr:
    print(f"{err.word} at position {err.wordpos}")
    err.replace("SPAM")

# Fetch the text once, after all replacements have been applied.
t = chkr.get_text()
print(f"\n{t}")
-:
sme at position 8
fw at position 25
speling at position 30
ing at position 34
ng at position 35
AMMrors at position 37 #<---- seems to add in parts of "SPAM"
This is SPAM text with a SPAM SPAMSSPSPAM in it. #<---- my output ???
, , . "" , , . , "" 100% . : " ". . , , NLP, .
import enchant
from enchant.checker import SpellChecker
from nltk.metrics.distance import edit_distance
class MySpellChecker:
    """Choose a conservative replacement for a possibly misspelled word.

    A dictionary suggestion is accepted only when its distance from the
    original word does not exceed ``max_dist``; otherwise the word is
    returned unchanged.
    """

    def __init__(self, dict_name='en_US', max_dist=2, distance=None):
        """Create the checker.

        Args:
            dict_name: Language tag passed to ``enchant.Dict``.
            max_dist: Largest distance a suggestion may have from the
                word and still be accepted.
            distance: Optional callable ``(word, suggestion) -> number``.
                Defaults to the ``edit_distance`` function imported by
                this module.
        """
        self.spell_dict = enchant.Dict(dict_name)
        self.max_dist = max_dist
        self.distance = edit_distance if distance is None else distance

    def replace(self, word):
        """Return the first suggestion within ``max_dist`` of *word*.

        Suggestions are examined in the order the dictionary returns them.
        If there are no suggestions, or none is close enough, *word* is
        returned unchanged.
        """
        # Tolerate a falsy result from the dictionary, as the original did.
        suggestions = self.spell_dict.suggest(word) or ()
        for suggestion in suggestions:
            if self.distance(word, suggestion) <= self.max_dist:
                # Return the suggestion that passed the check, not
                # suggestions[0], which may itself be too far away.
                return suggestion
        return word
if __name__ == '__main__':
    # Demo: run the checker over a sentence and replace each flagged word
    # with whatever MySpellChecker.replace returns for it.
    text = "this is sme text with a speling mistake."
    my_spell_checker = MySpellChecker(max_dist=1)
    chkr = SpellChecker("en_US", text)

    for err in chkr:
        print(f"{err.word} at position {err.wordpos}")
        correction = my_spell_checker.replace(err.word)
        err.replace(correction)

    # Fetch the text once, after all replacements have been applied.
    t = chkr.get_text()
    print(f"\n{t}")