, , -.
:
$w_{i-1}$
, , $p_{KN}(w_i \mid w_{i-1})$, .
, , . , w i-1 ( ) w i , , , , , "" 1. , , <= 1.
import nltk
from nltk.corpus import gutenberg
from nltk.util import ngrams
# Lazily generate word trigrams for every sentence of the Gutenberg corpus,
# padding both ends of each sentence with "BOS" / "EOS" markers.
gut_ngrams = (
    ngram
    for sent in gutenberg.sents()
    for ngram in ngrams(
        sent,
        3,
        pad_left=True,
        pad_right=True,
        left_pad_symbol="BOS",
        right_pad_symbol="EOS",
    )
)

# Count the trigrams and build the Kneser-Ney smoothed distribution from
# those counts.
freq_dist = nltk.FreqDist(gut_ngrams)
kneser_ney = nltk.KneserNeyProbDist(freq_dist)

# Print the smoothed probability of each sample whose first two tokens are
# ("I", "confess"), then print the total of those probabilities.
prob_sum = 0
for trigram in kneser_ney.samples():
    if trigram[0] == "I" and trigram[1] == "confess":
        # Look the probability up once and reuse it for the sum and the print.
        prob = kneser_ney.prob(trigram)
        prob_sum += prob
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("{0}:{1}".format(trigram, prob))
print(prob_sum)
, NLTK Gutenberg, .
(u'I', u'confess', u'.--'):0.00657894736842
(u'I', u'confess', u'what'):0.00657894736842
(u'I', u'confess', u'myself'):0.00657894736842
(u'I', u'confess', u'also'):0.00657894736842
(u'I', u'confess', u'there'):0.00657894736842
(u'I', u'confess', u',"'):0.0328947368421
(u'I', u'confess', u'that'):0.164473684211
(u'I', u'confess', u'"--'):0.00657894736842
(u'I', u'confess', u'it'):0.0328947368421
(u'I', u'confess', u';'):0.00657894736842
(u'I', u'confess', u','):0.269736842105
(u'I', u'confess', u'I'):0.164473684211
(u'I', u'confess', u'unto'):0.00657894736842
(u'I', u'confess', u'is'):0.00657894736842
0.723684210526
, (.72) 1, , , , "I", "". .28 w i s, "I" "" . , ngrams, , , , 0 ngrams.
# Text whose word trigrams are to be scored. Adjacent string literals are
# concatenated, so the text is the same as one long literal.
sample_text = (
    "What a piece of work is man! how noble in reason! how infinite in faculty! in "
    "form and moving how express and admirable! in action how like an angel! in apprehension how like a god! "
    "the beauty of the world, the paragon of animals!"
)

# Tokenize before building trigrams: nltk.trigrams() iterates the sequence it
# is given, so passing the raw string would yield character trigrams rather
# than word trigrams. wordpunct_tokenize splits words from punctuation with a
# regular expression and needs no extra model data.
# NOTE(review): presumably this matches how gutenberg.sents() is tokenized --
# confirm against the corpus reader's tokenizer.
# NOTE(review): this rebinds the module-level name `ngrams`, which hides the
# nltk.util.ngrams function imported at the top of the file from here on.
ngrams = nltk.trigrams(nltk.wordpunct_tokenize(sample_text))
? , .