n-, , . n- google, n-, nltk. , , . , . .
, , n-.
EDIT: Here is an example using nltk:
import math
from nltk import NgramModel
from nltk.corpus import brown
from nltk.util import ngrams
from nltk.probability import LidstoneProbDist
# Order of the n-gram model; the scoring code below treats n = 2 as bigrams.
n = 2


def est(fdist, bins):
    """Estimator factory handed to NgramModel.

    Args:
        fdist: frequency distribution to be smoothed.
        bins: accepted so the signature matches what the caller supplies, but
            deliberately not forwarded (presumably NgramModel passes it when
            invoking the estimator -- confirm against the installed NLTK).

    Returns:
        A LidstoneProbDist built from ``fdist`` with 0.2 as its second
        argument (the additive smoothing constant, per the NLTK docs).
    """
    return LidstoneProbDist(fdist, 0.2)


# Train the model on the words of the Brown corpus 'news' category.
lm = NgramModel(n, brown.words(categories='news'), estimator=est)
def sentenceprob(sentence):
    """Score a sentence with the module-level n-gram model ``lm``.

    The sentence is split on whitespace, turned into n-grams of order ``n``,
    and ``lm.logprob(last_word, preceding_words)`` is summed over them.

    Args:
        sentence: the text to score, as a single string.

    Returns:
        The sum of the per-n-gram ``lm.logprob`` values, or 0 when the
        sentence yields no n-grams. Presumably this is a cost (negative log
        probability), so larger means less likely -- confirm against the
        NgramModel documentation.
    """
    # NOTE(review): the original lowercased ``sentence`` only after the
    # n-grams had been built, so the lowercasing never affected the score.
    # The dead assignment is dropped here to keep results unchanged. If case
    # folding is wanted, it must happen before split() and be applied to the
    # training words as well.
    tokens = sentence.split()
    return sum(lm.logprob(gram[-1], gram[:-1]) for gram in ngrams(tokens, n))
# Demo: score one ordinary English sentence and one string of gibberish.
sentence1 = "This is a standard English sentence"
sentence2 = "oomfi ow Ba wmnondmam BE wBwHo<oBoBm. Bowman as: Ham: 8 ooww om $5"
# A parenthesised single-argument print behaves identically on Python 2 and
# also parses on Python 3, unlike the bare print statement.
print(sentenceprob(sentence1))
print(sentenceprob(sentence2))
Output:
$ python lmtest.py
42.7436688972
158.850086668
. (, ).