, , , .
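The approach models the distribution of words. As a first approximation, all words are assumed to be independently distributed, so only their relative frequencies are needed. These frequencies are assumed to follow Zipf's law: the word of rank $n$ in the frequency-sorted list has probability roughly $1/(n \log N)$, where $N$ is the number of words in the dictionary.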
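With the model fixed, dynamic programming can infer the positions of the spaces. The most likely segmentation is the one that maximizes the product of the probabilities of its words. Instead of multiplying probabilities directly, the code sums a cost defined as the logarithm of the inverse probability, $\log(n \log N)$ for the word of rank $n$ in a dictionary of $N$ words, which avoids numerical underflow.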
import math
# Load the whitespace-separated word list. The file name suggests the words
# are ordered from most to least frequent -- NOTE(review): confirm ordering.
# The context manager closes the file handle deterministically.
with open("words-by-frequency.txt") as word_file:
    words = word_file.read().split()

# Cost of the word at 0-based position i is log((i + 1) * log(N)), where
# N = len(words). log(N) is loop-invariant, so compute it once.
_log_word_count = math.log(len(words))
wordcost = {
    word: math.log((rank + 1) * _log_word_count)
    for rank, word in enumerate(words)
}

# Length of the longest known word; bounds how far back a segmenter must look.
maxword = max(len(word) for word in words)
def infer_spaces(s, costs=None, max_len=None):
    """Infer the location of spaces in a string written without spaces.

    Uses dynamic programming: for every prefix of ``s`` it finds the cheapest
    way to split that prefix into words, where the cost of a split is the sum
    of the per-word costs. Substrings missing from the cost table cost
    infinity, so a run of unknown characters is emitted one character at a
    time (ties are broken in favour of the shorter last word).

    Args:
        s: The string to segment.
        costs: Optional mapping from word to cost. Defaults to the
            module-level ``wordcost`` table.
        max_len: Optional longest candidate word length to consider.
            Defaults to the module-level ``maxword`` when ``costs`` is not
            given, otherwise to the longest key in ``costs``.

    Returns:
        The words of the cheapest segmentation joined by single spaces
        (an empty string for empty input).
    """
    if costs is None:
        costs = wordcost
        if max_len is None:
            max_len = maxword
    elif max_len is None:
        max_len = max((len(word) for word in costs), default=0)

    # Always look back at least one character so every position has a
    # candidate, even with an empty cost table.
    window = max(1, max_len)
    unknown = float("inf")

    # best_cost[i] is the minimal cost of segmenting s[:i]; best_len[i] is the
    # length of the last word in that segmentation.
    best_cost = [0]
    best_len = [0]
    for end in range(1, len(s) + 1):
        longest = min(window, end)
        # Tuples compare by cost first, then by word length.
        cost, length = min(
            (best_cost[end - k] + costs.get(s[end - k:end], unknown), k)
            for k in range(1, longest + 1)
        )
        best_cost.append(cost)
        best_len.append(length)

    # Walk backwards through the recorded word lengths to recover the words;
    # no need to recompute the minimum at each step.
    pieces = []
    end = len(s)
    while end > 0:
        length = best_len[end]
        pieces.append(s[end - length:end])
        end -= length
    return " ".join(reversed(pieces))
# Demo: segment a sample run-together string and print the result.
s = "thumbgreenappleactiveassignmentweeklymetaphor"
segmented = infer_spaces(s)
print(segmented)
125k, .
: thumbgreenappleactiveassignmentweeklymetaphor.
: thumb . .
: , , , odelimitedcharactersinthemforexamplethumbgreenappleactiveassignmentweeklymetapho rapparentlytherearethumbgreenappleetcinthestringialsohavealargedictionarytoquery whetherthewordisreasonablesowhatsthefastestwayofextractionalot.
: , html, , , . , , .. , , .
: itwasadarkandstormynighttherainfellintorrentsexceptatocial intervalswhenchwatchhecked theaviolentgustofwindwhichsweptupthestreetsforitisinlondonthatceneliesrattlingalhousetopsandfiercelyagitatingthescantyflameamphelthstthgrgledagainstthedarkness.
: , , , , , , , .
, . - , , , , .
, . , , .
, , . , 10000 1000 , . .