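If you need to compare strings approximately, tolerating typing mistakes (insertions, deletions, substitutions, transpositions of adjacent characters, etc.), you can use the Damerau-Levenshtein distance.
Besides it, there are other common measures such as levenshtein and jaro-winkler, which may suit your data better.
Example implementation: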
class DLDistance:
    """Restricted Damerau-Levenshtein distance from a fixed base string.

    The distance counts insertions, deletions, substitutions and swaps of
    two adjacent characters. This is the "optimal string alignment"
    variant: a swapped pair is never edited again, so for example the
    distance between 'ca' and 'abc' is 3, not 2.

    The base string is supplied once; ``distance`` can then be called
    repeatedly with different strings to compare against it.

    Attributes:
        s1: the base string.
        lenstr1: length of ``s1``.
        d: dict mapping ``(i, j)`` to the distance between ``s1[:i + 1]``
            and ``s2[:j + 1]``; index -1 stands for the empty prefix.
            The dict is reused (and overwritten) by every ``distance``
            call, so one instance must not be used concurrently.
    """

    def __init__(self, s1):
        """Store the base string and pre-fill the ``j == -1`` column.

        Args:
            s1: base sequence (normally a string) to compare others with.
        """
        self.s1 = s1
        self.d = {}
        self.lenstr1 = len(self.s1)
        # Turning the first i + 1 characters of s1 into an empty string
        # takes i + 1 deletions. This column never depends on s2, so it
        # is filled only once. `range` (not `xrange`) keeps the class
        # usable on both Python 2 and Python 3.
        for i in range(-1, self.lenstr1 + 1):
            self.d[(i, -1)] = i + 1

    def distance(self, s2):
        """Return the edit distance between the base string and ``s2``.

        Args:
            s2: sequence to compare with the base string.

        Returns:
            The minimal number of insertions, deletions, substitutions
            and adjacent transpositions as an int (0 for equal inputs).
        """
        s1 = self.s1
        d = self.d
        lenstr2 = len(s2)

        # Building the first j + 1 characters of s2 from an empty string
        # takes j + 1 insertions. This row depends on s2, so it is reset
        # on every call.
        for j in range(-1, lenstr2 + 1):
            d[(-1, j)] = j + 1

        for i, c1 in enumerate(s1):
            for j, c2 in enumerate(s2):
                cost = 0 if c1 == c2 else 1
                best = min(
                    d[(i - 1, j)] + 1,         # deletion
                    d[(i, j - 1)] + 1,         # insertion
                    d[(i - 1, j - 1)] + cost,  # match / substitution
                )
                # Adjacent transposition: the last two characters of both
                # prefixes are the same pair in swapped order. Needs i > 0
                # and j > 0 so that a previous character exists.
                if i and j and c1 == s2[j - 1] and s1[i - 1] == c2:
                    best = min(best, d[(i - 2, j - 2)] + cost)
                d[(i, j)] = best

        # For empty inputs this reads a pre-filled boundary cell.
        return d[(self.lenstr1 - 1, lenstr2 - 1)]
if __name__ == '__main__':
    # Demo: print the distance from one base string to several candidates.
    base = u'abs'
    cmpstrs = [u'abs', u'sdfbasz', u'asdf', u'hfghfg']
    dl = DLDistance(base)
    for s in cmpstrs:
        # Single-argument parenthesised print produces the same output on
        # Python 2 (print statement) and Python 3 (print function).
        print("damerau_levenshtein")
        print(dl.distance(s))
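Keep in mind that the running time and the memory used are proportional to N * M, where N is the length of the first string and M is the length of the second. (The table of intermediate results is stored in a dictionary on the object, is filled cell by cell for every pair of positions, and is reused between calls.)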
levenshtein distance: https://en.wikipedia.org/wiki/Levenshtein_distance
jaro-winkler: https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance