As @Hulk said, this is similar to Levenshtein distance. The only difference is that replacements are not allowed, but this can be corrected using replacement cost 2, which is the same as removing a character from both lines. Example:
def dist(s1, s2): cur = list(range(len(s2) + 1)) prev = [0] * (len(s2) + 1) for i in range(len(s1)): cur, prev = prev, cur cur[0] = i + 1 for j in range(len(s2)): # Substitution is same as two deletions sub = 0 if s1[i] == s2[j] else 2 cur[j+1] = min(prev[j] + sub, cur[j] + 1, prev[j+1] + 1) return cur[-1] cases=[('cat','bat'), ('bat','cat'), ('broom', 'ballroom'), ('boat','got'), ('foo', 'bar'), ('foobar', '')] for s1, s2 in cases: print('{} & {} = {}'.format(s1, s2, dist(s1, s2)))
Output:
cat & bat = 2 bat & cat = 2 broom & ballroom = 3 boat & got = 3 foo & bar = 6 foobar & = 6
source share