One approach is to compare the n-grams of the two token lists:
# Two sample sentences, tokenized on whitespace (punctuation stays attached
# to the neighbouring word, e.g. 'example,').
a = "this is an example, whatever".split()
b = "this is another example, whatever".split()
def ngrams(string, n):
    """Return the set of all n-grams contained in ``string``.

    Despite the parameter name, ``string`` may be any sliceable sequence,
    for example a list of words or a ``str`` (which yields character n-grams).

    Args:
        string: Sliceable sequence of hashable items.
        n: Length of each n-gram.

    Returns:
        A set of ``n``-tuples, one per distinct run of ``n`` consecutive
        items. The set is empty when the sequence has fewer than ``n`` items
        or when ``n`` is less than 1.
    """
    # Slice i starts at offset i, so zipping the n slices lines up each item
    # with its n - 1 successors. zip stops at the shortest slice, which drops
    # the incomplete windows at the end of the sequence.
    shifted = [string[i:] for i in range(n)]
    return set(zip(*shifted))
def common_ngrams(string1, string2, n):
    """Return the n-grams that occur in both ``string1`` and ``string2``.

    Args:
        string1: First sequence, passed to ``ngrams``.
        string2: Second sequence, passed to ``ngrams``.
        n: Length of each n-gram.

    Returns:
        The set intersection of the two sequences' n-gram sets.
    """
    return ngrams(string1, n) & ngrams(string2, n)
Example usage:
# Sets are unordered, so the elements may be printed in a different order
# than shown in the output comments below.
print(common_ngrams(a, b, 2))
# -> {('this', 'is'), ('example,', 'whatever')}
print(common_ngrams(a, b, 1))
# -> {('this',), ('is',), ('example,',), ('whatever',)}
To explain, ngrams relies on the following zip idiom:
zip(*[string[i:] for i in range(n)])
which is equivalent to
zip(string, string[1:], string[2:])
when n = 3.
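Because zip stops at the shortest input, each resulting tuple is a run of n consecutive items: the first, second, third, ... n-gram of the sequence.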