I calculated the co-occurrence matrix with window size = 2.
First, write a function that returns the correct neighboring words for a given position (here I used `GetContext`).
Then create a matrix and add 1 to the corresponding entry whenever a specific word is present in the neighborhood (the context window) of another word.
Here is the Python code:
import numpy as np

# Toy corpus: one whitespace-tokenised sentence per entry.
CORPUS = [
    "abc def ijk pqr",
    "pqr klm opq",
    "lmn pqr xyz abc def pqr abc",
]

# Vocabulary whose co-occurrences are counted; row/column i of the matrix
# corresponds to top2000[i]. It is hand-picked here; the original note
# suggests deriving it from the corpus instead, e.g.
# list(set((' '.join(ctxs)).split(' '))).
top2000 = ["abc", "pqr", "def"]

# Word -> row/column lookup, built once instead of calling list.index() for
# every increment. setdefault keeps the first position of a repeated word,
# which is what list.index() would return.
word_index = {}
for position, vocab_word in enumerate(top2000):
    word_index.setdefault(vocab_word, position)

# Square count matrix sized from the vocabulary rather than a literal (3, 3),
# so changing top2000 cannot leave the matrix mis-sized.
a = np.zeros((len(top2000), len(top2000)), np.int32)

for sentence in CORPUS:
    for index, word in enumerate(sentence.split(' ')):
        if word not in word_index:
            continue
        print(word)
        # GetContext returns the neighbouring words of position `index`.
        context = GetContext(sentence, index)
        print(context)
        row = word_index[word]
        for word2 in context:
            if word2 in word_index:
                a[row, word_index[word2]] += 1

print(a)
The `GetContext` function:
def GetContext(sentence, index, window=2):
    """Return the words surrounding position ``index`` in ``sentence``.

    The sentence is split on single spaces. The result contains up to
    ``window`` words to the left of the target followed by up to ``window``
    words to the right, in sentence order; the target word itself is not
    included. Near the start or end of the sentence (or in sentences shorter
    than the window) fewer words are returned instead of raising.

    Args:
        sentence: Space-separated string of words.
        index: Zero-based position of the target word within the sentence.
        window: Maximum number of neighbours taken on each side (default 2).

    Returns:
        A list of neighbouring words, left neighbours first.

    Raises:
        IndexError: If ``index`` is not a valid position in the sentence.
        ValueError: If ``window`` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative")

    words = sentence.split(' ')
    if not 0 <= index < len(words):
        raise IndexError("index out of range for sentence")

    # Clamp the left edge at 0 so a negative start does not wrap around to
    # the end of the list; the right edge is clamped by slicing itself.
    left = words[max(0, index - window):index]
    right = words[index + 1:index + 1 + window]
    return left + right
Here is the result:
array([[0, 3, 3], [3, 0, 2], [3, 2, 0]])
source share