R, , -:
# Tag each row with the gender class of its preceding word, then count the
# classes per node. Rows whose preceding word is in neither list get "rest".
is_neuter = df["precedingWord"].isin(neuter)
is_non_neuter = df["precedingWord"].isin(non_neuter)
df["gender"] = "rest"
df.loc[is_neuter, "gender"] = "neuter"
# Applied last so that a word present in both lists ends up as "non_neuter".
df.loc[is_non_neuter, "gender"] = "non_neuter"
pd.crosstab(index=df["node"], columns=df["gender"])
gender neuter non_neuter rest
node
A-bom 0 4 2
acroniem 3 0 2
act 3 2 1
Note: if any word in neuter or non_neuter never occurs in precedingWord, selecting those words as columns of a crosstab raises a KeyError.
, .
, :
# Frequency table: one row per node, one column per distinct preceding word.
ct = pd.crosstab(index=df["node"], columns=df["precedingWord"])
:
pW dat de die een het n t
node
A-bom 0 3 1 1 0 1 0
acroniem 0 0 0 1 2 1 1
act 1 1 1 0 1 1 1
:
# Collapse the per-word crosstab `ct` into three columns per node:
# "neuter", "non_neuter" and "rest" (every remaining preceding word).
neuter = ["t", "het", "dat"]
non_neuter = ["de", "die"]

freqDf = pd.DataFrame()
for label, words in (("neuter", neuter), ("non_neuter", non_neuter)):
    # reindex supplies an all-zero column for any listed word that never
    # occurs in the data, where plain ct[words] would raise KeyError.
    freqDf[label] = ct.reindex(columns=words, fill_value=0).sum(axis=1)
    # Remove the counted columns from ct so that only the "rest" words remain.
    # `inplace` must be a real bool (pandas rejects 1), and errors="ignore"
    # tolerates listed words that are absent from ct.
    ct.drop(columns=words, inplace=True, errors="ignore")
# Whatever is left in ct belongs to neither gender class.
freqDf["rest"] = ct.sum(axis=1)
freqDf:
neuter non_neuter rest
node
A-bom 0 4 2
acroniem 3 0 2
act 3 2 1