You can try the following:
# Example input: one row per (group, feature) observation with its value.
df <- data.frame(
  group = rep(c("A", "B", "C"), times = 3),
  feature = c("x", "x", "x", "y", "y", "z", "z", "w", "t"),
  value = c(1, 2, 1, 3, 2, 1, 2, 2, 3),
  stringsAsFactors = FALSE
)
# Target shape: one row per pair of groups that share a feature, where value1
# and value2 are the values of group1 and group2 for that feature. In `df`,
# feature x has A = 1, B = 2, C = 1, so value2 for (A,B), (A,C), (B,C) is
# c(2, 1, 1).
df_desired <- data.frame(
  group1 = c("A", "A", "B"),
  group2 = c("B", "C", "C"),
  shared_feature = c("x", "x", "x"),
  value1 = c(1, 1, 2),
  value2 = c(2, 1, 1)
)
library(data.table)
# Count the rows recorded for each feature. Assuming each group lists a given
# feature at most once, this is the number of groups that have the feature.
df_agg = aggregate(value ~ feature, data = df, FUN = length)
# Keep the features whose row count equals the number of distinct groups,
# i.e. the features that occur in every group.
shared_feats = df_agg$feature[df_agg$value==length(unique(df$group))]
# Drop the rows of all features that are not in the shared set.
df = df[df$feature %in% shared_feats,]
#' Build every pair of groups for the rows of a single feature
#'
#' @param feat_df A data frame with the columns `group`, `feature` and
#'   `value`, holding the rows of one feature.
#' @return A data frame with the columns `group1`, `group2`, `feature`,
#'   `value1` and `value2`: one row per pair of entries in `feat_df$group`
#'   (in the order produced by `combn()`), together with the value of each
#'   group. Zero rows when `feat_df` has fewer than two rows.
create_comb_df <- function(feat_df) {
  groups <- feat_df$group

  # combn() stops with "n < m" for fewer than two elements, and expands a
  # single positive number n into seq_len(n); neither is wanted here, so a
  # feature without at least two rows yields an empty result instead.
  if (length(groups) < 2L) {
    return(data.frame(
      group1 = groups[0L],
      group2 = groups[0L],
      feature = feat_df$feature[0L],
      value1 = feat_df$value[0L],
      value2 = feat_df$value[0L],
      stringsAsFactors = FALSE
    ))
  }

  # combn() returns one pair per column; transpose to get one pair per row.
  df2 <- as.data.frame(t(combn(groups, 2L)), stringsAsFactors = FALSE)
  colnames(df2) <- c("group1", "group2")
  df2$feature <- feat_df$feature[1L]
  # Look up each side's value through the position of its group in feat_df.
  df2$value1 <- feat_df$value[match(df2$group1, groups)]
  df2$value2 <- feat_df$value[match(df2$group2, groups)]
  df2
}
# Split the filtered rows by feature, build the group pairs for each piece,
# and stack the per-feature tables into one data.table.
rbindlist(
  lapply(
    X = split(x = df, f = as.character(df$feature)),
    FUN = create_comb_df
  )
)
Output:
group1 group2 feature value1 value2
1: A B x 1 2
2: A C x 1 1
3: B C x 2 1
Or, to get every feature that is shared by at least two groups (not only the features present in all groups), replace
shared_feats = df_agg$feature[df_agg$value==length(unique(df$group))]
with
shared_feats = df_agg$feature[df_agg$value>1]
which gives:
group1 group2 feature value1 value2
1: A B x 1 2
2: A C x 1 1
3: B C x 2 1
4: A B y 3 2
5: C A z 1 2
Hope this helps!