How to calculate the match in the table?

I have a simple matrix like

# Purchase log as a two-column character matrix: one row per (user, product).
test <- matrix(
  c(
    "u1", "p1",
    "u1", "p2",
    "u2", "p2",
    "u2", "p3",
    "u3", "p1",
    "u4", "p2",
    "u5", "p1",
    "u5", "p3",
    "u6", "p3",
    "u7", "p4",
    "u7", "p3",
    "u8", "p1",
    "u9", "p4"
  ),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(NULL, c("user", "product"))
)
# Data-frame copy of the matrix, used by the table() calls.
test1 <- as.data.frame(test)

Test:

   user   product
1  u1      p1
2  u1      p2
3  u2      p2 
4  u2      p3
5  u3      p1
6  u4      p2
7  u5      p1
8  u5      p3
9  u6      p3
10 u7      p4
11 u7      p3
12 u8      p1
13 u9      p4

I want to calculate how many users bought each pair of products, for example p1 & p2, p2 & p3, ...

table(test1$product,test1$product) gives me the following:

     p1   p2  p3  p4
 p1   4   0   0   0
 p2   0   3   0   0
 p3   0   0   4   0
 p4   0   0   0   2

How can I get the correct result like:

     p1   p2  p3  p4
 p1   4   1   1   0
 p2   1   3   1   0
 p3   1   1   4   1
 p4   0   0   1   2
+3
source share
3 answers

Looking at your desired result, you are looking for the function crossprod:

# table(test1) cross-tabulates user (rows) by product (columns);
# crossprod(x) is t(x) %*% x, so entry [i, j] sums, over users, the product
# of that user's counts for products i and j.
# NOTE(review): with one row per (user, product), as in this data, that is the
# number of users who bought both; repeated purchase rows would inflate it.
crossprod(table(test1))
#        product
# product p1 p2 p3 p4
#      p1  4  1  1  0
#      p2  1  3  1  0
#      p3  1  1  4  1
#      p4  0  0  1  2

This is the same as crossprod(table(test1$user, test1$product)) (reflecting Dennis's comment).

+7
source

Ananda's solution is superior (it is more lightweight and does not require an external package), but I'll offer another. I believe this is called an adjacency matrix (smarter people feel free to edit this if I'm wrong):

# Requires the external qdap package.
library(qdap)
# Pass the user-by-product table to adjmat() and keep its `adjacency` element.
adjmat(table(test1))$adjacency

##        product
## product p1 p2 p3 p4
##      p1  4  1  1  0
##      p2  1  3  1  0
##      p3  1  1  4  1
##      p4  0  0  1  2
+2

A , , , deleted. .

Using RcppEigen:

# Compile a small C++ routine at run time using inline + RcppEigen.
library(RcppEigen)
library(inline)
# C++ body: maps both R arguments as Eigen integer matrices (MatrixXi) and
# returns adjoint(B) * C wrapped in a one-element list.
prodFun <- '
        typedef Eigen::Map<Eigen::MatrixXi> MapMti;
        const MapMti B(as<MapMti>(BB));
        const MapMti C(as<MapMti>(CC));
        return List::create(B.adjoint() * C);
        '

# Build the R-callable function; BB and CC are its two matrix arguments.
funCPr <- cxxfunction(signature(BB= "matrix", CC = "matrix"),
                     prodFun, plugin = "RcppEigen") 
tbl <- table(test1)
# [[1]] pulls the product out of the returned list; as the output below shows,
# the result carries no dimnames.
funCPr(tbl, tbl)[[1]]
#     [,1] [,2] [,3] [,4]
#[1,]    4    1    1    0
#[2,]    1    3    1    0
#[3,]    1    1    4    1
#[4,]    0    0    1    2

# Larger simulated data for timing: 1e6 purchase rows drawn with replacement
# from 5000 user ids and 50 product labels; the seed fixes the draw.
set.seed(24)
test2 <- data.frame(user = sample(1:5000, 1e6, replace=TRUE),
    product = sample(paste0("p", 1:50), 1e6, replace = TRUE),
    stringsAsFactors=FALSE)
# The table is built once so that only the cross-product step is timed.
tbl1 <- table(test2)

# Time the three approaches, 10 runs each; unit = "relative" reports timings
# as ratios (cPP is 1.0 in the output below).
library(microbenchmark)
microbenchmark(cPP = funCPr(tbl1, tbl1)[[1]], 
              CrossP = crossprod(tbl1),
              adjMat = adjmat(tbl1)$adjacency,
              unit = "relative", times = 10L)
#Unit: relative
#   expr      min       lq     mean   median       uq       max neval cld
#    cPP 1.000000 1.000000 1.000000 1.000000 1.000000  1.000000    10  a 
# CrossP 2.079867 2.070509 2.234376 2.074388 2.290516  2.676798    10  a 
# adjMat 6.223034 6.500791 9.619088 7.197824 7.771270 31.394812    10   b

. , table Rcpp

0

Source: https://habr.com/ru/post/1675952/


All Articles