Creating a new data frame based on values from another data frame

The data frame is as follows:

id pom.1 pom.2 pom.3 pom.4 pom.5 pom.6 pom.7 pom.8
20764422   1   3  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
08049335   4   2   1   5   8   7   9   3
07668511   5   2   7  <NA>  <NA>  <NA>  <NA>  <NA>
20058102   7   4   2  <NA>  <NA>  <NA>  <NA>  <NA>
17318802   6   3   5   1   9   8   2  <NA>

where there is a list of 10 possible values that can be found in this data frame.

I need to create another data frame that will have 10 columns, one for each value from the list, and map it to the original data frame.

The new data frame should look like this:

id c1 c2 c3 c4 c5 c6 c7 c8 c9 c10
20764422 y n y n n n n n n n
08049335 y y y y y n y y y n
07668511 n y n n y n y n n n
20058102 n y n y n n y n n n
17318802 y y y n y y n y y n

where each column (c1-c10) corresponds to one value from the list of values. The entries "y" and "n" for each id indicate whether that value is present or absent in the original data frame.

I hope this explanation is clear enough to understand what needs to be done.

, , . , , .

!

+4
4

If 1 and 0 are acceptable instead of "y" and "n", a simple melt-and-cast is enough.

, (dput) , , , .

library(data.table)
# Melt to long form (one row per id/pom pair), then cast the values back out
# as columns; no aggregate function is supplied, so dcast counts occurrences.
dcast(
  melt(as.data.table(mydf), id.vars = "id"),
  formula = id ~ value
)
# Aggregate function missing, defaulting to 'length'
#          id 1 2 3 4 5 6 7 8 9 NA
# 1:  7668511 0 1 0 0 1 0 1 0 0  5
# 2:  8049335 1 1 1 1 1 0 1 1 1  0
# 3: 17318802 1 1 1 0 1 1 0 1 1  1
# 4: 20058102 0 1 0 1 0 0 1 0 0  5
# 5: 20764422 1 0 1 0 0 0 0 0 0  6

If you need "y" and "n" specifically, you can do the following:

# Build the y/n presence table: melt to long form without NAs, turn the values
# into a factor with all ten levels so absent codes still get a column, then
# cast back to wide form.
dcast(melt(as.data.table(mydf), "id", na.rm = TRUE)[          ## melt and remove NA
      , value := factor(value, 1:10)],                        ## factor value column 
      id ~ value,                                             ## pivot value by id
      ## any non-empty group means "present"; this always returns length 1,
      ## even when a value occurs more than once for the same id
      fun.aggregate = function(x) if (length(x) > 0L) "y" else "n",
      fill = "n", drop = FALSE)                               ## don't drop missing factors

Result:

##          id 1 2 3 4 5 6 7 8 9 10
## 1: 07668511 n y n n y n y n n  n
## 2: 08049335 y y y y y n y y y  n
## 3: 17318802 y y y n y y n y y  n
## 4: 20058102 n y n y n n y n n  n
## 5: 20764422 y n y n n n n n n  n

Update

An alternative using `tabulate` and `chartr`:

# Count, per row, how often each of the codes 1..10 occurs among the value
# columns, and use the ids (first column) as row names.
temp <- `rownames<-`(t(apply(mydf[-1], 1, function(x) tabulate(x, nbins = 10))), mydf[[1]])
# Cap the counts at 1 before translating: chartr() maps single characters, so
# a repeated value (count 2, 3, ...) would otherwise pass through untranslated.
temp[] <- chartr("01", "ny", pmin(temp, 1L))
temp
#          [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# 20764422 "y"  "n"  "y"  "n"  "n"  "n"  "n"  "n"  "n"  "n"  
# 08049335 "y"  "y"  "y"  "y"  "y"  "n"  "y"  "y"  "y"  "n"  
# 07668511 "n"  "y"  "n"  "n"  "y"  "n"  "y"  "n"  "n"  "n"  
# 20058102 "n"  "y"  "n"  "y"  "n"  "n"  "y"  "n"  "n"  "n"  
# 17318802 "y"  "y"  "y"  "n"  "y"  "y"  "n"  "y"  "y"  "n" 

Sample data (as produced by `dput`):

# Sample data: one row per id with up to eight observed codes (pom.1-pom.8);
# NA marks an unused slot. Ids are strings, so leading zeros are preserved.
mydf <- structure(
  list(
    id    = c("20764422", "08049335", "07668511", "20058102", "17318802"),
    pom.1 = c(1L, 4L, 5L, 7L, 6L),
    pom.2 = c(3L, 2L, 2L, 4L, 3L),
    pom.3 = c(NA, 1L, 7L, 2L, 5L),
    pom.4 = c(NA, 5L, NA, NA, 1L),
    pom.5 = c(NA, 8L, NA, NA, 9L),
    pom.6 = c(NA, 7L, NA, NA, 8L),
    pom.7 = c(NA, 9L, NA, NA, 2L),
    pom.8 = c(NA, 3L, NA, NA, NA)
  ),
  row.names = c(NA, 5L),
  class = "data.frame"
)
+5

- - data.frame, :

# Creating some data that looks like yours: an id column (101-105) followed by
# eight "pom" columns drawn at random from 1..10 and NA.
> df <- data.frame(matrix(c(101:105, sample(c(1:10, NA), 40, replace = TRUE)), 5, 9, dimnames = list(x = NULL, y = c("id", paste0("pom.", 1:8)))))
> print(df)
   id pom.1 pom.2 pom.3 pom.4 pom.5 pom.6 pom.7 pom.8
1 101     2    NA     7    NA     5     1    NA     2
2 102     7     4     8     2     1     5    NA     4
3 103     6     8     5     2     9     8     2     7
4 104     9    NA     4     5     3     9     7     9
5 105     1     7     6     2     3     4     5     5
# Creating the output: for each row, mark which of the codes 1..10 occur.
# The id column is dropped before apply(): apply() coerces the data frame to
# a matrix, so a non-numeric id would turn every value into padded text
# (breaking %in%), and a numeric id in 1..10 would be counted as a code.
> ndf <- t(apply(df[-1], 1, function(l) ifelse(1:10 %in% l, 'y', 'n')))
> dimnames(ndf) <- list(as.character(df$id), as.character(1:10))
> print(ndf)
    1   2   3   4   5   6   7   8   9   10
101 "y" "y" "n" "n" "y" "n" "y" "n" "n" "n"
102 "y" "y" "n" "y" "y" "n" "y" "y" "n" "n"
103 "n" "y" "n" "n" "y" "y" "y" "y" "y" "n"
104 "n" "n" "y" "y" "y" "n" "y" "n" "y" "n"
105 "y" "y" "y" "y" "y" "y" "y" "n" "n" "n"

, . @ , , .

+2

Using `mtabulate` from the qdapTools package:

library(qdapTools)
# Transpose the value columns so that each original row becomes one column,
# pass those columns to mtabulate(), and put the id column back in front.
# NOTE(review): dfN is not defined in this excerpt; presumably it holds the
# same data as the question's data frame. Confirm before running.
cbind(
  dfN[1],
  mtabulate(as.data.frame(t(dfN[-1])))
)

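Or we can use `table` from base R. We `unlist` the columns other than "id", replicate the "id" values (using `row`) to the same length as the unlisted values, cross-tabulate the two, convert the counts to logical with `!!`, map them to "n"/"y", and `cbind` the result with the "id" column.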

# Cross-tabulate id against value and reduce the counts to TRUE/FALSE.
# The id is replicated with row() to line up with the unlisted value columns
# and is made a factor whose levels follow the order of dfN$id: table() would
# otherwise sort its rows by id, misaligning them with dfN[1] in the cbind().
# NOTE(review): assumes the ids in dfN are unique. Confirm.
tbl <- !!table(factor(dfN$id[row(dfN[-1])], levels = unique(dfN$id)),
               factor(unlist(dfN[-1]), levels=1:10))
# Map FALSE/TRUE to 'n'/'y' while keeping the table's shape.
tbl[] <- c('n', 'y')[tbl+1L]
`row.names<-`(cbind(dfN[1], as.data.frame.matrix(tbl)), NULL)
#         id 1 2 3 4 5 6 7 8 9 10
#1 20764422 y n y n n n n n n  n
#2  8049335 y y y y y n y y y  n
#3  7668511 n y n n y n y n n  n
#4 20058102 n y n y n n y n n  n
#5 17318802 y y y n y y n y y  n
+1

Another solution in base R, using `apply` over a matrix (with a smaller example):

df <- data.frame(col1 = c(1, NA, 3), col2 = c(2, 10, NA))

# Row by row, test which of the codes 1..10 appear among the non-missing
# entries, then relabel the resulting logical matrix as 'y' / 'n'.
as.data.frame(
  ifelse(
    t(apply(df, 1, function(vals) is.element(1:10, vals[!is.na(vals)]))),
    'y', 'n'
  )
)

  V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1  y  y  n  n  n  n  n  n  n   n
2  n  n  n  n  n  n  n  n  n   y
3  n  n  y  n  n  n  n  n  n   n

You may need to adapt the row names and the column names.

+1
source

Source: https://habr.com/ru/post/1617060/


All Articles