Creating a new data frame based on values from another data frame

Question

Creating a new data frame based on values from another data frame

The data frame is as follows:

id pom.1 pom.2 pom.3 pom.4 pom.5 pom.6 pom.7 pom.8
20764422   1   3  <NA>  <NA>  <NA>  <NA>  <NA>  <NA>
08049335   4   2   1   5   8   7   9   3
07668511   5   2   7  <NA>  <NA>  <NA>  <NA>  <NA>
20058102   7   4   2  <NA>  <NA>  <NA>  <NA>  <NA>
17318802   6   3   5   1   9   8   2  <NA>

where there is a list of 10 possible values that can be found in this data frame.

I need to create another data frame that will have 10 columns, one for each value from the list, and map it to the original data frame.

The new data frame should look like this:

id c1 c2 c3 c4 c5 c6 c7 c8 c9 c10
20764422 y n y n n n n n n n
08049335 y y y y y n y y y n
07668511 n y n n y n y n n n
20058102 n y n y n n y n n n
17318802 y y y n y y n y y n

where each column (c1-c10) corresponds to one value from the list of values. The values "y" and "n" for each identifier indicate whether that value is present or absent in the original data frame.

I hope this explanation is clear enough to understand what needs to be done.

, , . , , .

!

+4

r dataframe data.table

Branko Nov 23 '15 at 12:07

4

Here is a base R solution; note that it returns a character matrix rather than a data.frame:

# Creating some data that looks like yours: 5 ids (101..105) and 8 "pom" columns
# filled with random draws from 1..10 or NA. No seed is set, so every run gives
# different values.
> df <- data.frame(matrix(c(101:105, sample(c(1:10, NA), 40, replace = TRUE)), 5, 9, dimnames = list(x = NULL, y = c("id", paste0("pom.", 1:8)))))
> print(df)
   id pom.1 pom.2 pom.3 pom.4 pom.5 pom.6 pom.7 pom.8
1 101     2    NA     7    NA     5     1    NA     2
2 102     7     4     8     2     1     5    NA     4
3 103     6     8     5     2     9     8     2     7
4 104     9    NA     4     5     3     9     7     9
5 105     1     7     6     2     3     4     5     5
# Creating the output: one row per id, one column per possible value 1..10,
# "y" when the value occurs in that row and "n" otherwise.
# The id column is dropped before the membership test: if id were character,
# apply() would turn every row into space-padded strings and no value would
# ever match. Row names are taken from the data instead of being hard-coded.
> ndf <- t(apply(as.matrix(df[-1]), 1, function(r) ifelse(1:10 %in% r, "y", "n")))
> dimnames(ndf) <- list(as.character(df$id), as.character(1:10))
> print(ndf)
    1   2   3   4   5   6   7   8   9   10
101 "y" "y" "n" "n" "y" "n" "y" "n" "n" "n"
102 "y" "y" "n" "y" "y" "n" "y" "y" "n" "n"
103 "n" "y" "n" "n" "y" "y" "y" "y" "y" "n"
104 "n" "n" "y" "y" "y" "n" "y" "n" "y" "n"
105 "y" "y" "y" "y" "y" "y" "y" "n" "n" "n"

, . @ , , .

+2

Vongo Nov 23 '15 at 13:06

We can use mtabulate from the qdapTools package:

# mtabulate() is taken from the qdapTools package loaded here (not base R).
library(qdapTools)
# Transpose the value columns (everything except the first column) so that each
# original row becomes one column, pass those columns to mtabulate(), and bind
# the first column of dfN (the id) back in front of the result.
# NOTE(review): dfN is not defined in this snippet; presumably it is the
# question's data frame - confirm. The result presumably holds counts rather
# than "y"/"n", and only for values that actually occur - verify against the
# qdapTools documentation.
cbind(dfN[1], mtabulate(as.data.frame(t(dfN[-1]))))

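Or we can use table from base R. We unlist the columns other than "id", tabulate them against the "id" column (replicated once per value column) with the unlisted values as a factor with levels 1:10, convert the counts to logical with a double negation, replace FALSE/TRUE with "n"/"y", and cbind the result with the "id" column.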

# Repeat each id once per value column (row() gives the row index of every
# cell) and pin the factor levels to the order in which the ids appear in dfN.
# table() orders its rows by factor level, so without this the rows come out
# sorted by id and the cbind() below would pair them with the wrong ids.
ids <- factor(dfN$id[row(dfN[-1])], levels = unique(dfN$id))
# Cross-tabulate id by value, keeping a column for every value in 1:10; the
# double negation turns the counts into TRUE/FALSE (present/absent).
tbl <- !!table(ids, factor(unlist(dfN[-1]), levels = 1:10))
# Relabel FALSE/TRUE as 'n'/'y' in place, keeping the table's shape.
tbl[] <- c('n', 'y')[tbl + 1L]
# Put the id column in front and reset the row names.
`row.names<-`(cbind(dfN[1], as.data.frame.matrix(tbl)), NULL)
#         id 1 2 3 4 5 6 7 8 9 10
#1 20764422 y n y n n n n n n  n
#2  8049335 y y y y y n y y y  n
#3  7668511 n y n n y n y n n  n
#4 20058102 n y n y n n y n n  n
#5 17318802 y y y n y y n y y  n

+1

akrun Nov 23 '15 at 12:39

Another solution in base R, using apply over a matrix (shown on a smaller example):

# Small example: two value columns that hold codes from 1..10 or NA.
df <- data.frame(col1 = c(1, NA, 3), col2 = c(2, 10, NA))

# For one row, flag which of the codes 1..10 occur among its non-missing entries.
row_has_code <- function(x) (1:10) %in% x[!is.na(x)]
# apply() returns one column per row of df, hence the transpose.
flags <- t(apply(df, 1, row_has_code))
# Relabel TRUE/FALSE as 'y'/'n' and return a data frame (columns V1..V10).
as.data.frame(ifelse(flags, 'y', 'n'))

  V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1  y  y  n  n  n  n  n  n  n   n
2  n  n  n  n  n  n  n  n  n   y
3  n  n  y  n  n  n  n  n  n   n

You may need to adapt the row names and the column names.

+1

Patrick roocks Nov 23 '15 at 12:57

source share

A5C1D2H2I1M1N2O1R2T1 · Accepted Answer · 2015-11-23T12:29:25+0000

If you are fine with 1 and 0 instead of "y" and "n", you can try something like this.

, (dput) , , , .

# Uses the data.table package for as.data.table(), melt() and dcast().
library(data.table)
# Melt mydf to long form keyed by "id", then cast back to wide form with one
# column per distinct value. No aggregate function is supplied, so dcast()
# falls back to length() (see the message below), i.e. each cell is a count;
# missing values end up in their own NA column.
dcast(melt(as.data.table(mydf), "id"), id ~ value)
# Aggregate function missing, defaulting to 'length'
#          id 1 2 3 4 5 6 7 8 9 NA
# 1:  7668511 0 1 0 0 1 0 1 0 0  5
# 2:  8049335 1 1 1 1 1 0 1 1 1  0
# 3: 17318802 1 1 1 0 1 1 0 1 1  1
# 4: 20058102 0 1 0 1 0 0 1 0 0  5
# 5: 20764422 1 0 1 0 0 0 0 0 0  6

If you do need "y" and "n", try something like this:

## The aggregate must return exactly one value per id/value cell. Collapsing
## with any() keeps that true even when a value is repeated within an id
## (an element-wise ifelse() would return one element per repeat).
dcast(melt(as.data.table(mydf), "id", na.rm = TRUE)[          ## melt and remove NA
      , value := factor(value, 1:10)],                        ## factor value column
      id ~ value,                                             ## pivot value by id
      fun.aggregate = function(x) {                           ## get your "y" and "n"
        if (any(!is.na(x))) "y" else "n"
      },
      fill = "n", drop = FALSE)                               ## don't drop missing factors

Output:

##          id 1 2 3 4 5 6 7 8 9 10
## 1: 07668511 n y n n y n y n n  n
## 2: 08049335 y y y y y n y y y  n
## 3: 17318802 y y y n y y n y y  n
## 4: 20058102 n y n y n n y n n  n
## 5: 20764422 y n y n n n n n n  n

Update

A base R alternative using tabulate and chartr:

# Count how often each of the values 1..10 occurs in every row (tabulate()
# does not count NAs), then cap the counts at 1 so that a value repeated
# within a row is still a plain presence flag. Row names are the ids.
temp <- `rownames<-`(
  t(apply(mydf[-1], 1, function(x) pmin(tabulate(x, nbins = 10), 1L))),
  mydf[[1]]
)
# Translate the 0/1 flags into "n"/"y"; assigning through temp[] keeps the
# matrix shape and the row names.
temp[] <- chartr("01", "ny", temp)
temp
#          [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# 20764422 "y"  "n"  "y"  "n"  "n"  "n"  "n"  "n"  "n"  "n"  
# 08049335 "y"  "y"  "y"  "y"  "y"  "n"  "y"  "y"  "y"  "n"  
# 07668511 "n"  "y"  "n"  "n"  "y"  "n"  "y"  "n"  "n"  "n"  
# 20058102 "n"  "y"  "n"  "y"  "n"  "n"  "y"  "n"  "n"  "n"  
# 17318802 "y"  "y"  "y"  "n"  "y"  "y"  "n"  "y"  "y"  "n"

Sample data used above (note that the ids are stored as character, so the leading zeros are kept):

# Sample data from the question. `id` is character, so the leading zeros of
# ids such as "08049335" are preserved; every pom.* column is integer, with NA
# marking unused slots.
mydf <- structure(
  list(
    id = c("20764422", "08049335", "07668511", "20058102", "17318802"),
    pom.1 = c(1L, 4L, 5L, 7L, 6L),
    pom.2 = c(3L, 2L, 2L, 4L, 3L),
    pom.3 = c(NA, 1L, 7L, 2L, 5L),
    pom.4 = c(NA, 5L, NA, NA, 1L),
    pom.5 = c(NA, 8L, NA, NA, 9L),
    pom.6 = c(NA, 7L, NA, NA, 8L),
    pom.7 = c(NA, 9L, NA, NA, 2L),
    pom.8 = c(NA, 3L, NA, NA, NA)
  ),
  row.names = c(NA, 5L),
  class = "data.frame"
)

Creating a new data frame based on values from another data frame

Update

More articles:

Creating a new data frame based on values from another data frame