Create a new duplicate table from another by adding new values from the originals using R

I need to create a table from the source table (both below).

In the source table, we have families A and B, and the members of each family are indicated by the PESS column. Beneficiary members of each family are marked with the number 1 in the BEN column. From this table I need to create a new table with 2 more columns. Taking family A as an example, members 1 and 4 are the beneficiaries, so family A should be duplicated into two groups, each one flagging a single beneficiary at a time (column I_BPC_FAM2). The FAM2 column identifies the groups.

With the code below, I am creating a new table, however the column I_BPC_FAM2 is missing. The problem must be resolved in R.

Is it possible to complete this code to get to the final table?

library(tidyverse) 
# Source table: one row per family member; BEN == 1 marks a beneficiary.
tabela <- data.frame(
  FAM = c("A", "A", "A", "A", "B", "B", "B"),
  PESS = c(1, 2, 3, 4, 1, 2, 3),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)
# Build the counter table: per family, `contador` is the number of
# beneficiaries and `cont` is the number of members.
tabela1 <- summarise(group_by(tabela, FAM), contador = sum(BEN), cont = n())

# For every family, emit one (FAM, PESS, FAM2) row per member and per
# beneficiary group. seq_len()/seq_along() are used instead of 1:n so that a
# zero count yields no iterations (1:0 would iterate over 1 and 0).
tab2 <- NULL
for (i in seq_along(tabela1$FAM)) {
  x <- as.numeric(tabela1[i, "contador"]) # number of groups for this family
  j <- as.numeric(tabela1[i, "cont"]) # number of members in this family
  for (l in seq_len(x)) {
    for (k in seq_len(j)) {
      tab <- data.frame(tabela1[i, "FAM"], PESS = as.numeric(k), FAM2 = as.numeric(l))
      tab2 <- rbind(tab2, tab)
    }
  }
}

# Attach BEN from the source table once, after all rows have been generated
# (only the last merge/order result was ever used), then sort by family/group.
final <- merge(tab2, tabela, by = c("FAM", "PESS"))
final <- final[order(final$FAM, final$FAM2), ]

Original table:

> tabela
  FAM PESS BEN
1   A    1   1
2   A    2   0
3   A    3   0
4   A    4   1
5   B    1   0
6   B    2   0
7   B    3   1

The table generated by my code

> final
   FAM PESS FAM2 BEN
1    A    1    1   1
3    A    2    1   0
5    A    3    1   0
7    A    4    1   1
2    A    1    2   1
4    A    2    2   0
6    A    3    2   0
8    A    4    2   1
9    B    1    1   0
10   B    2    1   0
11   B    3    1   1

Table I need to generate

   FAM PESS FAM2 BEN I_BPC_FAM2
1    A    1    1   1         1
3    A    2    1   0         0
5    A    3    1   0         0 
7    A    4    1   1         0
2    A    1    2   1         0
4    A    2    2   0         0
6    A    3    2   0         0
8    A    4    2   1         1
9    B    1    1   0         0
10   B    2    1   0         0
11   B    3    1   1         1
+4
source share
4 answers

Here is an alternative way:

# Example data: one row per family member; BEN == 1 marks a beneficiary.
table <- data.frame(
  FAM = rep(c("A", "B"), times = c(4, 3)),
  PESS = as.numeric(c(seq_len(4), seq_len(3))),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)

Create a unique identifier for each observation:

# Number the rows so each observation can be identified after the merge.
# Plain assignment is used because the compound pipe `%<>%` is only available
# after library(magrittr); library(tidyverse) does not attach it.
table <- table %>% mutate(unique_id = row_number())

Build the set of new family groups you want to obtain, one per beneficiary:

# One row per beneficiary: its row id doubles as the id of the new group (FAM2).
ben <- table %>%
  filter(BEN == 1) %>%
  select(FAM2 = unique_id, FAM)

> ben
  FAM2 FAM
1    1   A
2    4   A
3    7   B

Combine and compare identifiers:

# Pair every family member with each beneficiary group of the same family,
# flag the row whose id equals the group's beneficiary id, then drop the
# helper id column.
new_table <- ben %>%
  merge(table, by = "FAM") %>%
  mutate(
    I_BPC_FAM2 = as.integer(FAM2 == unique_id),
    unique_id = NULL
  )

Result:

new_table
> new_table
  FAM FAM2 PESS BEN I_BPC_FAM2
1    A    1    1   1          1
2    A    1    2   0          0
3    A    1    3   0          0
4    A    1    4   1          0
5    A    4    1   1          0
6    A    4    2   0          0
7    A    4    3   0          0
8    A    4    4   1          1
9    B    7    1   0          0
10   B    7    2   0          0
11   B    7    3   1          1

You can then convert the new family identifiers, if necessary, using

> new_table %>% mutate(FAM2 = as.integer(as.factor(FAM2)))
   FAM FAM2 PESS BEN I_BPC_FAM2
1    A    1    1   1          1
2    A    1    2   0          0
3    A    1    3   0          0
4    A    1    4   1          0
5    A    2    1   1          0
6    A    2    2   0          0
7    A    2    3   0          0
8    A    2    4   1          1
9    B    3    1   0          0
10   B    3    2   0          0
11   B    3    3   1          1
+2
source

In Nicholas's code, I would change this part:

# Keep the beneficiary rows; ID remembers the beneficiary's row id and FAM2
# numbers the beneficiaries 1, 2, ... within each family.
# A single pipeline with plain assignment replaces the `%<>%` step, which
# needs library(magrittr) and is not attached by tidyverse/dplyr.
ben <- table %>%
  filter(BEN == 1) %>%
  group_by(FAM) %>%
  mutate(ID = unique_id, FAM2 = cumsum(BEN)) %>%
  ungroup() %>%
  select(ID, FAM2, FAM)

# Replicate each family once per beneficiary and flag the matching member.
new_table <- merge(ben, table, by = "FAM") %>%
  mutate(I_BPC_FAM2 = as.integer(unique_id == ID)) %>%
  select(-unique_id, -ID)

which leads to the following:

 > new_table
    FAM FAM2 PESS BEN I_BPC_FAM2
1    A    1    1   1          1
2    A    1    2   0          0
3    A    1    3   0          0
4    A    1    4   1          0
5    A    2    1   1          0
6    A    2    2   0          0
7    A    2    3   0          0
8    A    2    4   1          1
9    B    1    1   0          0
10   B    1    2   0          0
11   B    1    3   1          1

Now FAM2 numbers the groups within each family, which is convenient to use with the max() function.

+2
source

. , , , . , , .

library(dplyr)

# original dataset
dt <- data.frame(
  FAM = c("A", "A", "A", "A", "B", "B", "B"),
  PESS = c(1, 2, 3, 4, 1, 2, 3),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)

# create multiple rows of FAM based on how many 1s they have in column BEN
# Each row is repeated sum_BEN times (a family without beneficiaries gets no
# rows), and the copies of a given (FAM, PESS) are numbered to form FAM2.
tbl1 <- dt %>%
  group_by(FAM) %>%
  mutate(sum_BEN = sum(BEN)) %>%
  ungroup() %>%
  slice(rep(seq_len(n()), times = sum_BEN)) %>%
  group_by(FAM, PESS) %>%
  mutate(FAM2 = row_number()) %>%
  ungroup() %>%
  select(-sum_BEN) %>%
  arrange(FAM, FAM2)
print(tbl1)

# # A tibble: 11 × 4
#       FAM  PESS   BEN  FAM2
#    <fctr> <dbl> <dbl> <int>
# 1       A     1     1     1
# 2       A     2     0     1
# 3       A     3     0     1
# 4       A     4     1     1
# 5       A     1     1     2
# 6       A     2     0     2
# 7       A     3     0     2
# 8       A     4     1     2
# 9       B     1     0     1
# 10      B     2     0     1
# 11      B     3     1     1


# keep the relevant rows of FAM to put 1 for I_BPC_FAM2
# The running count of beneficiaries within a family is the group number
# (FAM2) in which that beneficiary is the flagged one.
tbl2 <- dt %>%
  arrange(FAM, PESS) %>%
  group_by(FAM) %>%
  mutate(FAM2 = cumsum(BEN)) %>%
  ungroup() %>%
  distinct(FAM, BEN, FAM2, .keep_all = TRUE) %>%
  filter(BEN != 0) %>%
  mutate(I_BPC_FAM2 = 1)
print(tbl2)

# # A tibble: 3 × 5
#      FAM  PESS   BEN  FAM2 I_BPC_FAM2
#   <fctr> <dbl> <dbl> <dbl>      <dbl>
# 1      A     1     1     1          1
# 2      A     4     1     2          1
# 3      B     3     1     1          1


# join tables
# Rows of tbl1 without a match in tbl2 get NA for the flag; turn those into 0.
tbl1 %>%
  left_join(tbl2, by = c("FAM", "PESS", "BEN", "FAM2")) %>%
  mutate(I_BPC_FAM2 = replace(I_BPC_FAM2, is.na(I_BPC_FAM2), 0)) %>%
  arrange(FAM, FAM2)

# # A tibble: 11 × 5
#       FAM  PESS   BEN  FAM2 I_BPC_FAM2
#    <fctr> <dbl> <dbl> <dbl>      <dbl>
# 1       A     1     1     1          1
# 2       A     2     0     1          0
# 3       A     3     0     1          0
# 4       A     4     1     1          0
# 5       A     1     1     2          0
# 6       A     2     0     2          0
# 7       A     3     0     2          0
# 8       A     4     1     2          1
# 9       B     1     0     1          0
# 10      B     2     0     1          0
# 11      B     3     1     1          1
0
source

Here is a base R solution using the split-apply methodology with split, lapply and do.call/rbind.

# Example data (the question's table); the code below expects it in `df`.
df <- data.frame(
  FAM = c("A", "A", "A", "A", "B", "B", "B"),
  PESS = c(1, 2, 3, 4, 1, 2, 3),
  BEN = c(1, 0, 0, 1, 0, 0, 1)
)

# Build a list of data.frames, one per family: each family is repeated once
# per beneficiary, and in the k-th copy only the k-th beneficiary is flagged.
myList <- lapply(split(df, df$FAM), function(fam) {
  bens <- which(fam$BEN == 1) # row positions of the beneficiaries
  rows <- nrow(fam) # family size before replication
  fam <- fam[rep(seq_len(rows), length(bens)), ] # one full copy per beneficiary
  fam$I_BPC_FAM2 <- rep(0, nrow(fam)) # initialize; also valid for zero rows
  # The k-th copy starts at offset rows * (k - 1); flag beneficiary k there.
  fam$I_BPC_FAM2[bens + rows * (seq_along(bens) - 1)] <- 1
  fam # return new data.frame
})

Now you can combine the list into a single data frame with

# Stack the per-family data frames from myList into a single data frame.
do.call(rbind, myList)
      FAM PESS BEN I_BPC_FAM2
A.1     A    1   1          1
A.2     A    2   0          0
A.3     A    3   0          0
A.4     A    4   1          0
A.1.1   A    1   1          0
A.2.1   A    2   0          0
A.3.1   A    3   0          0
A.4.1   A    4   1          1
B.5     B    1   0          0
B.6     B    2   0          0
B.7     B    3   1          1
0
source

Source: https://habr.com/ru/post/1669704/


All Articles